diff --git a/meta-openembedded/meta-filesystems/recipes-support/fuse/fuse3_3.11.0.bb b/meta-openembedded/meta-filesystems/recipes-support/fuse/fuse3_3.12.0.bb
similarity index 96%
rename from meta-openembedded/meta-filesystems/recipes-support/fuse/fuse3_3.11.0.bb
rename to meta-openembedded/meta-filesystems/recipes-support/fuse/fuse3_3.12.0.bb
index 8055fb0..b886eff 100644
--- a/meta-openembedded/meta-filesystems/recipes-support/fuse/fuse3_3.11.0.bb
+++ b/meta-openembedded/meta-filesystems/recipes-support/fuse/fuse3_3.12.0.bb
@@ -13,7 +13,7 @@
 
 SRC_URI = "https://github.com/libfuse/libfuse/releases/download/fuse-${PV}/fuse-${PV}.tar.xz \
 "
-SRC_URI[sha256sum] = "8982c4c521daf3974dda8a5d55d575c988da13a571970f00aea149eb54fdf14c"
+SRC_URI[sha256sum] = "33b8a92d6f7a88e6a889f0009206933482f48f3eb85d88cf09ef551313ac7373"
 
 S = "${WORKDIR}/fuse-${PV}"
 
diff --git a/meta-openembedded/meta-filesystems/recipes-utils/xfstests/xfstests_2022.08.07.bb b/meta-openembedded/meta-filesystems/recipes-utils/xfstests/xfstests_2022.09.04.bb
similarity index 77%
rename from meta-openembedded/meta-filesystems/recipes-utils/xfstests/xfstests_2022.08.07.bb
rename to meta-openembedded/meta-filesystems/recipes-utils/xfstests/xfstests_2022.09.04.bb
index ba8b1a2..90f49f7 100644
--- a/meta-openembedded/meta-filesystems/recipes-utils/xfstests/xfstests_2022.08.07.bb
+++ b/meta-openembedded/meta-filesystems/recipes-utils/xfstests/xfstests_2022.09.04.bb
@@ -4,14 +4,13 @@
 
 SRCREV_FORMAT = "xfstests_unionmount"
 
-SRC_URI = "\
-    git://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git;branch=master;name=xfstests \
-    git://github.com/amir73il/unionmount-testsuite.git;branch=master;protocol=https;name=unionmount;destsuffix=unionmount-testsuite \
-    file://0001-Add-a-return-type-to-aio_rw.patch \
-    file://0002-Drop-detached_mounts_propagation-and-remove-sys-moun.patch \
-"
+SRC_URI = "git://git.kernel.org/pub/scm/fs/xfs/xfstests-dev.git;branch=master;name=xfstests \
+           git://github.com/amir73il/unionmount-testsuite.git;branch=master;protocol=https;name=unionmount;destsuffix=unionmount-testsuite \
+           file://0001-Add-a-return-type-to-aio_rw.patch \
+           file://0002-Drop-detached_mounts_propagation-and-remove-sys-moun.patch \
+           "
 
-SRCREV_xfstests = "16ddbd1aee295f64695916cf3621aef57f1163ba"
+SRCREV_xfstests = "890c50823b6430bf0929d9a57e76b9b4d6bbc25f"
 SRCREV_unionmount = "e3825b16b46f4c4574a1a69909944c059835f914"
 
 S = "${WORKDIR}/git"
diff --git a/meta-openembedded/meta-gnome/recipes-connectivity/folks/folks_0.15.5.bb b/meta-openembedded/meta-gnome/recipes-connectivity/folks/folks_0.15.5.bb
index 51620bb..ad61ee6 100644
--- a/meta-openembedded/meta-gnome/recipes-connectivity/folks/folks_0.15.5.bb
+++ b/meta-openembedded/meta-gnome/recipes-connectivity/folks/folks_0.15.5.bb
@@ -8,8 +8,9 @@
 "
 
 GNOMEBASEBUILDCLASS = "meson"
-EXTRA_OEMESON += "-Dtests=false"
+EXTRA_OEMESON += "-Dtests=false -Db_lto=false "
 
+CFLAGS:append:toolchain-clang = " -Wno-error=implicit-function-declaration"
 # gobject-introspection is mandatory and cannot be configured
 REQUIRED_DISTRO_FEATURES = "gobject-introspection-data"
 GIR_MESON_OPTION = ""
diff --git a/meta-openembedded/meta-gnome/recipes-connectivity/geary/geary_40.0.bb b/meta-openembedded/meta-gnome/recipes-connectivity/geary/geary_40.0.bb
index 4b454dc..3ff0a41 100644
--- a/meta-openembedded/meta-gnome/recipes-connectivity/geary/geary_40.0.bb
+++ b/meta-openembedded/meta-gnome/recipes-connectivity/geary/geary_40.0.bb
@@ -52,6 +52,10 @@
 PACKAGECONFIG[valadoc] = "-Dvaladoc=enabled,-Dvaladoc=disabled"
 
 PACKAGECONFIG ??= ""
+# rfc822/rfc822-message.c:2097:12: error: incompatible pointer to integer conversion returning 'void *' from a function with result type 'gboolean' (aka 'int') [-Wint-conversion]
+#|                                 return NULL;
+#|                                        ^~~~
+CFLAGS:append:toolchain-clang = " -Wno-error=int-conversion"
 
 FILES:${PN} += "${datadir}"
 
diff --git a/meta-openembedded/meta-gnome/recipes-gnome/gexiv2/gexiv2_0.14.0.bb b/meta-openembedded/meta-gnome/recipes-gnome/gexiv2/gexiv2_0.14.0.bb
index afec302..fed29da 100644
--- a/meta-openembedded/meta-gnome/recipes-gnome/gexiv2/gexiv2_0.14.0.bb
+++ b/meta-openembedded/meta-gnome/recipes-gnome/gexiv2/gexiv2_0.14.0.bb
@@ -19,3 +19,10 @@
 PACKAGES =+ "${PN}-python3"
 FILES:${PN}-python3 = "${PYTHON_SITEPACKAGES_DIR}"
 RDEPENDS:${PN}-python3 = "${PN}"
+
+PACKAGE_PREPROCESS_FUNCS += "src_package_preprocess"
+src_package_preprocess () {
+        # Hooked in via PACKAGE_PREPROCESS_FUNCS: strip the build-tree path prefix from comments in the generated gexiv2-enums.cpp, in place, to ensure reproducibility
+        sed -i -e "s,${B}/../${BPN}-${PV}/${BPN}/,,g" \
+            ${B}/gexiv2/gexiv2-enums.cpp
+}
diff --git a/meta-openembedded/meta-gnome/recipes-gnome/gnome-bluetooth/gnome-bluetooth_42.3.bb b/meta-openembedded/meta-gnome/recipes-gnome/gnome-bluetooth/gnome-bluetooth_42.4.bb
similarity index 90%
rename from meta-openembedded/meta-gnome/recipes-gnome/gnome-bluetooth/gnome-bluetooth_42.3.bb
rename to meta-openembedded/meta-gnome/recipes-gnome/gnome-bluetooth/gnome-bluetooth_42.4.bb
index cf73f82..75e1244 100644
--- a/meta-openembedded/meta-gnome/recipes-gnome/gnome-bluetooth/gnome-bluetooth_42.3.bb
+++ b/meta-openembedded/meta-gnome/recipes-gnome/gnome-bluetooth/gnome-bluetooth_42.4.bb
@@ -26,7 +26,7 @@
 
 REQUIRED_DISTRO_FEATURES = "x11"
 
-SRC_URI[archive.sha256sum] = "c37a2a07f77d4816b261e6c2086a056ed9767c3881dfabc826f4f82f6e1aa302"
+SRC_URI[archive.sha256sum] = "1d6fcf1cdb6cc9923ab334a2c0bc37a2c1bba9d18de153d484eedc04f3c0bcdc"
 
 BT_PULSE_PACKS = " \
     pulseaudio-lib-bluez5-util \
diff --git a/meta-openembedded/meta-gnome/recipes-gnome/gnome-font-viewer/gnome-font-viewer_42.0.bb b/meta-openembedded/meta-gnome/recipes-gnome/gnome-font-viewer/gnome-font-viewer_43.0.bb
similarity index 82%
rename from meta-openembedded/meta-gnome/recipes-gnome/gnome-font-viewer/gnome-font-viewer_42.0.bb
rename to meta-openembedded/meta-gnome/recipes-gnome/gnome-font-viewer/gnome-font-viewer_43.0.bb
index c14e57c..50fd034 100644
--- a/meta-openembedded/meta-gnome/recipes-gnome/gnome-font-viewer/gnome-font-viewer_42.0.bb
+++ b/meta-openembedded/meta-gnome/recipes-gnome/gnome-font-viewer/gnome-font-viewer_43.0.bb
@@ -16,7 +16,7 @@
 
 REQUIRED_DISTRO_FEATURES = "x11"
 
-SRC_URI[archive.sha256sum] = "cfa2b8dfff21a105a1a021dadfa213f13627e6a179a77c7b59fdcedaca848dcc"
+SRC_URI[archive.sha256sum] = "81c6bffb06d5332346e00eaecaec1bdcfd617c51dfd95bcd058d6c76c76dd2b9"
 
 FILES:${PN} += " \
     ${datadir}/dbus-1 \
diff --git a/meta-openembedded/meta-gnome/recipes-gnome/gnome-keyring/gnome-keyring_40.0.bb b/meta-openembedded/meta-gnome/recipes-gnome/gnome-keyring/gnome-keyring_42.1.bb
similarity index 92%
rename from meta-openembedded/meta-gnome/recipes-gnome/gnome-keyring/gnome-keyring_40.0.bb
rename to meta-openembedded/meta-gnome/recipes-gnome/gnome-keyring/gnome-keyring_42.1.bb
index a30303c..5937e74 100644
--- a/meta-openembedded/meta-gnome/recipes-gnome/gnome-keyring/gnome-keyring_40.0.bb
+++ b/meta-openembedded/meta-gnome/recipes-gnome/gnome-keyring/gnome-keyring_42.1.bb
@@ -21,7 +21,7 @@
 
 ANY_OF_DISTRO_FEATURES = "${GTK3DISTROFEATURES}"
 
-SRC_URI[archive.sha256sum] = "a3d24db08ee2fdf240fbbf0971a98c8ee295aa0e1a774537f4ea938038a3b931"
+SRC_URI[archive.sha256sum] = "c7f4d040cc76a6b7fe67e08ef9106911c3c80d40fc88cbfc8e2684a4c946e3e6"
 SRC_URI += " \
     file://0001-Set-paths-to-ssh-agent-and-ssh-add-by-configure-opti.patch \
     file://musl.patch \
@@ -41,6 +41,7 @@
     ${datadir}/xdg-desktop-portal \
     ${base_libdir}/security/*${SOLIBSDEV} \
     ${libdir}/pkcs11/gnome-keyring-pkcs11.so \
+    ${systemd_user_unitdir} \
 "
 # fix | gnome-keyring-daemon: insufficient process capabilities, unsecure memory might get used
 pkg_postinst:${PN} () {
diff --git a/meta-openembedded/meta-gnome/recipes-gnome/libadwaita/libadwaita_1.1.4.bb b/meta-openembedded/meta-gnome/recipes-gnome/libadwaita/libadwaita_1.2.0.bb
similarity index 85%
rename from meta-openembedded/meta-gnome/recipes-gnome/libadwaita/libadwaita_1.1.4.bb
rename to meta-openembedded/meta-gnome/recipes-gnome/libadwaita/libadwaita_1.2.0.bb
index 44d18f5..7a1f86a 100644
--- a/meta-openembedded/meta-gnome/recipes-gnome/libadwaita/libadwaita_1.1.4.bb
+++ b/meta-openembedded/meta-gnome/recipes-gnome/libadwaita/libadwaita_1.2.0.bb
@@ -11,7 +11,7 @@
 
 inherit gnomebase gobject-introspection gtk-doc vala features_check
 
-SRC_URI[archive.sha256sum] = "fcc6d56669d33ac3d030098d7571d8045a02e18dc083b49a5a5a6325068e6b58"
+SRC_URI[archive.sha256sum] = "322f3e1be39ba67981d9fe7228a85818eccaa2ed0aa42bcafe263af881c6460c"
 
 ANY_OF_DISTRO_FEATURES = "${GTK3DISTROFEATURES}"
 REQUIRED_DISTRO_FEATURES = "opengl"
diff --git a/meta-openembedded/meta-gnome/recipes-gnome/libgtop/libgtop/0001-Pass-correct-parameter.patch b/meta-openembedded/meta-gnome/recipes-gnome/libgtop/libgtop/0001-Pass-correct-parameter.patch
new file mode 100644
index 0000000..5ea8eed
--- /dev/null
+++ b/meta-openembedded/meta-gnome/recipes-gnome/libgtop/libgtop/0001-Pass-correct-parameter.patch
@@ -0,0 +1,29 @@
+From e969ac59335d3fb1cd228f8e7c4f6c2dda4fa536 Mon Sep 17 00:00:00 2001
+From: Avinash Sonawane <rootkea@gmail.com>
+Date: Mon, 20 Dec 2021 13:33:42 +0530
+Subject: [PATCH] Pass correct parameter
+
+Upstream-Status: Backport [https://gitlab.gnome.org/GNOME/libgtop/-/merge_requests/35]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/daemon/main.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/src/daemon/main.c b/src/daemon/main.c
+index b51addf..eaee3b3 100644
+--- a/src/daemon/main.c
++++ b/src/daemon/main.c
+@@ -222,8 +222,9 @@ handle_parent_connection (int s)
+ 		       0, NULL);
+ 	    break;
+     case GLIBTOP_CMND_PROC_IO:
++        memcpy (&pid, parameter, sizeof (pid_t));
+ 	    glibtop_get_proc_io_l
+-		(server, &resp->u.data.proc_io, parameter);
++		(server, &resp->u.data.proc_io, pid);
+ 	    do_output (s, resp, _offset_data (proc_io),
+ 		       0, NULL);
+ 	    break;
+-- 
+2.37.2
+
diff --git a/meta-openembedded/meta-gnome/recipes-gnome/libgtop/libgtop_2.40.0.bb b/meta-openembedded/meta-gnome/recipes-gnome/libgtop/libgtop_2.40.0.bb
index 2e60264..fa95e0b 100644
--- a/meta-openembedded/meta-gnome/recipes-gnome/libgtop/libgtop_2.40.0.bb
+++ b/meta-openembedded/meta-gnome/recipes-gnome/libgtop/libgtop_2.40.0.bb
@@ -6,7 +6,9 @@
 
 ANY_OF_DISTRO_FEATURES = "${GTK3DISTROFEATURES}"
 
-SRC_URI += "file://0001-fix-compile-error-for-cross-compile.patch"
+SRC_URI += "file://0001-fix-compile-error-for-cross-compile.patch \
+            file://0001-Pass-correct-parameter.patch \
+            "
 
 SRC_URI[archive.sha256sum] = "78f3274c0c79c434c03655c1b35edf7b95ec0421430897fb1345a98a265ed2d4"
 
diff --git a/meta-openembedded/meta-gnome/recipes-gnome/nautilus/nautilus/0001-Provide-parameter-prototype-for-functions-without-pa.patch b/meta-openembedded/meta-gnome/recipes-gnome/nautilus/nautilus/0001-Provide-parameter-prototype-for-functions-without-pa.patch
new file mode 100644
index 0000000..6c743d8
--- /dev/null
+++ b/meta-openembedded/meta-gnome/recipes-gnome/nautilus/nautilus/0001-Provide-parameter-prototype-for-functions-without-pa.patch
@@ -0,0 +1,94 @@
+From 76f1625bae95212ec6d4bc1bd1c8ff1232150c48 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 13:14:09 -0700
+Subject: [PATCH] Provide parameter prototype for functions without parameter
+
+Fixes build with clang-15 with -Wstrict-prototypes
+
+src/nautilus-toolbar.c:1205:22: error: a function declaration without a prototype is deprecated in all versions of C [-Werror,-Wstrict-prototypes]
+nautilus_toolbar_new ()
+                     ^
+                      void
+
+Upstream-Status: Backport [https://gitlab.gnome.org/GNOME/nautilus/-/commit/95d35aedf68f0398a6ee9e0b0af9ce80528fdc22]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/nautilus-file-undo-manager.c | 4 ++--
+ src/nautilus-list-model.c        | 2 +-
+ src/nautilus-starred-directory.c | 2 +-
+ src/nautilus-toolbar.c           | 2 +-
+ src/nautilus-view-model.c        | 2 +-
+ 5 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/src/nautilus-file-undo-manager.c b/src/nautilus-file-undo-manager.c
+index 2a886176f..f2d1eebcf 100644
+--- a/src/nautilus-file-undo-manager.c
++++ b/src/nautilus-file-undo-manager.c
+@@ -258,13 +258,13 @@ nautilus_file_undo_manager_get_state (void)
+ 
+ 
+ gboolean
+-nautilus_file_undo_manager_is_operating ()
++nautilus_file_undo_manager_is_operating (void)
+ {
+     return undo_singleton->is_operating;
+ }
+ 
+ NautilusFileUndoManager *
+-nautilus_file_undo_manager_get ()
++nautilus_file_undo_manager_get (void)
+ {
+     return undo_singleton;
+ }
+diff --git a/src/nautilus-list-model.c b/src/nautilus-list-model.c
+index 7e2aeeeee..741007105 100644
+--- a/src/nautilus-list-model.c
++++ b/src/nautilus-list-model.c
+@@ -1625,7 +1625,7 @@ nautilus_list_model_get_drag_view (NautilusListModel *model,
+ }
+ 
+ GtkTargetList *
+-nautilus_list_model_get_drag_target_list ()
++nautilus_list_model_get_drag_target_list (void)
+ {
+     GtkTargetList *target_list;
+ 
+diff --git a/src/nautilus-starred-directory.c b/src/nautilus-starred-directory.c
+index fd41418a4..07ed62693 100644
+--- a/src/nautilus-starred-directory.c
++++ b/src/nautilus-starred-directory.c
+@@ -556,7 +556,7 @@ nautilus_starred_directory_class_init (NautilusFavoriteDirectoryClass *klass)
+ }
+ 
+ NautilusFavoriteDirectory *
+-nautilus_starred_directory_new ()
++nautilus_starred_directory_new (void)
+ {
+     NautilusFavoriteDirectory *self;
+ 
+diff --git a/src/nautilus-toolbar.c b/src/nautilus-toolbar.c
+index 5fe4e63a4..15a696827 100644
+--- a/src/nautilus-toolbar.c
++++ b/src/nautilus-toolbar.c
+@@ -1202,7 +1202,7 @@ nautilus_toolbar_class_init (NautilusToolbarClass *klass)
+ }
+ 
+ GtkWidget *
+-nautilus_toolbar_new ()
++nautilus_toolbar_new (void)
+ {
+     return g_object_new (NAUTILUS_TYPE_TOOLBAR,
+                          NULL);
+diff --git a/src/nautilus-view-model.c b/src/nautilus-view-model.c
+index 84c40302d..2935809ae 100644
+--- a/src/nautilus-view-model.c
++++ b/src/nautilus-view-model.c
+@@ -190,7 +190,7 @@ compare_data_func (gconstpointer a,
+ }
+ 
+ NautilusViewModel *
+-nautilus_view_model_new ()
++nautilus_view_model_new (void)
+ {
+     return g_object_new (NAUTILUS_TYPE_VIEW_MODEL, NULL);
+ }
diff --git a/meta-openembedded/meta-gnome/recipes-gnome/nautilus/nautilus_42.2.bb b/meta-openembedded/meta-gnome/recipes-gnome/nautilus/nautilus_42.2.bb
index f9c2ed0..a0f928c 100644
--- a/meta-openembedded/meta-gnome/recipes-gnome/nautilus/nautilus_42.2.bb
+++ b/meta-openembedded/meta-gnome/recipes-gnome/nautilus/nautilus_42.2.bb
@@ -21,6 +21,7 @@
 def gnome_verdir(v):
     return oe.utils.trim_version(v, 1)
 
+SRC_URI += " file://0001-Provide-parameter-prototype-for-functions-without-pa.patch"
 SRC_URI[archive.sha256sum] = "99212d2eb75996f181728ad04a2e2d86f2577b064e68a34c8b81a7037df4ccb2"
 
 REQUIRED_DISTRO_FEATURES = "x11"
diff --git a/meta-openembedded/meta-gnome/recipes-gnome/tracker/tracker-miners/0001-Set-header-file-to-a-fixed-path-instead-of-a-host-pa.patch b/meta-openembedded/meta-gnome/recipes-gnome/tracker/tracker-miners/0001-Set-header-file-to-a-fixed-path-instead-of-a-host-pa.patch
new file mode 100644
index 0000000..1332a95
--- /dev/null
+++ b/meta-openembedded/meta-gnome/recipes-gnome/tracker/tracker-miners/0001-Set-header-file-to-a-fixed-path-instead-of-a-host-pa.patch
@@ -0,0 +1,41 @@
+From 6f6d7e3995c370b8121206a7f9cc0d234553bbfe Mon Sep 17 00:00:00 2001
+From: Lei Maohui <leimaohui@fujitsu.com>
+Date: Thu, 15 Sep 2022 16:35:39 +0900
+Subject: [PATCH] Set header file to a fixed path instead of a host path.
+
+Upstream-Status: Inappropriate [embedded specific]
+
+Signed-off-by: Lei Maohui <leimaohui@fujitsu.com>
+---
+ src/libtracker-miner/tracker-miner-enum-types.c.template        | 2 +-
+ .../tracker-miners-enum-types.c.template                        | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/libtracker-miner/tracker-miner-enum-types.c.template b/src/libtracker-miner/tracker-miner-enum-types.c.template
+index 7be7a01..46a4dc9 100644
+--- a/src/libtracker-miner/tracker-miner-enum-types.c.template
++++ b/src/libtracker-miner/tracker-miner-enum-types.c.template
+@@ -7,7 +7,7 @@
+
+ /*** BEGIN file-production ***/
+ /* enumerations from "@basename@" */
+-#include "@filename@"
++#include "tracker-miner-enums.h"
+ /*** END file-production ***/
+
+
+diff --git a/src/libtracker-miners-common/tracker-miners-enum-types.c.template b/src/libtracker-miners-common/tracker-miners-enum-types.c.template
+index c712dda..cb449e2 100644
+--- a/src/libtracker-miners-common/tracker-miners-enum-types.c.template
++++ b/src/libtracker-miners-common/tracker-miners-enum-types.c.template
+@@ -7,7 +7,7 @@
+
+ /*** BEGIN file-production ***/
+ /* enumerations from "@basename@" */
+-#include "@filename@"
++#include "tracker-enums.h"
+ /*** END file-production ***/
+
+
+--
+2.25.1
diff --git a/meta-openembedded/meta-gnome/recipes-gnome/tracker/tracker-miners_3.2.1.bb b/meta-openembedded/meta-gnome/recipes-gnome/tracker/tracker-miners_3.2.1.bb
index 2847635..92039a2 100644
--- a/meta-openembedded/meta-gnome/recipes-gnome/tracker/tracker-miners_3.2.1.bb
+++ b/meta-openembedded/meta-gnome/recipes-gnome/tracker/tracker-miners_3.2.1.bb
@@ -15,7 +15,10 @@
 
 inherit gnomebase gsettings gobject-introspection vala bash-completion features_check
 
-SRC_URI += "file://0001-meson.build-Just-warn-if-we-build-without-libseccomp.patch"
+SRC_URI += "file://0001-meson.build-Just-warn-if-we-build-without-libseccomp.patch \
+            file://0001-Set-header-file-to-a-fixed-path-instead-of-a-host-pa.patch \
+           "
+
 SRC_URI[archive.sha256sum] = "44369f53e2edef41437406dbeecd477a97f8a9afdd9134832ea45d1ba2aa2c47"
 
 # gobject-introspection is mandatory and cannot be configured
diff --git a/meta-openembedded/meta-gnome/recipes-gnome/tracker/tracker_3.3.3.bb b/meta-openembedded/meta-gnome/recipes-gnome/tracker/tracker_3.4.0.bb
similarity index 93%
rename from meta-openembedded/meta-gnome/recipes-gnome/tracker/tracker_3.3.3.bb
rename to meta-openembedded/meta-gnome/recipes-gnome/tracker/tracker_3.4.0.bb
index 91d90dd..ed0fbb6 100644
--- a/meta-openembedded/meta-gnome/recipes-gnome/tracker/tracker_3.3.3.bb
+++ b/meta-openembedded/meta-gnome/recipes-gnome/tracker/tracker_3.4.0.bb
@@ -22,7 +22,7 @@
 
 inherit gnomebase gsettings gobject-introspection vala gtk-doc manpages bash-completion features_check python3native
 
-SRC_URI[archive.sha256sum] = "4094f704e338f2247fa6b94633279cfd07f7e952bb24627128fab78edb242464"
+SRC_URI[archive.sha256sum] = "b3b380c9571d7c7423b5f401e4a2f2d78de47143b035eb2c1281e2423c59218b"
 
 # gobject-introspection is mandatory and cannot be configured
 REQUIRED_DISTRO_FEATURES = "gobject-introspection-data"
diff --git a/meta-openembedded/meta-gnome/recipes-gnome/yelp/yelp-xsl_42.0.bb b/meta-openembedded/meta-gnome/recipes-gnome/yelp/yelp-xsl_42.1.bb
similarity index 85%
rename from meta-openembedded/meta-gnome/recipes-gnome/yelp/yelp-xsl_42.0.bb
rename to meta-openembedded/meta-gnome/recipes-gnome/yelp/yelp-xsl_42.1.bb
index 54a6aa6..ba25364 100644
--- a/meta-openembedded/meta-gnome/recipes-gnome/yelp/yelp-xsl_42.0.bb
+++ b/meta-openembedded/meta-gnome/recipes-gnome/yelp/yelp-xsl_42.1.bb
@@ -10,7 +10,7 @@
 
 DEPENDS += "libxml2"
 
-SRC_URI[archive.sha256sum] = "29b273cc0bd16efb6e983443803f1e9fdc03511e5c4ff6348fd30a604d4dc846"
+SRC_URI[archive.sha256sum] = "238be150b1653080ce139971330fd36d3a26595e0d6a040a2c030bf3d2005bcd"
 
 RDEPENDS:${PN}:append:class-target = " libxml2 itstool"
 
diff --git a/meta-openembedded/meta-gnome/recipes-support/appstream-glib/appstream-glib_0.7.18.bb b/meta-openembedded/meta-gnome/recipes-support/appstream-glib/appstream-glib_0.7.18.bb
index 45a28ed..c56e9e3 100644
--- a/meta-openembedded/meta-gnome/recipes-support/appstream-glib/appstream-glib_0.7.18.bb
+++ b/meta-openembedded/meta-gnome/recipes-support/appstream-glib/appstream-glib_0.7.18.bb
@@ -17,7 +17,7 @@
     libstemmer \
 "
 
-inherit meson gobject-introspection gettext bash-completion pkgconfig
+inherit meson gobject-introspection gettext bash-completion pkgconfig manpages
 
 SRC_URI = "https://people.freedesktop.org/~hughsient/${BPN}/releases/${BP}.tar.xz"
 SRC_URI[sha256sum] = "ca1ed22e3bde3912cb903aaa7de085d55771da454f1c0573fd9608e1de9c4002"
@@ -26,6 +26,9 @@
     -Drpm=false \
 "
 
+PACKAGECONFIG ?= ""
+PACKAGECONFIG[manpages] = "-Dman=true,-Dman=false,docbook-xml-dtd4-native docbook-xsl-stylesheets-native libxslt-native"
+
 FILES:${PN} += "${libdir}/asb-plugins-5"
 
 FILES:${PN}-dev += " \
diff --git a/meta-openembedded/meta-gnome/recipes-support/libuser/libuser/0001-docs-Disable-building.patch b/meta-openembedded/meta-gnome/recipes-support/libuser/libuser/0001-docs-Disable-building.patch
new file mode 100644
index 0000000..add2872
--- /dev/null
+++ b/meta-openembedded/meta-gnome/recipes-support/libuser/libuser/0001-docs-Disable-building.patch
@@ -0,0 +1,29 @@
+From e6bdf74a424652c4f9a38457c7fa93a2051157f5 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 30 Aug 2022 13:09:07 -0700
+Subject: [PATCH] docs: Disable building
+
+It needs linuxdoc-tools which we do not have in OE anymore
+
+Upstream-Status: Inappropriate [OE-Specific]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ docs/Makefile.am | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/docs/Makefile.am b/docs/Makefile.am
+index daa2288..b355185 100644
+--- a/docs/Makefile.am
++++ b/docs/Makefile.am
+@@ -8,7 +8,7 @@ CLEANFILES = $(man_MANS) $(SGML_OUTPUTS)
+ EXTRA_DIST = attributes.txt libuser.conf.5.in rfc2307.txt sgml/libuser.sgml \
+ 	$(SGML_OUTPUTS)
+ 
+-all: sgml/libuser.txt sgml/libuser.html
++all:
+ 
+ libuser.conf.5: $(srcdir)/libuser.conf.5.in Makefile
+ 	sed 's,@sysconfdir\@,$(sysconfdir),g' \
+-- 
+2.37.2
+
diff --git a/meta-openembedded/meta-gnome/recipes-support/libuser/libuser/0001-modules-files.c-parse_field-fix-string-formating-in-.patch b/meta-openembedded/meta-gnome/recipes-support/libuser/libuser/0001-modules-files.c-parse_field-fix-string-formating-in-.patch
deleted file mode 100644
index 7c47df2..0000000
--- a/meta-openembedded/meta-gnome/recipes-support/libuser/libuser/0001-modules-files.c-parse_field-fix-string-formating-in-.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From a4857911ece5ebfcdef42aee4c070eb216f39597 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?An=C3=ADbal=20Lim=C3=B3n?= <anibal.limon@linux.intel.com>
-Date: Fri, 13 May 2016 11:40:13 -0500
-Subject: [PATCH] modules/files.c: parse_field fix string formating in
- g_warnings
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-[YOCTO #9547]
-
-Signed-off-by: Aníbal Limón <anibal.limon@linux.intel.com>
-
-Upstream-Status: Pending
----
- modules/files.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/modules/files.c b/modules/files.c
-index 4ef0a57..35eafc9 100644
---- a/modules/files.c
-+++ b/modules/files.c
-@@ -534,7 +534,7 @@ parse_field(const struct format_specifier *format, GValue *value,
- 						 string, &err);
- 	if (ret == FALSE) {
- 		g_assert(err != NULL);
--		g_warning(lu_strerror(err));
-+		g_warning(lu_strerror(err), NULL);
- 		lu_error_free(&err);
- 	}
- 	return ret;
--- 
-2.1.4
-
diff --git a/meta-openembedded/meta-gnome/recipes-support/libuser/libuser/0003-python-Compilation-warnings-update.patch b/meta-openembedded/meta-gnome/recipes-support/libuser/libuser/0003-python-Compilation-warnings-update.patch
new file mode 100644
index 0000000..45d6952
--- /dev/null
+++ b/meta-openembedded/meta-gnome/recipes-support/libuser/libuser/0003-python-Compilation-warnings-update.patch
@@ -0,0 +1,84 @@
+From acd7b051993d501f4b5e3a593e0f18d1336ba2de Mon Sep 17 00:00:00 2001
+From: Tomas Halman <thalman@redhat.com>
+Date: Wed, 21 Jul 2021 15:36:59 +0200
+Subject: [PATCH] python: Compilation warnings update
+
+The compilation produces few warnings about discard const qualifier
+and pointer to int conversion. This patch fixes the const qualifiers
+and replaces the tp_print inicialization in PyTypeObject with 0 instead
+of NULL
+
+Upstream-Status: Backport [https://pagure.io/libuser/c/3cb7ea54e7b50da6ea313a0e7c7187c8aa5e6ee9?branch=master]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ python/admin.c | 2 +-
+ python/ent.c   | 8 ++++----
+ python/misc.c  | 2 +-
+ 3 files changed, 6 insertions(+), 6 deletions(-)
+
+diff --git a/python/admin.c b/python/admin.c
+index 83595af..e92fca4 100644
+--- a/python/admin.c
++++ b/python/admin.c
+@@ -1512,7 +1512,7 @@ PyTypeObject AdminType = {
+ 	sizeof(struct libuser_admin), /* tp_basicsize */
+ 	0,			/* tp_itemsize */
+ 	libuser_admin_destroy,	/* tp_dealloc */
+-	NULL,			/* tp_print */
++	0,	            /* tp_print */
+ 	NULL,			/* tp_getattr */
+ 	NULL,			/* tp_setattr */
+ 	NULL,			/* tp_compare */
+diff --git a/python/ent.c b/python/ent.c
+index ee712d2..fc3d654 100644
+--- a/python/ent.c
++++ b/python/ent.c
+@@ -255,7 +255,7 @@ libuser_convert_to_value(PyObject *item, GValue *value)
+ static int
+ libuser_entity_setattro(PyObject *self, PyObject *attr_name, PyObject *value)
+ {
+-	char *name;
++	const char *name;
+ 	struct libuser_entity *me;
+ 	PyObject *list;
+ 	struct lu_ent *copy;
+@@ -616,7 +616,7 @@ static PyObject *
+ libuser_entity_get_item(PyObject *self, PyObject *item)
+ {
+ 	struct libuser_entity *me;
+-	char *attr;
++	const char *attr;
+ 
+ 	DEBUG_ENTRY;
+ 	me = (struct libuser_entity *)self;
+@@ -664,7 +664,7 @@ static int
+ libuser_entity_set_item(PyObject *self, PyObject *item, PyObject *args)
+ {
+ 	struct libuser_entity *me;
+-	char *attr = NULL;
++	const char *attr = NULL;
+ 	Py_ssize_t i, size;
+ 	int ret;
+ 	GValue value;
+@@ -800,7 +800,7 @@ PyTypeObject EntityType = {
+ 	sizeof(struct libuser_entity), /* tp_basicsize */
+ 	0,			/* tp_itemsize */
+ 	libuser_entity_destroy, /* tp_dealloc */
+-	NULL,			/* tp_print */
++	0,              /* tp_print */
+ 	NULL,			/* tp_getattr */
+ 	NULL,			/* tp_setattr */
+ 	NULL,			/* tp_compare */
+diff --git a/python/misc.c b/python/misc.c
+index c4ce819..810a846 100644
+--- a/python/misc.c
++++ b/python/misc.c
+@@ -488,7 +488,7 @@ PyTypeObject PromptType = {
+ 	sizeof(struct libuser_prompt), /* tp_basicsize */
+ 	0,			/* tp_itemsize */
+ 	libuser_prompt_destroy,	/* tp_dealloc */
+-	NULL,			/* tp_print */
++	0,              /* tp_print */
+ 	NULL,			/* tp_getattr */
+ 	NULL,			/* tp_setattr */
+ 	NULL,			/* tp_compare */
diff --git a/meta-openembedded/meta-gnome/recipes-support/libuser/libuser_0.62.bb b/meta-openembedded/meta-gnome/recipes-support/libuser/libuser_0.62.bb
deleted file mode 100644
index 22ddd10..0000000
--- a/meta-openembedded/meta-gnome/recipes-support/libuser/libuser_0.62.bb
+++ /dev/null
@@ -1,34 +0,0 @@
-SUMMARY = "user and group account administration library"
-DESCRIPTION = "The libuser library implements a standardized interface for manipulating and administering user \
-and group accounts"
-HOMEPAGE = "https://pagure.io/libuser"
-BUGTRACKER = "https://pagure.io/libuser/issues"
-
-LICENSE = "LGPL-2.0-only"
-LIC_FILES_CHKSUM = "file://COPYING;md5=5f30f0716dfdd0d91eb439ebec522ec2 \
-                    file://lib/user.h;endline=19;md5=76b301f63c39fa992062395efbdc9558 \
-                    file://samples/testuser.c;endline=19;md5=3b87fa660fa3f4a6bb31d624afe30ba1"
-
-SECTION = "base"
-
-SRC_URI = "https://releases.pagure.org/libuser/libuser-${PV}.tar.xz \
-           file://0002-remove-unused-execinfo.h.patch \
-           file://0001-modules-files.c-parse_field-fix-string-formating-in-.patch \
-           "
-
-SRC_URI[md5sum] = "63e5e5c551e99dc5302b40b80bd6d4f2"
-SRC_URI[sha256sum] = "a58ff4fabb01a25043b142185a33eeea961109dd60d4b40b6a9df4fa3cace20b"
-
-DEPENDS = "popt libpam glib-2.0 python3"
-
-inherit features_check
-REQUIRED_DISTRO_FEATURES = "pam"
-
-inherit autotools gettext python3native python3-dir pkgconfig gtk-doc
-
-EXTRA_OEMAKE = "PYTHON_CPPFLAGS=-I${STAGING_INCDIR}/${PYTHON_DIR}${PYTHON_ABI}"
-
-PACKAGES += "${PN}-python "
-
-FILES:${PN}-python = "${PYTHON_SITEPACKAGES_DIR}"
-
diff --git a/meta-openembedded/meta-gnome/recipes-support/libuser/libuser_0.63.bb b/meta-openembedded/meta-gnome/recipes-support/libuser/libuser_0.63.bb
new file mode 100644
index 0000000..e191e11
--- /dev/null
+++ b/meta-openembedded/meta-gnome/recipes-support/libuser/libuser_0.63.bb
@@ -0,0 +1,46 @@
+SUMMARY = "user and group account administration library"
+DESCRIPTION = "The libuser library implements a standardized interface for manipulating and administering user \
+and group accounts"
+HOMEPAGE = "https://pagure.io/libuser"
+BUGTRACKER = "https://pagure.io/libuser/issues"
+
+LICENSE = "LGPL-2.0-only"
+LIC_FILES_CHKSUM = "file://COPYING;md5=5f30f0716dfdd0d91eb439ebec522ec2 \
+                    file://lib/user.h;endline=19;md5=76b301f63c39fa992062395efbdc9558 \
+                    file://samples/testuser.c;endline=19;md5=3b87fa660fa3f4a6bb31d624afe30ba1"
+
+SECTION = "base"
+
+SRC_URI = "https://releases.pagure.org/libuser/libuser-${PV}.tar.xz \
+           file://0001-docs-Disable-building.patch \
+           file://0002-remove-unused-execinfo.h.patch \
+           file://0003-python-Compilation-warnings-update.patch \
+           "
+
+SRC_URI[sha256sum] = "8dc377255452a68e82c4837ba22c3ee4ae3658971bf0f2ef67ed0b77fc497f91"
+
+DEPENDS = "bison-native popt libpam glib-2.0 python3"
+
+inherit autotools features_check gettext python3native python3-dir pkgconfig gtk-doc
+
+REQUIRED_DISTRO_FEATURES = "pam"
+
+EXTRA_OEMAKE = "PYTHON_CPPFLAGS=-I${STAGING_INCDIR}/${PYTHON_DIR}${PYTHON_ABI}"
+
+GTKDOC_DOCDIR = "${S}/docs/reference"
+
+# Running autopoint would supply ABOUT-NLS and admin/config.rpath from gettext, but the override below is left disabled; do_configure:prepend touches those files instead
+#EXTRA_AUTORECONF:remove = "--exclude=autopoint"
+
+do_configure:prepend() {
+    # Create the admin/ and m4/ directories plus placeholder ABOUT-NLS and config.rpath files (normally supplied by gettext's autopoint)
+    install -d ${S}/admin ${S}/m4
+    touch ${S}/ABOUT-NLS ${S}/admin/config.rpath
+    # Regenerate lib/getdate.c with bison from inside the source tree; the subshell keeps the task's working directory unchanged
+    (cd ${S} && bison lib/getdate.y -o lib/getdate.c)
+}
+
+PACKAGES += "${PN}-python "
+
+FILES:${PN}-python = "${PYTHON_SITEPACKAGES_DIR}"
+
diff --git a/meta-openembedded/meta-initramfs/recipes-kernel/kexec/kexec-tools-klibc/140-mips_disable_devicetree_support.patch b/meta-openembedded/meta-initramfs/recipes-kernel/kexec/kexec-tools-klibc/140-mips_disable_devicetree_support.patch
index bd728cc..6e347c0 100644
--- a/meta-openembedded/meta-initramfs/recipes-kernel/kexec/kexec-tools-klibc/140-mips_disable_devicetree_support.patch
+++ b/meta-openembedded/meta-initramfs/recipes-kernel/kexec/kexec-tools-klibc/140-mips_disable_devicetree_support.patch
@@ -22,8 +22,6 @@
  kexec/arch/mips/kexec-mips.h           |  1 +
  4 files changed, 39 insertions(+), 28 deletions(-)
 
-diff --git a/kexec/arch/mips/include/arch/options.h b/kexec/arch/mips/include/arch/options.h
-index 416e224..18d2811 100644
 --- a/kexec/arch/mips/include/arch/options.h
 +++ b/kexec/arch/mips/include/arch/options.h
 @@ -5,6 +5,7 @@
@@ -44,11 +42,9 @@
  
  
  #define KEXEC_ARCH_OPT_STR KEXEC_OPT_STR ""
-diff --git a/kexec/arch/mips/kexec-elf-mips.c b/kexec/arch/mips/kexec-elf-mips.c
-index 849a7ba..5c0e535 100644
 --- a/kexec/arch/mips/kexec-elf-mips.c
 +++ b/kexec/arch/mips/kexec-elf-mips.c
-@@ -141,35 +141,37 @@ int elf_mips_load(int argc, char **argv, const char *buf, off_t len,
+@@ -141,45 +141,49 @@ int elf_mips_load(int argc, char **argv,
  	else
  		cmdline_addr = 0;
  
@@ -63,9 +59,22 @@
 -	} else {
 -		create_flatten_tree(&dtb_buf, &dtb_length, cmdline_buf + strlen(CMDLINE_PREFIX));
 -	}
- 
+-
 -	if (arch_options.initrd_file) {
 -		initrd_buf = slurp_file(arch_options.initrd_file, &initrd_size);
+ 
+-		/* Create initrd entries in dtb - although at this time
+-		 * they would not point to the correct location */
+-		dtb_set_initrd(&dtb_buf, &dtb_length, initrd_buf, initrd_buf + initrd_size);
+-
+-		initrd_base = add_buffer(info, initrd_buf, initrd_size,
+-					initrd_size, sizeof(void *),
+-					_ALIGN_UP(kernel_addr + kernel_size + dtb_length,
+-						pagesize), 0x0fffffff, 1);
+-
+-		/* Now that the buffer for initrd is prepared, update the dtb
+-		 * with an appropriate location */
+-		dtb_set_initrd(&dtb_buf, &dtb_length, initrd_base, initrd_base + initrd_size);
 +	if (!arch_options.no_dtb) {
 +		/* MIPS systems that have been converted to use device tree
 +		 * passed through UHI will use commandline in the DTB and
@@ -78,39 +87,29 @@
 +		} else {
 +			create_flatten_tree(&dtb_buf, &dtb_length, cmdline_buf + strlen(CMDLINE_PREFIX));
 +		}
- 
--		/* Create initrd entries in dtb - although at this time
--		 * they would not point to the correct location */
--		dtb_set_initrd(&dtb_buf, &dtb_length, initrd_buf, initrd_buf + initrd_size);
++
 +		if (arch_options.initrd_file) {
 +			initrd_buf = slurp_file(arch_options.initrd_file, &initrd_size);
- 
--		initrd_base = add_buffer(info, initrd_buf, initrd_size,
--					initrd_size, sizeof(void *),
--					_ALIGN_UP(kernel_addr + kernel_size + dtb_length,
--						pagesize), 0x0fffffff, 1);
++
 +			/* Create initrd entries in dtb - although at this time
 +			 * they would not point to the correct location */
-+			dtb_set_initrd(&dtb_buf, &dtb_length, initrd_buf, initrd_buf + initrd_size);
- 
--		/* Now that the buffer for initrd is prepared, update the dtb
--		 * with an appropriate location */
--		dtb_set_initrd(&dtb_buf, &dtb_length, initrd_base, initrd_base + initrd_size);
--	}
++			dtb_set_initrd(&dtb_buf, &dtb_length, (off_t)initrd_buf, (off_t)initrd_buf + initrd_size);
++
 +			initrd_base = add_buffer(info, initrd_buf, initrd_size,
 +						initrd_size, sizeof(void *),
 +						_ALIGN_UP(kernel_addr + kernel_size + dtb_length,
 +							pagesize), 0x0fffffff, 1);
- 
++
 +			/* Now that the buffer for initrd is prepared, update the dtb
 +			 * with an appropriate location */
 +			dtb_set_initrd(&dtb_buf, &dtb_length, initrd_base, initrd_base + initrd_size);
 +		}
-+	}
+ 	}
  
+-
  	/* This is a legacy method for commandline passing used
  	 * currently by Octeon CPUs only */
-@@ -177,9 +179,11 @@ int elf_mips_load(int argc, char **argv, const char *buf, off_t len,
+ 	add_buffer(info, cmdline_buf, sizeof(cmdline_buf),
  			sizeof(cmdline_buf), sizeof(void *),
  			cmdline_addr, 0x0fffffff, 1);
  
@@ -125,8 +124,6 @@
  
  	return 0;
  }
-diff --git a/kexec/arch/mips/kexec-mips.c b/kexec/arch/mips/kexec-mips.c
-index 415c2ed..e557f8b 100644
 --- a/kexec/arch/mips/kexec-mips.c
 +++ b/kexec/arch/mips/kexec-mips.c
 @@ -89,6 +89,7 @@ void arch_usage(void)
@@ -137,7 +134,7 @@
  	);
  }
  
-@@ -121,6 +122,9 @@ int arch_process_options(int argc, char **argv)
+@@ -121,6 +122,9 @@ int arch_process_options(int argc, char
  		case OPT_RAMDISK:
  			arch_options.initrd_file = optarg;
  			break;
@@ -147,8 +144,6 @@
  		default:
  			break;
  		}
-diff --git a/kexec/arch/mips/kexec-mips.h b/kexec/arch/mips/kexec-mips.h
-index 222c815..90b21c3 100644
 --- a/kexec/arch/mips/kexec-mips.h
 +++ b/kexec/arch/mips/kexec-mips.h
 @@ -22,6 +22,7 @@ struct arch_options_t {
diff --git a/meta-openembedded/meta-multimedia/recipes-multimedia/aom/aom/0001-subpel_variance_neon-Provide-prototypes-for-missing-.patch b/meta-openembedded/meta-multimedia/recipes-multimedia/aom/aom/0001-subpel_variance_neon-Provide-prototypes-for-missing-.patch
new file mode 100644
index 0000000..100507c
--- /dev/null
+++ b/meta-openembedded/meta-multimedia/recipes-multimedia/aom/aom/0001-subpel_variance_neon-Provide-prototypes-for-missing-.patch
@@ -0,0 +1,48 @@
+From c33e07f78982acfb0574a84fb523f8591e55c50e Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Sun, 11 Sep 2022 19:46:28 -0700
+Subject: [PATCH] subpel_variance_neon: Provide prototypes for missing
+ functions
+
+Fixes build with clang-15
+aom_dsp/arm/subpel_variance_neon.c:121:10: error: call to undeclared function 'aom_variance8x8_neon';
+ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
+|   return aom_variance8x8_neon(temp2, 8, dst, dst_stride, sse);
+|          ^
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ aom_dsp/arm/subpel_variance_neon.c | 16 ++++++++++++++++
+ 1 file changed, 16 insertions(+)
+
+diff --git a/aom_dsp/arm/subpel_variance_neon.c b/aom_dsp/arm/subpel_variance_neon.c
+index 4ecf891cbeb..859168ea0c5 100644
+--- a/aom_dsp/arm/subpel_variance_neon.c
++++ b/aom_dsp/arm/subpel_variance_neon.c
+@@ -20,6 +20,22 @@
+ #include "aom_dsp/aom_filter.h"
+ #include "aom_dsp/variance.h"
+ 
++extern unsigned int aom_variance8x8_neon(const uint8_t *a, int a_stride,
++                                         const uint8_t *b, int b_stride,
++                                         unsigned int *sse);
++
++extern unsigned int aom_variance16x16_neon(const uint8_t *a, int a_stride,
++                                         const uint8_t *b, int b_stride,
++                                         unsigned int *sse);
++
++extern unsigned int aom_variance32x32_neon(const uint8_t *a, int a_stride,
++                                         const uint8_t *b, int b_stride,
++                                         unsigned int *sse);
++
++extern unsigned int aom_variance64x64_neon(const uint8_t *a, int a_stride,
++                                         const uint8_t *b, int b_stride,
++                                         unsigned int *sse);
++
+ // Load 2 sets of 4 bytes when alignment is not guaranteed.
+ static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf, int stride) {
+   uint32_t a;
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-multimedia/recipes-multimedia/aom/aom_3.4.0.bb b/meta-openembedded/meta-multimedia/recipes-multimedia/aom/aom_3.4.0.bb
index 36db45e..9cd6f7a 100644
--- a/meta-openembedded/meta-multimedia/recipes-multimedia/aom/aom_3.4.0.bb
+++ b/meta-openembedded/meta-multimedia/recipes-multimedia/aom/aom_3.4.0.bb
@@ -6,7 +6,9 @@
                     file://PATENTS;md5=e69ad12202bd20da3c76a5d3648cfa83 \
                    "
 
-SRC_URI = "git://aomedia.googlesource.com/aom;protocol=https;branch=main"
+SRC_URI = "git://aomedia.googlesource.com/aom;protocol=https;branch=main \
+           file://0001-subpel_variance_neon-Provide-prototypes-for-missing-.patch \
+          "
 
 SRCREV = "fd0c9275d36930a6eea6d3c35972e7cf9c512944"
 
diff --git a/meta-openembedded/meta-multimedia/recipes-multimedia/pipewire/pipewire/0001-avb-fix-compilation-on-big-endian.patch b/meta-openembedded/meta-multimedia/recipes-multimedia/pipewire/pipewire/0001-avb-fix-compilation-on-big-endian.patch
deleted file mode 100644
index fc618b4..0000000
--- a/meta-openembedded/meta-multimedia/recipes-multimedia/pipewire/pipewire/0001-avb-fix-compilation-on-big-endian.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 1a5ec4452fa21592eaeeb823ad95a1db6eb60376 Mon Sep 17 00:00:00 2001
-From: Wim Taymans <wtaymans@redhat.com>
-Date: Tue, 19 Jul 2022 13:49:42 +0200
-Subject: [PATCH 001/113] avb: fix compilation on big endian
-
-Patch-Status: Backport
-
----
- src/modules/module-avb/aaf.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/modules/module-avb/aaf.h b/src/modules/module-avb/aaf.h
-index cb4871ca6..b444ce251 100644
---- a/src/modules/module-avb/aaf.h
-+++ b/src/modules/module-avb/aaf.h
-@@ -35,7 +35,7 @@ struct avb_packet_aaf {
- 	unsigned gv:1;
- 	unsigned tv:1;
- 
--	uint8_t seq_number;
-+	uint8_t seq_num;
- 
- 	unsigned _r2:7;
- 	unsigned tu:1;
-diff --git a/src/modules/module-avb/iec61883.h b/src/modules/module-avb/iec61883.h
-index d3b3a7daa..6ca8724ad 100644
---- a/src/modules/module-avb/iec61883.h
-+++ b/src/modules/module-avb/iec61883.h
-@@ -37,7 +37,7 @@ struct avb_packet_iec61883 {
- 	unsigned gv:1;
- 	unsigned tv:1;
- 
--	uint8_t seq_number;
-+	uint8_t seq_num;
- 
- 	unsigned _r2:7;
- 	unsigned tu:1;
-diff --git a/spa/plugins/avb/avbtp/packets.h b/spa/plugins/avb/avbtp/packets.h
-index 7047456bf..3d4a652ee 100644
---- a/spa/plugins/avb/avbtp/packets.h
-+++ b/spa/plugins/avb/avbtp/packets.h
-@@ -116,7 +116,7 @@ struct spa_avbtp_packet_aaf {
- 	unsigned gv:1;
- 	unsigned tv:1;
- 
--	uint8_t seq_number;
-+	uint8_t seq_num;
- 
- 	unsigned _r2:7;
- 	unsigned tu:1;
--- 
-2.34.1
-
diff --git a/meta-openembedded/meta-multimedia/recipes-multimedia/pipewire/pipewire_0.3.56.bb b/meta-openembedded/meta-multimedia/recipes-multimedia/pipewire/pipewire_0.3.57.bb
similarity index 97%
rename from meta-openembedded/meta-multimedia/recipes-multimedia/pipewire/pipewire_0.3.56.bb
rename to meta-openembedded/meta-multimedia/recipes-multimedia/pipewire/pipewire_0.3.57.bb
index feefe7c..0ca8fd0 100644
--- a/meta-openembedded/meta-multimedia/recipes-multimedia/pipewire/pipewire_0.3.56.bb
+++ b/meta-openembedded/meta-multimedia/recipes-multimedia/pipewire/pipewire_0.3.57.bb
@@ -13,11 +13,8 @@
 
 DEPENDS = "dbus ncurses"
 
-SRCREV = "f274e53d25ee8f483ac6fce9e516bb1830abe88b"
-SRC_URI = " \
-	git://gitlab.freedesktop.org/pipewire/pipewire.git;branch=master;protocol=https \
-	file://0001-avb-fix-compilation-on-big-endian.patch \
-"
+SRCREV = "49f1c2038fb7b5249affa56709b117a2a8603b52"
+SRC_URI = "git://gitlab.freedesktop.org/pipewire/pipewire.git;branch=master;protocol=https"
 
 S = "${WORKDIR}/git"
 
@@ -86,7 +83,7 @@
 
 PACKAGECONFIG:class-target ??= " \
     ${@bb.utils.contains('DISTRO_FEATURES', 'zeroconf', 'avahi', '', d)} \
-    ${@bb.utils.contains('DISTRO_FEATURES', 'bluetooth', 'bluez ${BLUETOOTH_AAC}', '', d)} \
+    ${@bb.utils.contains('DISTRO_FEATURES', 'bluetooth', 'bluez bluez-opus ${BLUETOOTH_AAC}', '', d)} \
     ${@bb.utils.contains('DISTRO_FEATURES', 'systemd', 'systemd systemd-system-service systemd-user-service', '', d)} \
     ${@bb.utils.filter('DISTRO_FEATURES', 'alsa vulkan pulseaudio', d)} \
     ${PIPEWIRE_SESSION_MANAGER} \
@@ -102,6 +99,7 @@
 PACKAGECONFIG[avahi] = "-Davahi=enabled,-Davahi=disabled,avahi"
 PACKAGECONFIG[bluez] = "-Dbluez5=enabled,-Dbluez5=disabled,bluez5 sbc"
 PACKAGECONFIG[bluez-aac] = "-Dbluez5-codec-aac=enabled,-Dbluez5-codec-aac=disabled,fdk-aac"
+PACKAGECONFIG[bluez-opus] = "-Dbluez5-codec-opus=enabled,-Dbluez5-codec-opus=disabled,libopus"
 PACKAGECONFIG[docs] = "-Ddocs=enabled,-Ddocs=disabled,doxygen-native graphviz-native"
 PACKAGECONFIG[ffmpeg] = "-Dffmpeg=enabled,-Dffmpeg=disabled,ffmpeg"
 PACKAGECONFIG[gstreamer] = "-Dgstreamer=enabled,-Dgstreamer=disabled,glib-2.0 gstreamer1.0 gstreamer1.0-plugins-base,,gstreamer1.0-pipewire"
@@ -248,7 +246,7 @@
 FILES:${PN} = " \
     ${datadir}/pipewire \
     ${systemd_system_unitdir}/pipewire* \
-    ${systemd_user_unitdir}/pipewire* \
+    ${systemd_user_unitdir} \
     ${bindir}/pipewire \
     ${bindir}/pipewire-avb \
 "
diff --git a/meta-openembedded/meta-multimedia/recipes-multimedia/sox/sox_14.4.2.bb b/meta-openembedded/meta-multimedia/recipes-multimedia/sox/sox_14.4.2.bb
index 0791c85..ace8743 100644
--- a/meta-openembedded/meta-multimedia/recipes-multimedia/sox/sox_14.4.2.bb
+++ b/meta-openembedded/meta-multimedia/recipes-multimedia/sox/sox_14.4.2.bb
@@ -4,7 +4,7 @@
 HOMEPAGE = "http://sox.sourceforge.net"
 SECTION = "audio"
 
-DEPENDS = "libpng ffmpeg libsndfile1"
+DEPENDS = "libpng ffmpeg libsndfile1 libtool"
 
 PACKAGECONFIG ??= "${@bb.utils.filter('DISTRO_FEATURES', 'alsa pulseaudio', d)} \
                    magic \
@@ -36,4 +36,7 @@
 
 inherit autotools pkgconfig
 
+# Enable largefile support
+CFLAGS += "-D_FILE_OFFSET_BITS=64"
+
 EXCLUDE_FROM_WORLD = "${@bb.utils.contains("LICENSE_FLAGS_ACCEPTED", "commercial", "0", "1", d)}"
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/ez-ipupdate/ez-ipupdate_3.0.11b7.bb b/meta-openembedded/meta-networking/recipes-connectivity/ez-ipupdate/ez-ipupdate_3.0.11b7.bb
index 8648f2e..42ecf9b 100644
--- a/meta-openembedded/meta-networking/recipes-connectivity/ez-ipupdate/ez-ipupdate_3.0.11b7.bb
+++ b/meta-openembedded/meta-networking/recipes-connectivity/ez-ipupdate/ez-ipupdate_3.0.11b7.bb
@@ -5,11 +5,12 @@
 LIC_FILES_CHKSUM = "file://COPYING;md5=7783169b4be06b54e86730eb01bc3a31"
 
 SRC_URI = "http://sourceforge.net/projects/ez-ipupdate/files/${BPN}/${PV}/${BPN}-${PV}.tar.gz \
-    file://Makefile.am.patch \
-    file://cache_file.c.patch \
-    file://conf_file.c.patch \
-    file://wformat.patch \
-    "
+           file://Makefile.am.patch \
+           file://cache_file.c.patch \
+           file://conf_file.c.patch \
+           file://wformat.patch \
+           file://0001-ez-ipupdate-Include-time.h-for-time-API-prototype.patch \
+           "
 SRC_URI[md5sum] = "525be4550b4461fdf105aed8e753b020"
 SRC_URI[sha256sum] = "a15ec0dc0b78ec7578360987c68e43a67bc8d3591cbf528a323588830ae22c20"
 
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/ez-ipupdate/files/0001-ez-ipupdate-Include-time.h-for-time-API-prototype.patch b/meta-openembedded/meta-networking/recipes-connectivity/ez-ipupdate/files/0001-ez-ipupdate-Include-time.h-for-time-API-prototype.patch
new file mode 100644
index 0000000..b2a2ebd
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-connectivity/ez-ipupdate/files/0001-ez-ipupdate-Include-time.h-for-time-API-prototype.patch
@@ -0,0 +1,44 @@
+From 6c8fe883df993b9e7987c8f1c849962f8007a373 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 12:50:37 -0700
+Subject: [PATCH] ez-ipupdate: Include time.h for time() API prototype
+
+Fix printf format specifiers for snprintf
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ ez-ipupdate.c | 6 ++++--
+ 1 file changed, 4 insertions(+), 2 deletions(-)
+
+diff --git a/ez-ipupdate.c b/ez-ipupdate.c
+index 15a4c13..5cbe7f3 100644
+--- a/ez-ipupdate.c
++++ b/ez-ipupdate.c
+@@ -38,6 +38,8 @@
+ #  include <config.h>
+ #endif
+ 
++#include <time.h>
++
+ // you man very well need to edit this, don't worry though, email is only sent
+ // if bad things happend and it has to exit when in daemon mode.
+ #define SEND_EMAIL_CMD "mail"
+@@ -2483,7 +2485,7 @@ int DHS_update_entry(void)
+   p += strlen(p);
+   limit = BUFFER_SIZE - 1 - strlen(buf);
+ 
+-  snprintf(buf, BUFFER_SIZE, "Content-length: %d\015\012", strlen(putbuf));
++  snprintf(buf, BUFFER_SIZE, "Content-length: %zu\015\012", strlen(putbuf));
+   output(buf);
+   snprintf(buf, BUFFER_SIZE, "\015\012");
+   output(buf);
+@@ -2620,7 +2622,7 @@ int DHS_update_entry(void)
+     p += strlen(p);
+     limit = BUFFER_SIZE - 1 - strlen(buf);
+ 
+-    snprintf(buf, BUFFER_SIZE, "Content-length: %d\015\012", strlen(putbuf));
++    snprintf(buf, BUFFER_SIZE, "Content-length: %zu\015\012", strlen(putbuf));
+     output(buf);
+     snprintf(buf, BUFFER_SIZE, "\015\012");
+     output(buf);
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager-openvpn_1.8.18.bb b/meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager-openvpn_1.10.0.bb
similarity index 94%
rename from meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager-openvpn_1.8.18.bb
rename to meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager-openvpn_1.10.0.bb
index 1d0c38e..1fcb435 100644
--- a/meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager-openvpn_1.8.18.bb
+++ b/meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager-openvpn_1.10.0.bb
@@ -10,7 +10,7 @@
 
 SRC_URI = "${GNOME_MIRROR}/NetworkManager-openvpn/${@gnome_verdir("${PV}")}/NetworkManager-openvpn-${PV}.tar.xz"
 
-SRC_URI[sha256sum] = "53dfb0acf64478adc728074f162f62e60bff62a605bd897eb88b267e7057927a"
+SRC_URI[sha256sum] = "5745d4107f3398a97afbb93f69239b510c9e45eb7fb62d60d9ed0a7297f4d101"
 
 S = "${WORKDIR}/NetworkManager-openvpn-${PV}"
 
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager/0001-do-not-ask-host-for-ifcfg-defaults.patch b/meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager/0001-do-not-ask-host-for-ifcfg-defaults.patch
deleted file mode 100644
index 92b1293..0000000
--- a/meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager/0001-do-not-ask-host-for-ifcfg-defaults.patch
+++ /dev/null
@@ -1,24 +0,0 @@
-From 37559b659e22886d5f55837d4f167ba5fda73e85 Mon Sep 17 00:00:00 2001
-From: Adrian Freihofer <adrian.freihofer@siemens.com>
-Date: Sun, 29 Aug 2021 15:50:35 +0200
-Subject: [PATCH] do-not-ask-host-for-ifcfg-defaults
-
----
- meson.build | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/meson.build b/meson.build
-index d0cec83..289aa1b 100644
---- a/meson.build
-+++ b/meson.build
-@@ -304,8 +304,8 @@ else
-   distro = 'unknown'
- endif
- 
--enable_ifcfg_rh = get_option('ifcfg_rh') or (distro == 'redhat')
--enable_ifupdown = get_option('ifupdown') or (distro == 'debian')
-+enable_ifcfg_rh = get_option('ifcfg_rh')
-+enable_ifupdown = get_option('ifupdown')
- 
- config_plugins_default = get_option('config_plugins_default')
- config_h.set_quoted('NM_CONFIG_DEFAULT_MAIN_PLUGINS', config_plugins_default)
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager_1.38.0.bb b/meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager_1.40.0.bb
similarity index 97%
rename from meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager_1.38.0.bb
rename to meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager_1.40.0.bb
index ebd25a8..10241e1 100644
--- a/meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager_1.38.0.bb
+++ b/meta-openembedded/meta-networking/recipes-connectivity/networkmanager/networkmanager_1.40.0.bb
@@ -29,9 +29,8 @@
     file://${BPN}.initd \
     file://enable-dhcpcd.conf \
     file://enable-iwd.conf \
-    file://0001-do-not-ask-host-for-ifcfg-defaults.patch \
 "
-SRC_URI[sha256sum] = "82a4cf07ddfeb0816787b67c0f5058ae6c50d6259c0b0541a24e35156062b2ef"
+SRC_URI[sha256sum] = "aee7e057bc2cca5dab84e41f15f1da8b795eb290747b04cbeee822bad9e9fc03"
 
 S = "${WORKDIR}/NetworkManager-${PV}"
 
@@ -88,7 +87,7 @@
 # consolekit is not picked by shlibs, so add it to RDEPENDS too
 PACKAGECONFIG[consolekit] = "-Dsession_tracking_consolekit=true,-Dsession_tracking_consolekit=false,consolekit,consolekit"
 PACKAGECONFIG[modemmanager] = "-Dmodem_manager=true,-Dmodem_manager=false,modemmanager mobile-broadband-provider-info"
-PACKAGECONFIG[ppp] = "-Dppp=true -Dpppd=${sbindir}/pppd,-Dppp=false,ppp,ppp"
+PACKAGECONFIG[ppp] = "-Dppp=true -Dpppd=${sbindir}/pppd,-Dppp=false,ppp"
 PACKAGECONFIG[dnsmasq] = "-Ddnsmasq=${bindir}/dnsmasq"
 PACKAGECONFIG[nss] = "-Dcrypto=nss,,nss"
 PACKAGECONFIG[resolvconf] = "-Dresolvconf=${base_sbindir}/resolvconf,-Dresolvconf=no,,resolvconf"
@@ -233,16 +232,21 @@
     ${nonarch_base_libdir}/udev/* \
     ${nonarch_libdir}/firewalld \
     ${nonarch_libdir}/NetworkManager/conf.d \
+    ${nonarch_libdir}/NetworkManager/dispatcher.d/no-wait.d \
     ${nonarch_libdir}/NetworkManager/dispatcher.d/pre-down.d \
     ${nonarch_libdir}/NetworkManager/dispatcher.d/pre-up.d \
-    ${nonarch_libdir}/NetworkManager/dispatcher.d/no-wait.d \
     ${nonarch_libdir}/NetworkManager/system-connections \
     ${nonarch_libdir}/NetworkManager/VPN \
     ${sbindir}/NetworkManager \
     ${sysconfdir}/init.d/network-manager \
     ${sysconfdir}/NetworkManager \
+    ${sysconfdir}/resolv-conf.NetworkManager \
+    ${sysconfdir}/sysconfig/network-scripts \
     ${systemd_system_unitdir} \
 "
+RDEPENDS:${PN}-daemon += "\
+    ${@bb.utils.contains('PACKAGECONFIG', 'ifupdown', 'bash', '', d)} \
+"
 RRECOMMENDS:${PN}-daemon += "\
     ${NETWORKMANAGER_FIREWALL_DEFAULT} \
     ${@bb.utils.filter('PACKAGECONFIG', 'dnsmasq', d)} \
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-br-posix/0001-bn_mul.h-fix-x86-PIC-inline-ASM-compilation-with-GCC.patch b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-br-posix/0001-bn_mul.h-fix-x86-PIC-inline-ASM-compilation-with-GCC.patch
new file mode 100644
index 0000000..8122e72
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-br-posix/0001-bn_mul.h-fix-x86-PIC-inline-ASM-compilation-with-GCC.patch
@@ -0,0 +1,68 @@
+From c0546e351f6d7ab50eb1de8cef1d0d167760fccc Mon Sep 17 00:00:00 2001
+From: Peter Korsgaard <peter@korsgaard.com>
+Date: Mon, 27 Aug 2018 22:50:57 +0200
+Subject: [PATCH] bn_mul.h: fix x86 PIC inline ASM compilation with GCC < 5
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Fixes #1910
+
+With ebx added to the MULADDC_STOP clobber list to fix #1550, the inline
+assembly fails to build with GCC < 5 in PIC mode with the following error:
+
+include/mbedtls/bn_mul.h:46:13: error: PIC register clobbered by ‘ebx’ in ‘asm’
+
+This is because older GCC versions treated the x86 ebx register (which is
+used for the GOT) as a fixed reserved register when building as PIC.
+
+This is fixed by an improved register allocator in GCC 5+.  From the release
+notes:
+
+Register allocation improvements: Reuse of the PIC hard register, instead of
+using a fixed register, was implemented on x86/x86-64 targets.  This
+improves generated PIC code performance as more hard registers can be used.
+
+https://www.gnu.org/software/gcc/gcc-5/changes.html
+
+As a workaround, detect this situation and disable the inline assembly,
+similar to the MULADDC_CANNOT_USE_R7 logic.
+
+Upstream-Status: Backport [https://github.com/Mbed-TLS/mbedtls/commit/c0546e351f6d7ab50eb1de8cef1d0d167760fccc]
+Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
+---
+ library/bn_mul.h | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+--- a/third_party/openthread/repo/third_party/mbedtls/repo/include/mbedtls/bn_mul.h
++++ b/third_party/openthread/repo/third_party/mbedtls/repo/include/mbedtls/bn_mul.h
+@@ -55,12 +55,28 @@
+     ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 )
+ 
+ /*
++ * GCC < 5.0 treated the x86 ebx (which is used for the GOT) as a
++ * fixed reserved register when building as PIC, leading to errors
++ * like: bn_mul.h:46:13: error: PIC register clobbered by 'ebx' in 'asm'
++ *
++ * This is fixed by an improved register allocator in GCC 5+. From the
++ * release notes:
++ * Register allocation improvements: Reuse of the PIC hard register,
++ * instead of using a fixed register, was implemented on x86/x86-64
++ * targets. This improves generated PIC code performance as more hard
++ * registers can be used.
++ */
++#if defined(__GNUC__) && __GNUC__ < 5 && defined(__PIC__)
++#define MULADDC_CANNOT_USE_EBX
++#endif
++
++/*
+  * Disable use of the i386 assembly code below if option -O0, to disable all
+  * compiler optimisations, is passed, detected with __OPTIMIZE__
+  * This is done as the number of registers used in the assembly code doesn't
+  * work with the -O0 option.
+  */
+-#if defined(__i386__) && defined(__OPTIMIZE__)
++#if defined(__i386__) && defined(__OPTIMIZE__) && !defined(MULADDC_CANNOT_USE_EBX)
+ 
+ #define MULADDC_INIT                        \
+     asm(                                    \
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-br-posix/mbedtls.patch b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-br-posix/mbedtls.patch
new file mode 100644
index 0000000..91b3046
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-br-posix/mbedtls.patch
@@ -0,0 +1,43 @@
+mbedtls: Disable documentation warning as error with clang
+
+There are shortcomings with doxygen info which clang-15+ flags; don't
+treat them as errors.
+
+Remove unused variable
+
+Fixes
+library/bignum.c:1395:29: error: variable 't' set but not used [-Werror,-Wunused-but-set-variable]
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+--- a/third_party/openthread/repo/third_party/mbedtls/repo/library/bignum.c
++++ b/third_party/openthread/repo/third_party/mbedtls/repo/library/bignum.c
+@@ -1544,7 +1544,7 @@ __attribute__ ((noinline))
+ #endif
+ void mpi_mul_hlp( size_t i, mbedtls_mpi_uint *s, mbedtls_mpi_uint *d, mbedtls_mpi_uint b )
+ {
+-    mbedtls_mpi_uint c = 0, t = 0;
++    mbedtls_mpi_uint c = 0;
+ 
+ #if defined(MULADDC_HUIT)
+     for( ; i >= 8; i -= 8 )
+@@ -1595,8 +1595,6 @@ void mpi_mul_hlp( size_t i, mbedtls_mpi_
+     }
+ #endif /* MULADDC_HUIT */
+ 
+-    t++;
+-
+     do {
+         *d += c; c = ( *d < c ); d++;
+     }
+--- a/third_party/openthread/repo/third_party/mbedtls/repo/CMakeLists.txt
++++ b/third_party/openthread/repo/third_party/mbedtls/repo/CMakeLists.txt
+@@ -192,7 +192,7 @@ if(CMAKE_COMPILER_IS_GNU)
+ endif(CMAKE_COMPILER_IS_GNU)
+ 
+ if(CMAKE_COMPILER_IS_CLANG)
+-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wwrite-strings -Wpointer-arith -Wimplicit-fallthrough -Wshadow -Wvla")
++    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wwrite-strings -Wpointer-arith -Wimplicit-fallthrough -Wshadow -Wvla -Wno-error=documentation")
+     set(CMAKE_C_FLAGS_RELEASE     "-O2")
+     set(CMAKE_C_FLAGS_DEBUG       "-O0 -g3")
+     set(CMAKE_C_FLAGS_COVERAGE    "-O0 -g3 --coverage")
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-br-posix/unused_var.patch b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-br-posix/unused_var.patch
new file mode 100644
index 0000000..9727cba
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-br-posix/unused_var.patch
@@ -0,0 +1,10 @@
+--- a/third_party/openthread/repo/src/cli/cli.cpp
++++ b/third_party/openthread/repo/src/cli/cli.cpp
+@@ -1785,6 +1785,7 @@ template <> otError Interpreter::Process
+ 
+     for (uint8_t i = 0;; i++)
+     {
++        OT_UNUSED_VARIABLE(i);
+         SuccessOrExit(otThreadGetNextCacheEntry(GetInstancePtr(), &entry, &iterator));
+         OutputEidCacheEntry(entry);
+     }
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-br-posix_git.bb b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-br-posix_git.bb
index a16b778..720228d 100644
--- a/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-br-posix_git.bb
+++ b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-br-posix_git.bb
@@ -17,14 +17,18 @@
 SRC_URI = "gitsm://github.com/openthread/ot-br-posix.git;protocol=https;branch=main \
            file://0001-otbr-agent.service.in-remove-pre-exec-hook-for-mdns-.patch \
            file://0001-cmake-Disable-nonnull-compare-warning-on-gcc.patch \
+           file://0001-bn_mul.h-fix-x86-PIC-inline-ASM-compilation-with-GCC.patch \
+           file://mbedtls.patch \
+           file://unused_var.patch \
            "
 
 S = "${WORKDIR}/git"
 SYSTEMD_SERVICE:${PN} = "otbr-agent.service"
 
 inherit pkgconfig cmake systemd
-
-CXXFLAGS:append:libc-musl:toolchain-clang = " -Wno-error=sign-compare"
+# openthread/repo/src/cli/cli.cpp:1786:18: fatal error: variable 'i' set but not used [-Wunused-but-set-variable]
+#    for (uint8_t i = 0;; i++)
+CXXFLAGS:append:libc-musl:toolchain-clang = " -Wno-error=sign-compare -Wno-error=unused-but-set-variable"
 
 EXTRA_OECMAKE = "-DBUILD_TESTING=OFF \
                  -DOTBR_DBUS=ON \
@@ -53,7 +57,7 @@
                  -DOT_DHCP6_SERVER=ON \
                  "
 
-RDEPENDS:${PN} = "iproute2 avahi-daemon"
+RDEPENDS:${PN} = "iproute2 ipset avahi-daemon"
 
 RCONFLICTS:${PN} = "ot-daemon"
 
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-daemon/0001-bn_mul.h-fix-x86-PIC-inline-ASM-compilation-with-GCC.patch b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-daemon/0001-bn_mul.h-fix-x86-PIC-inline-ASM-compilation-with-GCC.patch
new file mode 100644
index 0000000..c9edb00
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-daemon/0001-bn_mul.h-fix-x86-PIC-inline-ASM-compilation-with-GCC.patch
@@ -0,0 +1,68 @@
+From c0546e351f6d7ab50eb1de8cef1d0d167760fccc Mon Sep 17 00:00:00 2001
+From: Peter Korsgaard <peter@korsgaard.com>
+Date: Mon, 27 Aug 2018 22:50:57 +0200
+Subject: [PATCH] bn_mul.h: fix x86 PIC inline ASM compilation with GCC < 5
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Fixes #1910
+
+With ebx added to the MULADDC_STOP clobber list to fix #1550, the inline
+assembly fails to build with GCC < 5 in PIC mode with the following error:
+
+include/mbedtls/bn_mul.h:46:13: error: PIC register clobbered by ‘ebx’ in ‘asm’
+
+This is because older GCC versions treated the x86 ebx register (which is
+used for the GOT) as a fixed reserved register when building as PIC.
+
+This is fixed by an improved register allocator in GCC 5+.  From the release
+notes:
+
+Register allocation improvements: Reuse of the PIC hard register, instead of
+using a fixed register, was implemented on x86/x86-64 targets.  This
+improves generated PIC code performance as more hard registers can be used.
+
+https://www.gnu.org/software/gcc/gcc-5/changes.html
+
+As a workaround, detect this situation and disable the inline assembly,
+similar to the MULADDC_CANNOT_USE_R7 logic.
+
+Upstream-Status: Backport [https://github.com/Mbed-TLS/mbedtls/commit/c0546e351f6d7ab50eb1de8cef1d0d167760fccc]
+Signed-off-by: Peter Korsgaard <peter@korsgaard.com>
+---
+ library/bn_mul.h | 18 +++++++++++++++++-
+ 1 file changed, 17 insertions(+), 1 deletion(-)
+
+--- a/third_party/mbedtls/repo/include/mbedtls/bn_mul.h
++++ b/third_party/mbedtls/repo/include/mbedtls/bn_mul.h
+@@ -55,12 +55,28 @@
+     ( !defined(__ARMCC_VERSION) || __ARMCC_VERSION >= 6000000 )
+ 
+ /*
++ * GCC < 5.0 treated the x86 ebx (which is used for the GOT) as a
++ * fixed reserved register when building as PIC, leading to errors
++ * like: bn_mul.h:46:13: error: PIC register clobbered by 'ebx' in 'asm'
++ *
++ * This is fixed by an improved register allocator in GCC 5+. From the
++ * release notes:
++ * Register allocation improvements: Reuse of the PIC hard register,
++ * instead of using a fixed register, was implemented on x86/x86-64
++ * targets. This improves generated PIC code performance as more hard
++ * registers can be used.
++ */
++#if defined(__GNUC__) && __GNUC__ < 5 && defined(__PIC__)
++#define MULADDC_CANNOT_USE_EBX
++#endif
++
++/*
+  * Disable use of the i386 assembly code below if option -O0, to disable all
+  * compiler optimisations, is passed, detected with __OPTIMIZE__
+  * This is done as the number of registers used in the assembly code doesn't
+  * work with the -O0 option.
+  */
+-#if defined(__i386__) && defined(__OPTIMIZE__)
++#if defined(__i386__) && defined(__OPTIMIZE__) && !defined(MULADDC_CANNOT_USE_EBX)
+ 
+ #define MULADDC_INIT                        \
+     asm(                                    \
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-daemon/mbedtls.patch b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-daemon/mbedtls.patch
new file mode 100644
index 0000000..be26a20
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-daemon/mbedtls.patch
@@ -0,0 +1,43 @@
+mbedtls: Disable documentation warning as error with clang
+
+There are shortcomings with doxygen info which clang-15+ flags; don't
+treat them as errors.
+
+Remove unused variable
+
+Fixes
+library/bignum.c:1395:29: error: variable 't' set but not used [-Werror,-Wunused-but-set-variable]
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+--- a/third_party/mbedtls/repo/library/bignum.c
++++ b/third_party/mbedtls/repo/library/bignum.c
+@@ -1544,7 +1544,7 @@ __attribute__ ((noinline))
+ #endif
+ void mpi_mul_hlp( size_t i, mbedtls_mpi_uint *s, mbedtls_mpi_uint *d, mbedtls_mpi_uint b )
+ {
+-    mbedtls_mpi_uint c = 0, t = 0;
++    mbedtls_mpi_uint c = 0;
+ 
+ #if defined(MULADDC_HUIT)
+     for( ; i >= 8; i -= 8 )
+@@ -1595,8 +1595,6 @@ void mpi_mul_hlp( size_t i, mbedtls_mpi_
+     }
+ #endif /* MULADDC_HUIT */
+ 
+-    t++;
+-
+     do {
+         *d += c; c = ( *d < c ); d++;
+     }
+--- a/third_party/mbedtls/repo/CMakeLists.txt
++++ b/third_party/mbedtls/repo/CMakeLists.txt
+@@ -192,7 +192,7 @@ if(CMAKE_COMPILER_IS_GNU)
+ endif(CMAKE_COMPILER_IS_GNU)
+ 
+ if(CMAKE_COMPILER_IS_CLANG)
+-    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wwrite-strings -Wpointer-arith -Wimplicit-fallthrough -Wshadow -Wvla")
++    set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall -Wextra -Wwrite-strings -Wpointer-arith -Wimplicit-fallthrough -Wshadow -Wvla -Wno-error=documentation")
+     set(CMAKE_C_FLAGS_RELEASE     "-O2")
+     set(CMAKE_C_FLAGS_DEBUG       "-O0 -g3")
+     set(CMAKE_C_FLAGS_COVERAGE    "-O0 -g3 --coverage")
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-daemon_git.bb b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-daemon_git.bb
index f3f4c70..18703d6 100644
--- a/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-daemon_git.bb
+++ b/meta-openembedded/meta-networking/recipes-connectivity/openthread/ot-daemon_git.bb
@@ -12,6 +12,8 @@
 PV = "0.1+git${SRCPV}"
 
 SRC_URI = "git://github.com/openthread/openthread.git;protocol=https;branch=main \
+           file://0001-bn_mul.h-fix-x86-PIC-inline-ASM-compilation-with-GCC.patch \
+           file://mbedtls.patch \
            "
 
 S = "${WORKDIR}/git"
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/ufw/ufw_0.36.1.bb b/meta-openembedded/meta-networking/recipes-connectivity/ufw/ufw_0.36.1.bb
index b6a768e..c479eef 100644
--- a/meta-openembedded/meta-networking/recipes-connectivity/ufw/ufw_0.36.1.bb
+++ b/meta-openembedded/meta-networking/recipes-connectivity/ufw/ufw_0.36.1.bb
@@ -70,5 +70,5 @@
 
 REQUIRED_DISTRO_FEATURES = "ipv6"
 
-DISTUTILS_BUILD_ARGS:append = " --iptables-dir /usr/sbin"
-DISTUTILS_INSTALL_ARGS:append = " --iptables-dir /usr/sbin"
+SETUPTOOLS_BUILD_ARGS:append = " --iptables-dir /usr/sbin"
+SETUPTOOLS_INSTALL_ARGS:append = " --iptables-dir /usr/sbin"
diff --git a/meta-openembedded/meta-networking/recipes-connectivity/wolfssl/wolfssl_5.4.0.bb b/meta-openembedded/meta-networking/recipes-connectivity/wolfssl/wolfssl_5.5.0.bb
similarity index 93%
rename from meta-openembedded/meta-networking/recipes-connectivity/wolfssl/wolfssl_5.4.0.bb
rename to meta-openembedded/meta-networking/recipes-connectivity/wolfssl/wolfssl_5.5.0.bb
index 6918ece..3aa4f40 100644
--- a/meta-openembedded/meta-networking/recipes-connectivity/wolfssl/wolfssl_5.4.0.bb
+++ b/meta-openembedded/meta-networking/recipes-connectivity/wolfssl/wolfssl_5.5.0.bb
@@ -13,7 +13,7 @@
 RPROVIDES:${PN} = "cyassl"
 
 SRC_URI = "git://github.com/wolfSSL/wolfssl.git;protocol=https;branch=master"
-SRCREV = "57aac1c50b45275c7a99eca32ad985998b292dc8"
+SRCREV = "44f81f8bc082319cebf0e37df8470aa5748c1355"
 
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-networking/recipes-core/packagegroups/packagegroup-meta-networking.bb b/meta-openembedded/meta-networking/recipes-core/packagegroups/packagegroup-meta-networking.bb
index d5a2dfa..11437e2 100644
--- a/meta-openembedded/meta-networking/recipes-core/packagegroups/packagegroup-meta-networking.bb
+++ b/meta-openembedded/meta-networking/recipes-core/packagegroups/packagegroup-meta-networking.bb
@@ -259,6 +259,7 @@
     ettercap \
 "
 RDEPENDS:packagegroup-meta-networking-support:remove:mipsarch = "memcached"
+RDEPENDS:packagegroup-meta-networking-support:remove:libc-musl = "ypbind-mt"
 
 EXCLUDE_FROM_WORLD = "1"
 # Empty packages, only devel headers and libs
diff --git a/meta-openembedded/meta-networking/recipes-daemons/autofs/autofs/0001-autofs-5.1.8-add-autofs_strerror_r-helper-for-musl.patch b/meta-openembedded/meta-networking/recipes-daemons/autofs/autofs/0001-autofs-5.1.8-add-autofs_strerror_r-helper-for-musl.patch
new file mode 100644
index 0000000..5fd9a8d
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-daemons/autofs/autofs/0001-autofs-5.1.8-add-autofs_strerror_r-helper-for-musl.patch
@@ -0,0 +1,54 @@
+From 88f991b0ebb6fb8fcaad3d0eb8fb51a7439d053e Mon Sep 17 00:00:00 2001
+From: Fabian Groffen <grobian@gentoo.org>
+Date: Wed, 2 Feb 2022 09:27:13 +0800
+Subject: [PATCH 1/2] autofs-5.1.8 - add autofs_strerror_r() helper for musl
+
+If using musl libc, strerror_r() is the XSI-compliant variant, which returns
+an integer instead of a pointer, so add a helper function to handle this
+case.
+
+Signed-off-by: Fabian Groffen <grobian@gentoo.org>
+Signed-off-by: Ian Kent <raven@themaw.net>
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ include/automount.h |  5 +++++
+ lib/log.c           | 10 ++++++++++
+ 2 files changed, 15 insertions(+)
+
+diff --git a/include/automount.h b/include/automount.h
+index 8cd8b3a..f759e59 100644
+--- a/include/automount.h
++++ b/include/automount.h
+@@ -51,6 +51,11 @@
+ # endif
+ #endif
+ 
++#ifndef __GLIBC__
++# define strerror_r(N,B,S) autofs_strerror_r(N,B,S)
++char *autofs_strerror_r(int errnum, char *buf, size_t buflen);  /* GNU */
++#endif
++
+ /* We MUST have the paths to mount(8) and umount(8) */
+ #ifndef HAVE_MOUNT
+ #error Failed to locate mount(8)!
+diff --git a/lib/log.c b/lib/log.c
+index 39b1e3b..b99fa39 100644
+--- a/lib/log.c
++++ b/lib/log.c
+@@ -368,3 +368,13 @@ pid_t log_pidinfo(struct autofs_point *ap, pid_t pid, char *label) {
+ 
+ 	return ppid;
+ }
++
++#ifndef __GLIBC__
++# undef strerror_r
++char *autofs_strerror_r(int errnum, char *buf, size_t buflen) {
++	int s = strerror_r(errnum, buf, buflen);
++	if (s)
++		return NULL;
++	return buf;
++}
++#endif
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-networking/recipes-daemons/autofs/autofs/0002-autofs-5.1.8-handle-innetgr-not-present-in-musl.patch b/meta-openembedded/meta-networking/recipes-daemons/autofs/autofs/0002-autofs-5.1.8-handle-innetgr-not-present-in-musl.patch
new file mode 100644
index 0000000..9d0caae
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-daemons/autofs/autofs/0002-autofs-5.1.8-handle-innetgr-not-present-in-musl.patch
@@ -0,0 +1,106 @@
+From 1c0b0b70a276280f431d72319109a0bbc0267970 Mon Sep 17 00:00:00 2001
+From: Fabian Groffen <grobian@gentoo.org>
+Date: Wed, 2 Feb 2022 10:15:22 +0800
+Subject: [PATCH 2/2] autofs-5.1.8 - handle innetgr() not present in musl
+
+The function innetgr(3) may not be present in musl libc, add a check
+for this.
+
+Originally contributed by Fabian, modified by me.
+
+Upstream-Status: Backport [https://git.kernel.org/pub/scm/linux/storage/autofs/autofs.git/commit/?id=f60e40af3c038b8955325a11b7294ad38c15c9e8]
+Signed-off-by: Fabian Groffen <grobian@gentoo.org>
+Signed-off-by: Ian Kent <raven@themaw.net>
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure           | 6 ++++++
+ configure.in        | 2 +-
+ include/config.h.in | 3 +++
+ modules/parse_amd.c | 7 +++++++
+ 4 files changed, 17 insertions(+), 1 deletion(-)
+
+--- a/configure.in
++++ b/configure.in
+@@ -169,7 +169,7 @@ AF_CHECK_SSS_LIB(SSS_AUTOFS, libsss_auto
+ AC_SUBST(HAVE_SSS_AUTOFS)
+ AC_SUBST(sssldir)
+ 
+-AC_CHECK_FUNCS(pipe2)
++AC_CHECK_FUNCS(pipe2 innetgr)
+ 
+ #
+ # Newer mounts have the -s (sloppy) option to ignore unknown options,
+--- a/include/config.h.in
++++ b/include/config.h.in
+@@ -30,6 +30,9 @@
+ /* Define to 1 if you have the `getservbyname' function. */
+ #undef HAVE_GETSERVBYNAME
+ 
++/* Define to 1 if you have the `innetgr' function. */
++#undef HAVE_INNETGR
++
+ /* Define to 1 if you have the <inttypes.h> header file. */
+ #undef HAVE_INTTYPES_H
+ 
+@@ -45,9 +48,6 @@
+ /* Define if you have the Linux /proc filesystem. */
+ #undef HAVE_LINUX_PROCFS
+ 
+-/* Define to 1 if you have the <memory.h> header file. */
+-#undef HAVE_MEMORY_H
+-
+ /* define if you have MOUNT */
+ #undef HAVE_MOUNT
+ 
+@@ -69,6 +69,9 @@
+ /* Define to 1 if you have the <stdint.h> header file. */
+ #undef HAVE_STDINT_H
+ 
++/* Define to 1 if you have the <stdio.h> header file. */
++#undef HAVE_STDIO_H
++
+ /* Define to 1 if you have the <stdlib.h> header file. */
+ #undef HAVE_STDLIB_H
+ 
+@@ -141,7 +144,9 @@
+ /* define if you have YACC */
+ #undef PATH_YACC
+ 
+-/* Define to 1 if you have the ANSI C header files. */
++/* Define to 1 if all of the C90 standard headers exist (not just the ones
++   required in a freestanding environment). This macro is provided for
++   backward compatibility; new code need not use it. */
+ #undef STDC_HEADERS
+ 
+ /* Define to 1 to use the libtirpc tsd usage workaround */
+--- a/modules/parse_amd.c
++++ b/modules/parse_amd.c
+@@ -424,6 +424,7 @@ static int sel_in_network(struct autofs_
+ 	return ret;
+ }
+ 
++#ifdef HAVE_INNETGR
+ static int sel_netgrp(struct autofs_point *ap,
+ 		      struct selector *s, struct substvar *sv)
+ {
+@@ -488,6 +489,7 @@ out:
+ 
+ 	return ret;
+ }
++#endif
+ 
+ static int eval_selector(struct autofs_point *ap,
+ 			 struct amd_entry *this, struct substvar *sv)
+@@ -627,7 +629,12 @@ static int eval_selector(struct autofs_p
+ 		switch (s->sel->selector) {
+ 		case SEL_NETGRP:
+ 		case SEL_NETGRPD:
++#ifndef HAVE_INNETGR
++			error(logopt, MODPREFIX
++			      "netgroups not available, function innetgr(3) not available");
++#else
+ 			ret = sel_netgrp(ap, s, sv);
++#endif
+ 			break;
+ 
+ 		default:
diff --git a/meta-openembedded/meta-networking/recipes-daemons/autofs/autofs_5.1.8.bb b/meta-openembedded/meta-networking/recipes-daemons/autofs/autofs_5.1.8.bb
index cb80844..ca11f1a 100644
--- a/meta-openembedded/meta-networking/recipes-daemons/autofs/autofs_5.1.8.bb
+++ b/meta-openembedded/meta-networking/recipes-daemons/autofs/autofs_5.1.8.bb
@@ -27,6 +27,8 @@
            file://0001-Bug-fix-for-pid_t-not-found-on-musl.patch \
            file://0001-Define-__SWORD_TYPE-if-undefined.patch \
            file://mount_conflict.patch \
+           file://0001-autofs-5.1.8-add-autofs_strerror_r-helper-for-musl.patch \
+           file://0002-autofs-5.1.8-handle-innetgr-not-present-in-musl.patch \
            "
 SRC_URI[sha256sum] = "0bd401c56f0eb1ca6251344c3a3d70bface3eccf9c67117cd184422c4cace30c"
 
diff --git a/meta-openembedded/meta-networking/recipes-daemons/ippool/ippool/0001-pppd-ippool.c-Fix-type-casting-issues-between-in_add.patch b/meta-openembedded/meta-networking/recipes-daemons/ippool/ippool/0001-pppd-ippool.c-Fix-type-casting-issues-between-in_add.patch
new file mode 100644
index 0000000..a98c179
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-daemons/ippool/ippool/0001-pppd-ippool.c-Fix-type-casting-issues-between-in_add.patch
@@ -0,0 +1,72 @@
+From da67444994bde603c7ff1483a6803bdab24e1f14 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 09:36:55 -0700
+Subject: [PATCH 1/2] pppd/ippool.c: Fix type casting issues between in_addr
+ and ippool_api_ip_addr
+
+Also remove unused variables
+
+Upstream-Status: Inappropriate [No upstream]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ pppd/ippool.c | 13 ++++++-------
+ 1 file changed, 6 insertions(+), 7 deletions(-)
+
+--- a/pppd/ippool.c
++++ b/pppd/ippool.c
+@@ -16,6 +16,7 @@
+ #include <sys/ioctl.h>
+ #include <sys/socket.h>
+ #include <netinet/in.h>
++#include <arpa/inet.h>
+ #include <signal.h>
+ 
+ #include <linux/types.h>
+@@ -24,7 +25,6 @@
+ 
+ const char pppd_version[] = VERSION;
+ 
+-static int ippool_fd = -1;
+ static char *ippool_pool_name = NULL;
+ static char *ippool_pool_name2 = NULL;
+ static char *ippool_server = "localhost";
+@@ -64,9 +64,9 @@ static int ippool_addr_alloc(CLIENT *cl,
+ 	}
+ 
+ 	*addr = clnt_res.addr.s_addr;
+-
++	struct in_addr temp_addr = {*addr};
+ 	if (ippool_debug) {
+-		dbglog("Allocated address %s from pool %s", inet_ntoa(clnt_res.addr.s_addr), pool_name);
++		dbglog("Allocated address %s from pool %s", inet_ntoa(temp_addr), pool_name);
+ 	}
+ out:
+ 	return result;
+@@ -85,14 +85,16 @@ static void ippool_addr_free(CLIENT *cl,
+ 	}
+ 	if (clnt_res < 0) {
+ 		if (ippool_debug) {
++			struct in_addr temp_addr = {free_addr.s_addr};
+ 			warn("IP address %s free to pool %s failed: %s", 
+-			     inet_ntoa(free_addr), pool_name, strerror(-clnt_res));
++			     inet_ntoa(temp_addr), pool_name, strerror(-clnt_res));
+ 		}
+ 		goto out;
+ 	}
+ 
+ 	if (ippool_debug) {
+-		dbglog("Freed address %s to pool %s", inet_ntoa(free_addr), pool_name);
++		struct in_addr temp_addr = {free_addr.s_addr};
++		dbglog("Freed address %s to pool %s", inet_ntoa(temp_addr), pool_name);
+ 	}
+ out:
+ 	return;
+@@ -138,8 +140,6 @@ static void ippool_choose_ip(u_int32_t *
+ {
+ 	ipcp_options *wo = &ipcp_wantoptions[0];
+ 	ipcp_options *go = &ipcp_gotoptions[0];
+-	ipcp_options *ao = &ipcp_allowoptions[0];
+-	ipcp_options *ho = &ipcp_hisoptions[0];
+ 	CLIENT *cl;
+ 	int result = 0;
+ 
diff --git a/meta-openembedded/meta-networking/recipes-daemons/ippool/ippool/0002-ippool_rpc_server.c-Add-missing-prototype-for-ippool.patch b/meta-openembedded/meta-networking/recipes-daemons/ippool/ippool/0002-ippool_rpc_server.c-Add-missing-prototype-for-ippool.patch
new file mode 100644
index 0000000..b8fdedf
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-daemons/ippool/ippool/0002-ippool_rpc_server.c-Add-missing-prototype-for-ippool.patch
@@ -0,0 +1,22 @@
+From f9ea91771f0d3c984e7d5fe9e15962db1ee686ad Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 09:39:16 -0700
+Subject: [PATCH 2/2] ippool_rpc_server.c: Add missing prototype for
+ ippool_api_rpc_check_request
+
+Upstream-Status: Inappropriate [no upstream]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ Makefile | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/Makefile
++++ b/Makefile
+@@ -123,6 +123,7 @@ $(IPPOOL_RPC_STEM)_server.c:	$(IPPOOL_RP
+ 			-$(RM) $@ $@.tmp
+ 			rpcgen $(RPCGENFLAGS) -m -o $@.tmp $<
+ 			cat $@.tmp | sed -e 's/switch (rqstp->rq_proc) {/if (ippool_api_rpc_check_request(transp) < 0) return; switch (rqstp->rq_proc) {/' > $@
++			sed -i '20i int ippool_api_rpc_check_request(SVCXPRT *xprt);' $@
+ 
+ $(IPPOOL_RPC_STEM)_client.c:	$(IPPOOL_RPC_STEM).x
+ 			-$(RM) $@
diff --git a/meta-openembedded/meta-networking/recipes-daemons/ippool/ippool_1.3.bb b/meta-openembedded/meta-networking/recipes-daemons/ippool/ippool_1.3.bb
index b91ca53..984acac 100644
--- a/meta-openembedded/meta-networking/recipes-daemons/ippool/ippool_1.3.bb
+++ b/meta-openembedded/meta-networking/recipes-daemons/ippool/ippool_1.3.bb
@@ -27,6 +27,8 @@
            file://0002-link-with-libtirpc.patch \
            file://0003-musl-fixes.patch \
            file://strncpy-truncation.patch \
+           file://0001-pppd-ippool.c-Fix-type-casting-issues-between-in_add.patch \
+           file://0002-ippool_rpc_server.c-Add-missing-prototype-for-ippool.patch \
            "
 
 LIC_FILES_CHKSUM = "file://LICENSE;md5=4c59283b82fc2b166455e0fc23c71c6f"
diff --git a/meta-openembedded/meta-networking/recipes-daemons/iscsi-initiator-utils/files/0001-Makefile-Do-not-set-Werror.patch b/meta-openembedded/meta-networking/recipes-daemons/iscsi-initiator-utils/files/0001-Makefile-Do-not-set-Werror.patch
index d5e0deb..ab6ff6e 100644
--- a/meta-openembedded/meta-networking/recipes-daemons/iscsi-initiator-utils/files/0001-Makefile-Do-not-set-Werror.patch
+++ b/meta-openembedded/meta-networking/recipes-daemons/iscsi-initiator-utils/files/0001-Makefile-Do-not-set-Werror.patch
@@ -1,4 +1,4 @@
-From 31d88f46bfc67de2659991674253a5d5dfb92afc Mon Sep 17 00:00:00 2001
+From 6afdfbdf1ecf3e7e9158734a3994a57ea151d680 Mon Sep 17 00:00:00 2001
 From: Khem Raj <raj.khem@gmail.com>
 Date: Wed, 12 Aug 2020 12:00:29 -0700
 Subject: [PATCH] Makefile: Do not set -Werror
@@ -9,16 +9,17 @@
 Upstream-Status: Inappropriate [OE-Specific]
 
 Signed-off-by: Khem Raj <raj.khem@gmail.com>
+
 ---
  usr/Makefile | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/usr/Makefile b/usr/Makefile
-index 21bb154..0018605 100644
+index 3fc2248..2b5a234 100644
 --- a/usr/Makefile
 +++ b/usr/Makefile
-@@ -35,7 +35,7 @@ endif
- PKG_CONFIG = /usr/bin/pkg-config
+@@ -44,7 +44,7 @@ HOMEDIR ?= $(etcdir)/iscsi
+ PKG_CONFIG ?= /usr/bin/pkg-config
  
  CFLAGS ?= -O2 -g
 -WARNFLAGS ?= -Wall -Wextra -Werror -Wstrict-prototypes -fno-common
@@ -26,6 +27,3 @@
  CFLAGS += $(WARNFLAGS) -I../include -I. -D_GNU_SOURCE \
  	  -I$(TOPDIR)/libopeniscsiusr
  CFLAGS += $(shell $(PKG_CONFIG) --cflags libkmod)
--- 
-2.28.0
-
diff --git a/meta-openembedded/meta-networking/recipes-daemons/iscsi-initiator-utils/files/0002-iscsiuio-Use-pthread_t-for-INVALID_THREAD.patch b/meta-openembedded/meta-networking/recipes-daemons/iscsi-initiator-utils/files/0002-iscsiuio-Use-pthread_t-for-INVALID_THREAD.patch
new file mode 100644
index 0000000..02669e9
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-daemons/iscsi-initiator-utils/files/0002-iscsiuio-Use-pthread_t-for-INVALID_THREAD.patch
@@ -0,0 +1,27 @@
+From ef54a6f4a81da8ab653d352bfbd3b2521ce9eb7c Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 15:55:32 -0700
+Subject: [PATCH] iscsiuio: Use pthread_t for INVALID_THREAD
+
+pthread_t is opaque; on musl it is not a plain old data type as it is on
+glibc, so cast the sentinel to pthread_t to avoid compiler errors
+
+Upstream-Status: Submitted [https://github.com/open-iscsi/open-iscsi/pull/363]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ iscsiuio/src/unix/options.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/iscsiuio/src/unix/options.h b/iscsiuio/src/unix/options.h
+index 63b8635..873a98a 100644
+--- a/iscsiuio/src/unix/options.h
++++ b/iscsiuio/src/unix/options.h
+@@ -86,7 +86,7 @@
+ #define DEBUG_ON	0x2
+ 
+ #define INVALID_FD	-1
+-#define INVALID_THREAD	-1
++#define INVALID_THREAD	(pthread_t)-1
+ #define INVALID_HOST_NO	-1
+ 
+ struct options {
diff --git a/meta-openembedded/meta-networking/recipes-daemons/iscsi-initiator-utils/iscsi-initiator-utils_2.1.6.bb b/meta-openembedded/meta-networking/recipes-daemons/iscsi-initiator-utils/iscsi-initiator-utils_2.1.7.bb
similarity index 96%
rename from meta-openembedded/meta-networking/recipes-daemons/iscsi-initiator-utils/iscsi-initiator-utils_2.1.6.bb
rename to meta-openembedded/meta-networking/recipes-daemons/iscsi-initiator-utils/iscsi-initiator-utils_2.1.7.bb
index 921f7d4..3ed55d6 100644
--- a/meta-openembedded/meta-networking/recipes-daemons/iscsi-initiator-utils/iscsi-initiator-utils_2.1.6.bb
+++ b/meta-openembedded/meta-networking/recipes-daemons/iscsi-initiator-utils/iscsi-initiator-utils_2.1.7.bb
@@ -12,7 +12,7 @@
 
 LIC_FILES_CHKSUM = "file://COPYING;md5=b234ee4d69f5fce4486a80fdaf4a4263"
 
-SRCREV ?= "ee575fd19429ec6dc24b49f2ce3822b4a01f57de"
+SRCREV = "7b53fcc502da8617110fd64d675b476772c28a6f"
 
 SRC_URI = "git://github.com/open-iscsi/open-iscsi;branch=master;protocol=https \
            file://0001-Makefile-Do-not-set-Werror.patch \
@@ -22,6 +22,7 @@
            file://iscsi-initiator.service \
            file://iscsi-initiator-targets.service \
            file://set_initiatorname \
+           file://0002-iscsiuio-Use-pthread_t-for-INVALID_THREAD.patch \
            "
 S = "${WORKDIR}/git"
 
@@ -41,6 +42,7 @@
     MANDIR="${mandir}" \
     OPTFLAGS="-DNO_SYSTEMD ${CFLAGS}" \
     PKG_CONFIG="${STAGING_BINDIR_NATIVE}/pkg-config" \
+    SED=sed \
 '
 
 
diff --git a/meta-openembedded/meta-networking/recipes-daemons/radvd/files/0001-Reverts-the-include.h-change-in-46883f8a1a02fe42040d.patch b/meta-openembedded/meta-networking/recipes-daemons/radvd/files/0001-Reverts-the-include.h-change-in-46883f8a1a02fe42040d.patch
new file mode 100644
index 0000000..57338d4
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-daemons/radvd/files/0001-Reverts-the-include.h-change-in-46883f8a1a02fe42040d.patch
@@ -0,0 +1,32 @@
+From 15f439c555289f900eb33111b010bf1266f97edb Mon Sep 17 00:00:00 2001
+From: Jonathan Davies <jpds@protonmail.com>
+Date: Thu, 25 Nov 2021 15:29:18 +0000
+Subject: [PATCH] Reverts the include.h change in
+ 46883f8a1a02fe42040dd8e48aec0ed871545d4d
+
+Closes: #158
+
+Upstream-Status: Backport [https://github.com/radvd-project/radvd/commit/06689f8c06f44c7e87f7ff1d814428f88375b53f]
+Signed-off-by: Jonathan Davies <jpds@protonmail.com>
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ includes.h | 5 -----
+ 1 file changed, 5 deletions(-)
+
+diff --git a/includes.h b/includes.h
+index ef30b10..c528c86 100644
+--- a/includes.h
++++ b/includes.h
+@@ -76,12 +76,7 @@
+ #include <sys/sysctl.h>
+ #endif
+ 
+-#if !defined(__GLIBC__) && defined(linux)
+-#include <linux/if.h>
+-#define IF_NAMESIZE IFNAMSIZ
+-#else
+ #include <net/if.h>
+-#endif
+ 
+ #ifdef HAVE_NET_IF_DL_H
+ #include <net/if_dl.h>
diff --git a/meta-openembedded/meta-networking/recipes-daemons/radvd/radvd.inc b/meta-openembedded/meta-networking/recipes-daemons/radvd/radvd.inc
deleted file mode 100644
index 2afaa48..0000000
--- a/meta-openembedded/meta-networking/recipes-daemons/radvd/radvd.inc
+++ /dev/null
@@ -1,67 +0,0 @@
-SUMMARY = "IPv6 router advertisement daemon"
-DESCRIPTION = "radvd is the router advertisement daemon for IPv6. It \
-listens to router solicitations and sends router \
-advertisements as described in RFC 2461, Neighbor \
-Discovery for IP Version 6 (IPv6). With these \
-advertisements hosts can automatically configure their \
-addresses and some other parameters. They also can \
-choose a default router based on these advertisements."
-HOMEPAGE = "http://www.litech.org/radvd/"
-SECTION = "net"
-DEPENDS = "flex-native bison-native libdaemon "
-
-# License is BSD-Style (with advertising clause) but also has an additional 0th clause
-LICENSE = "radvd"
-LIC_FILES_CHKSUM = "file://COPYRIGHT;md5=73ebbf7182ae996e65e8fadc9a8c45db"
-
-SRC_URI = "http://v6web.litech.org/radvd/dist/radvd-${PV}.tar.gz \
-           file://radvd.init \
-           file://radvd.service \
-           file://volatiles.03_radvd \
-           file://radvd.default \
-           file://radvd.conf"
-
-inherit autotools useradd pkgconfig systemd
-
-SYSTEMD_SERVICE:${PN} = "radvd.service"
-SYSTEMD_AUTO_ENABLE = "disable"
-
-do_install:append () {
-    install -m 0755 -d ${D}${sysconfdir}/init.d \
-                       ${D}${sysconfdir}/default/volatiles \
-                       ${D}${docdir}/radvd
-    # Install init script and volatiles
-    install -m 0755 ${WORKDIR}/radvd.init ${D}${sysconfdir}/init.d/radvd
-    sed -i 's!/usr/sbin/!${sbindir}/!g' ${D}${sysconfdir}/init.d/radvd
-    sed -i 's!/etc/!${sysconfdir}/!g' ${D}${sysconfdir}/init.d/radvd
-    sed -i 's!/var/!${localstatedir}/!g' ${D}${sysconfdir}/init.d/radvd
-    sed -i 's!^PATH=.*!PATH=${base_sbindir}:${base_bindir}:${sbindir}:${bindir}!' ${D}${sysconfdir}/init.d/radvd
-
-    install -m 0644 ${WORKDIR}/volatiles.03_radvd ${D}${sysconfdir}/default/volatiles/03_radvd
-
-    # Install systemd service files
-    install -d ${D}${systemd_unitdir}/system
-    install -m 0644 ${WORKDIR}/radvd.service ${D}${systemd_unitdir}/system
-    sed -i -e 's#@SYSCONFDIR@#${sysconfdir}#g' \
-           -e 's#@SBINDIR@#${sbindir}#g' \
-           -e 's#@BASE_BINDIR@#${base_bindir}#g' ${D}${systemd_unitdir}/system/radvd.service
-
-    # Install default environment file
-    install -m 0644 ${WORKDIR}/radvd.default ${D}${sysconfdir}/default/radvd
-
-    # Documentation
-    for i in radvd.conf.example README; do \
-        install -m 0644 ${S}/$i ${D}${docdir}/radvd; \
-    done
-
-    install -m 0644 ${WORKDIR}/radvd.conf ${D}${sysconfdir}/radvd.conf
-}
-
-USERADD_PACKAGES = "${PN}"
-USERADD_PARAM:${PN} = "--system --home ${localstatedir}/run/radvd/ -M -g nogroup radvd"
-
-pkg_postinst:${PN} () {
-    if [ -z "$D" -a -x /etc/init.d/populate-volatile.sh ]; then
-        /etc/init.d/populate-volatile.sh update
-    fi
-}
diff --git a/meta-openembedded/meta-networking/recipes-daemons/radvd/radvd_2.19.bb b/meta-openembedded/meta-networking/recipes-daemons/radvd/radvd_2.19.bb
index 553987e..f9f810a 100644
--- a/meta-openembedded/meta-networking/recipes-daemons/radvd/radvd_2.19.bb
+++ b/meta-openembedded/meta-networking/recipes-daemons/radvd/radvd_2.19.bb
@@ -1,5 +1,70 @@
+SUMMARY = "IPv6 router advertisement daemon"
+DESCRIPTION = "radvd is the router advertisement daemon for IPv6. It \
+listens to router solicitations and sends router \
+advertisements as described in RFC 2461, Neighbor \
+Discovery for IP Version 6 (IPv6). With these \
+advertisements hosts can automatically configure their \
+addresses and some other parameters. They also can \
+choose a default router based on these advertisements."
+HOMEPAGE = "http://www.litech.org/radvd/"
+SECTION = "net"
+DEPENDS = "flex-native bison-native libdaemon "
 
-require radvd.inc
+# License is BSD-Style (with advertising clause) but also has an additional 0th clause
+LICENSE = "radvd"
+LIC_FILES_CHKSUM = "file://COPYRIGHT;md5=73ebbf7182ae996e65e8fadc9a8c45db"
 
-SRC_URI[md5sum] = "e9c425ac48ecb96ea5ea2912c78969f9"
+SRC_URI = "http://v6web.litech.org/radvd/dist/radvd-${PV}.tar.gz \
+           file://radvd.init \
+           file://radvd.service \
+           file://volatiles.03_radvd \
+           file://radvd.default \
+           file://radvd.conf \
+           file://0001-Reverts-the-include.h-change-in-46883f8a1a02fe42040d.patch \
+           "
 SRC_URI[sha256sum] = "c36470706fec3a9e6bed394ffea08acaff5dac647848d26b96bb9b9c65d58da0"
+
+inherit autotools useradd pkgconfig systemd
+
+SYSTEMD_SERVICE:${PN} = "radvd.service"
+SYSTEMD_AUTO_ENABLE = "disable"
+
+do_install:append () {
+    install -m 0755 -d ${D}${sysconfdir}/init.d \
+                       ${D}${sysconfdir}/default/volatiles \
+                       ${D}${docdir}/radvd
+    # Install init script and volatiles
+    install -m 0755 ${WORKDIR}/radvd.init ${D}${sysconfdir}/init.d/radvd
+    sed -i 's!/usr/sbin/!${sbindir}/!g' ${D}${sysconfdir}/init.d/radvd
+    sed -i 's!/etc/!${sysconfdir}/!g' ${D}${sysconfdir}/init.d/radvd
+    sed -i 's!/var/!${localstatedir}/!g' ${D}${sysconfdir}/init.d/radvd
+    sed -i 's!^PATH=.*!PATH=${base_sbindir}:${base_bindir}:${sbindir}:${bindir}!' ${D}${sysconfdir}/init.d/radvd
+
+    install -m 0644 ${WORKDIR}/volatiles.03_radvd ${D}${sysconfdir}/default/volatiles/03_radvd
+
+    # Install systemd service files
+    install -d ${D}${systemd_unitdir}/system
+    install -m 0644 ${WORKDIR}/radvd.service ${D}${systemd_unitdir}/system
+    sed -i -e 's#@SYSCONFDIR@#${sysconfdir}#g' \
+           -e 's#@SBINDIR@#${sbindir}#g' \
+           -e 's#@BASE_BINDIR@#${base_bindir}#g' ${D}${systemd_unitdir}/system/radvd.service
+
+    # Install default environment file
+    install -m 0644 ${WORKDIR}/radvd.default ${D}${sysconfdir}/default/radvd
+
+    # Documentation
+    for i in radvd.conf.example README; do \
+        install -m 0644 ${S}/$i ${D}${docdir}/radvd; \
+    done
+
+    install -m 0644 ${WORKDIR}/radvd.conf ${D}${sysconfdir}/radvd.conf
+}
+
+USERADD_PACKAGES = "${PN}"
+USERADD_PARAM:${PN} = "--system --home ${localstatedir}/run/radvd/ -M -g nogroup radvd"
+
+pkg_postinst:${PN} () {
+    if [ -z "$D" -a -x /etc/init.d/populate-volatile.sh ]; then
+        /etc/init.d/populate-volatile.sh update
+    fi
+}
diff --git a/meta-openembedded/meta-networking/recipes-daemons/squid/files/0001-Fix-build-on-Fedora-Rawhide-772.patch b/meta-openembedded/meta-networking/recipes-daemons/squid/files/0001-Fix-build-on-Fedora-Rawhide-772.patch
deleted file mode 100644
index ff51f53..0000000
--- a/meta-openembedded/meta-networking/recipes-daemons/squid/files/0001-Fix-build-on-Fedora-Rawhide-772.patch
+++ /dev/null
@@ -1,82 +0,0 @@
-From 1f8b5f0e1cc27634a7310be4c9674112f919d974 Mon Sep 17 00:00:00 2001
-From: uhliarik <luhliari@redhat.com>
-Date: Thu, 18 Feb 2021 01:08:40 +0000
-Subject: [PATCH] Fix build on Fedora Rawhide (#772)
-
-* add SYSTEMD_LIBS to all binaries using client_side.cc, fixing linking
-* add `<limits>` to all sources using std::numeric_limits, fixing gcc-11
-  builds
----
-Upstream-Status: Backport [https://github.com/kraj/squid/commit/1f8b5f0e1cc27634a7310be4c9674112f919d974]
- src/Makefile.am               | 4 ++++
- src/ip/QosConfig.cc           | 1 +
- src/ipc/mem/PageStack.cc      | 1 +
- src/ssl/helper.cc             | 2 ++
- 4 files changed, 8 insertions(+)
-
---- a/src/Makefile.am
-+++ b/src/Makefile.am
-@@ -2320,6 +2320,7 @@ tests_test_http_range_LDADD = \
- 	$(SSLLIB) \
- 	$(KRB5LIBS) \
- 	$(LIBCPPUNIT_LIBS) \
-+	$(SYSTEMD_LIBS) \
- 	$(COMPAT_LIB) \
- 	$(XTRA_LIBS)
- tests_test_http_range_LDFLAGS = $(LIBADD_DL)
-@@ -2624,6 +2625,7 @@ tests_testHttpRequest_LDADD = \
- 	$(SSLLIB) \
- 	$(KRB5LIBS) \
- 	$(LIBCPPUNIT_LIBS) \
-+	$(SYSTEMD_LIBS) \
- 	$(COMPAT_LIB) \
- 	$(XTRA_LIBS)
- tests_testHttpRequest_LDFLAGS = $(LIBADD_DL)
-@@ -3487,6 +3489,7 @@ tests_testURL_LDADD = \
- 	$(SSLLIB) \
- 	$(KRB5LIBS) \
- 	$(LIBCPPUNIT_LIBS) \
-+	$(SYSTEMD_LIBS) \
- 	$(COMPAT_LIB) \
- 	$(XTRA_LIBS)
- tests_testURL_LDFLAGS = $(LIBADD_DL)
-@@ -3646,6 +3649,7 @@ nodist_tests_testYesNoNone_SOURCES = \
- tests_testYesNoNone_LDADD= \
- 	base/libbase.la \
- 	$(LIBCPPUNIT_LIBS) \
-+	$(SYSTEMD_LIBS) \
- 	$(COMPAT_LIB) \
- 	$(XTRA_LIBS)
- tests_testYesNoNone_LDFLAGS = $(LIBADD_DL)
---- a/src/ip/QosConfig.cc
-+++ b/src/ip/QosConfig.cc
-@@ -21,6 +21,7 @@
- #include "Parsing.h"
- 
- #include <cerrno>
-+#include <limits>
- 
- CBDATA_CLASS_INIT(acl_tos);
- 
---- a/src/ipc/mem/PageStack.cc
-+++ b/src/ipc/mem/PageStack.cc
-@@ -14,6 +14,7 @@
- #include "Debug.h"
- #include "ipc/mem/Page.h"
- #include "ipc/mem/PageStack.h"
-+#include <limits>
- 
- /// used to mark a stack slot available for storing free page offsets
- const Ipc::Mem::PageStack::Value Writable = 0;
---- a/src/ssl/helper.cc
-+++ b/src/ssl/helper.cc
-@@ -19,6 +19,8 @@
- #include "ssl/helper.h"
- #include "wordlist.h"
- 
-+#include <limits>
-+
- Ssl::CertValidationHelper::LruCache *Ssl::CertValidationHelper::HelperCache = nullptr;
- 
- #if USE_SSL_CRTD
- 
diff --git a/meta-openembedded/meta-networking/recipes-daemons/squid/files/0001-splay.cc-fix-bind-is-not-a-member-of-std.patch b/meta-openembedded/meta-networking/recipes-daemons/squid/files/0001-splay.cc-fix-bind-is-not-a-member-of-std.patch
deleted file mode 100644
index fbbad15..0000000
--- a/meta-openembedded/meta-networking/recipes-daemons/squid/files/0001-splay.cc-fix-bind-is-not-a-member-of-std.patch
+++ /dev/null
@@ -1,31 +0,0 @@
-From 1def5b4278d97f197520d23c1dce52f93a1b2f46 Mon Sep 17 00:00:00 2001
-From: Andrej Valek <andrej.valek@siemens.com>
-Date: Tue, 9 Apr 2019 09:40:30 +0200
-Subject: [PATCH] splay.cc: fix bind is not a member of std
-
-fix
-| ../../squid-4.6/test-suite/splay.cc:134:28: error: 'bind' is not a member of 'std'
-|      auto nextRandom = std::bind (distribution, generator);
-|                             ^~~~
-| ../../squid-4.6/test-suite/splay.cc:134:28: note: 'std::bind' is defined in header '<functional>'; did you forget to '#include <functional>'?
-
-Signed-off-by: Andrej Valek <andrej.valek@siemens.com>
----
- test-suite/splay.cc | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/test-suite/splay.cc b/test-suite/splay.cc
-index f71b337..4e21adc 100644
---- a/test-suite/splay.cc
-+++ b/test-suite/splay.cc
-@@ -20,6 +20,7 @@
- #include <unistd.h>
- #endif
- #include <random>
-+#include <functional>
- 
- class intnode
- {
--- 
-2.11.0
-
diff --git a/meta-openembedded/meta-networking/recipes-daemons/squid/files/0001-tools.cc-fixed-unused-result-warning.patch b/meta-openembedded/meta-networking/recipes-daemons/squid/files/0001-tools.cc-fixed-unused-result-warning.patch
deleted file mode 100644
index f267875..0000000
--- a/meta-openembedded/meta-networking/recipes-daemons/squid/files/0001-tools.cc-fixed-unused-result-warning.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 86dae8010310d13bd2a2beb006b4085d06ae1556 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Sun, 25 Jun 2017 00:59:24 -0700
-Subject: [PATCH] tools.cc: fixed unused-result warning
-
-fix
-| ../../squid-3.5.26/src/tools.cc: In function 'void enter_suid()':
-| ../../squid-3.5.26/src/tools.cc:616:11: error: ignoring return value of 'int setuid(__uid_t)', declared with attribute warn_unused_result [-Werror=unused-result]
-|      setuid(0);
-|      ~~~~~~^~~
-
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
-
----
- src/tools.cc | 5 +++--
- 1 file changed, 3 insertions(+), 2 deletions(-)
-
-diff --git a/src/tools.cc b/src/tools.cc
-index 5829574..19f0836 100644
---- a/src/tools.cc
-+++ b/src/tools.cc
-@@ -581,8 +581,10 @@ enter_suid(void)
-         debugs (21, 3, "enter_suid: setresuid failed: " << xstrerr(xerrno));
-     }
- #else
--
--    setuid(0);
-+    if (setuid(0) < 0) {
-+        const auto xerrno = errno;
-+        debugs(50, DBG_IMPORTANT, "WARNING: no_suid: setuid(0): " << xstrerr(xerrno));
-+    }
- #endif
- #if HAVE_PRCTL && defined(PR_SET_DUMPABLE)
-     /* Set Linux DUMPABLE flag */
diff --git a/meta-openembedded/meta-networking/recipes-daemons/squid/files/Fix-flawed-dynamic-ldb-link-test-in-configure.patch b/meta-openembedded/meta-networking/recipes-daemons/squid/files/Fix-flawed-dynamic-ldb-link-test-in-configure.patch
index 1516bb0..a429b7b 100644
--- a/meta-openembedded/meta-networking/recipes-daemons/squid/files/Fix-flawed-dynamic-ldb-link-test-in-configure.patch
+++ b/meta-openembedded/meta-networking/recipes-daemons/squid/files/Fix-flawed-dynamic-ldb-link-test-in-configure.patch
@@ -19,7 +19,7 @@
 index d2f7feb..c7ae568 100644
 --- a/configure.ac
 +++ b/configure.ac
-@@ -3235,6 +3235,16 @@ case "$host" in
+@@ -3268,6 +3268,16 @@ case "$host" in
  		;;
  esac
  
diff --git a/meta-openembedded/meta-networking/recipes-daemons/squid/files/Skip-AC_RUN_IFELSE-tests.patch b/meta-openembedded/meta-networking/recipes-daemons/squid/files/Skip-AC_RUN_IFELSE-tests.patch
index dd83b62..1085333 100644
--- a/meta-openembedded/meta-networking/recipes-daemons/squid/files/Skip-AC_RUN_IFELSE-tests.patch
+++ b/meta-openembedded/meta-networking/recipes-daemons/squid/files/Skip-AC_RUN_IFELSE-tests.patch
@@ -38,10 +38,10 @@
    ])
  ]) dnl SQUID_CHECK_KRB5_HEIMDAL_BROKEN_KRB5_H
 diff --git a/acinclude/lib-checks.m4 b/acinclude/lib-checks.m4
-index 7624b56..b449c5a 100644
+index 1e9333527c..2d42787029 100644
 --- a/acinclude/lib-checks.m4
 +++ b/acinclude/lib-checks.m4
-@@ -217,7 +217,9 @@ AC_DEFUN([SQUID_CHECK_OPENSSL_CONST_SSL_METHOD],[
+@@ -227,7 +227,9 @@ AC_DEFUN([SQUID_CHECK_OPENSSL_CONST_SSL_METHOD],[
    [
     AC_MSG_RESULT([no])
    ],
@@ -52,14 +52,15 @@
  
  SQUID_STATE_ROLLBACK(check_const_SSL_METHOD)
  ])
-@@ -377,7 +379,9 @@ AC_DEFUN([SQUID_CHECK_OPENSSL_TXTDB],[
+@@ -386,8 +386,9 @@ AC_DEFUN([SQUID_CHECK_OPENSSL_TXTDB],[
+   [
     AC_MSG_RESULT([yes])
     AC_DEFINE(SQUID_USE_SSLLHASH_HACK, 1)
-   ],
+-  ],
 -[])
-+[
-+   AC_MSG_RESULT([skipped - can't test in cross-compiled env])
-+])
++  ],[
++    AC_MSG_RESULT([skipped - can't test in cross-compiled env])
++  ])
  
  SQUID_STATE_ROLLBACK(check_TXTDB)
  ])
diff --git a/meta-openembedded/meta-networking/recipes-daemons/squid/files/squid-don-t-do-squid-conf-tests-at-build-time.patch b/meta-openembedded/meta-networking/recipes-daemons/squid/files/squid-don-t-do-squid-conf-tests-at-build-time.patch
index e5267ea..ea27285 100644
--- a/meta-openembedded/meta-networking/recipes-daemons/squid/files/squid-don-t-do-squid-conf-tests-at-build-time.patch
+++ b/meta-openembedded/meta-networking/recipes-daemons/squid/files/squid-don-t-do-squid-conf-tests-at-build-time.patch
@@ -20,11 +20,11 @@
  1 file changed, 7 insertions(+), 8 deletions(-)
 
 diff --git a/test-suite/Makefile.am b/test-suite/Makefile.am
-index 061a463..350dfb2 100644
+index 0233c0e..e0021b6 100644
 --- a/test-suite/Makefile.am
 +++ b/test-suite/Makefile.am
-@@ -41,8 +41,7 @@ TESTS += debug \
- 	MemPoolTest\
+@@ -40,8 +40,7 @@ TESTS += debug \
+ 	splay\
  	mem_node_test\
  	mem_hdr_test\
 -	$(ESI_TESTS) \
@@ -33,7 +33,7 @@
  
  ## Sort by alpha - any build failures are significant.
  check_PROGRAMS += debug \
-@@ -125,19 +124,19 @@ VirtualDeleteOperator_SOURCES = VirtualDeleteOperator.cc $(DEBUG_SOURCE)
+@@ -159,19 +158,19 @@ VirtualDeleteOperator_SOURCES = \
  ##$(TARGLIB): $(LIBOBJS)
  ##	$(AR_R) $(TARGLIB) $(LIBOBJS)
  
diff --git a/meta-openembedded/meta-networking/recipes-daemons/squid/squid_4.15.bb b/meta-openembedded/meta-networking/recipes-daemons/squid/squid_5.7.bb
similarity index 86%
rename from meta-openembedded/meta-networking/recipes-daemons/squid/squid_4.15.bb
rename to meta-openembedded/meta-networking/recipes-daemons/squid/squid_5.7.bb
index a1122a3..a208a2a 100644
--- a/meta-openembedded/meta-networking/recipes-daemons/squid/squid_4.15.bb
+++ b/meta-openembedded/meta-networking/recipes-daemons/squid/squid_5.7.bb
@@ -22,17 +22,14 @@
            file://set_sysroot_patch.patch \
            file://squid-don-t-do-squid-conf-tests-at-build-time.patch \
            file://0001-configure-Check-for-Wno-error-format-truncation-comp.patch \
-           file://0001-tools.cc-fixed-unused-result-warning.patch \
-           file://0001-splay.cc-fix-bind-is-not-a-member-of-std.patch \
-           file://0001-Fix-build-on-Fedora-Rawhide-772.patch \
            "
 
 SRC_URI:remove:toolchain-clang = "file://0001-configure-Check-for-Wno-error-format-truncation-comp.patch"
 
-SRC_URI[sha256sum] = "71635811e766ce8b155225a9e3c7757cfc7ff93df26b28d82e5e6fc021b9a605"
+SRC_URI[sha256sum] = "4c17e1eb324c4b7aa3c6889eba66eeca7ed98625d44076f7db7b027b2b093bd5"
 
 LIC_FILES_CHKSUM = "file://COPYING;md5=b234ee4d69f5fce4486a80fdaf4a4263 \
-                    file://errors/COPYRIGHT;md5=0e03cd976052c45697ad5d96e7dff8dc \
+                    file://errors/COPYRIGHT;md5=0a7deb73d8fb7a9849af7145987829a4 \
                     "
 DEPENDS = "libtool krb5 openldap db cyrus-sasl"
 
@@ -80,12 +77,21 @@
     cp -rf ${B}/${TESTDIR} ${D}${PTEST_PATH}
     cp -rf ${S}/${TESTDIR} ${D}${PTEST_PATH}
 
+    # Needed to generate file squid.conf.default
+    oe_runmake DESTDIR=${D}${PTEST_PATH} -C src install-data-local
+    install -d ${D}${sysconfdir}/squid
+    install -m 0644 ${D}${PTEST_PATH}/${sysconfdir}/squid/squid.conf.default ${D}${sysconfdir}/squid
+
+    # Don't need these directories
+    rm -rf ${D}${PTEST_PATH}/${sysconfdir}
+    rm -rf ${D}${PTEST_PATH}/usr
+    rm -rf ${D}${PTEST_PATH}/var
+
     # do NOT need to rebuild Makefile itself
     sed -i 's/^Makefile:.*$/Makefile:/' ${D}${PTEST_PATH}/${TESTDIR}/Makefile
 
     # Add squid-conf-tests for runtime tests
     sed -e 's/^\(runtest-TESTS:\)/\1 squid-conf-tests/' \
-        -e "s/\(list=' \$(TESTS)\)/\1 squid-conf-tests/" \
         -i ${D}${PTEST_PATH}/${TESTDIR}/Makefile
 
     # Ensure the path for command true is correct
@@ -112,6 +118,7 @@
 FILES:${PN} += "${libdir} ${datadir}/errors ${datadir}/icons"
 FILES:${PN}-dbg += "/usr/src/debug"
 FILES:${PN}-doc += "${datadir}/*.txt"
+FILES:${PN}-ptest += "${sysconfdir}/squid/squid.conf.default"
 
 RDEPENDS:${PN} += "perl"
-RDEPENDS:${PN}-ptest += "make"
+RDEPENDS:${PN}-ptest += "perl make"
diff --git a/meta-openembedded/meta-networking/recipes-daemons/tftp-hpa/files/0001-tftp-Remove-double-inclusion-of-signal.h.patch b/meta-openembedded/meta-networking/recipes-daemons/tftp-hpa/files/0001-tftp-Remove-double-inclusion-of-signal.h.patch
new file mode 100644
index 0000000..84658d6
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-daemons/tftp-hpa/files/0001-tftp-Remove-double-inclusion-of-signal.h.patch
@@ -0,0 +1,39 @@
+From e9ed48d91642d384ce07cbb9bec788f07fc6354c Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 6 Sep 2022 14:51:42 -0700
+Subject: [PATCH] tftp: Remove double inclusion of signal.h
+
+Undefine __USE_XOPEN2K8 so we can get bsd_signal definition from system
+headers
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ config.h | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/config.h b/config.h
+index 3ff2915..ae1a14b 100644
+--- a/config.h
++++ b/config.h
+@@ -93,7 +93,6 @@
+ #endif
+ 
+ #include <errno.h>
+-#include <signal.h>
+ 
+ #ifdef HAVE_SYS_SOCKET_H
+ #include <sys/socket.h>
+@@ -296,6 +295,9 @@ char *xstrdup(const char *);
+ 
+ #ifndef HAVE_BSD_SIGNAL
+ void (*bsd_signal(int, void (*)(int))) (int);
++#else
++#undef __USE_XOPEN2K8
++#include <signal.h>
+ #endif
+ #ifndef HAVE_DUP2
+ int dup2(int, int);
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-networking/recipes-daemons/tftp-hpa/tftp-hpa_5.2.bb b/meta-openembedded/meta-networking/recipes-daemons/tftp-hpa/tftp-hpa_5.2.bb
index af42bda..565f493 100644
--- a/meta-openembedded/meta-networking/recipes-daemons/tftp-hpa/tftp-hpa_5.2.bb
+++ b/meta-openembedded/meta-networking/recipes-daemons/tftp-hpa/tftp-hpa_5.2.bb
@@ -29,6 +29,7 @@
            file://fix-writing-emtpy-file.patch \
            file://0001-__progname-is-provided-by-libc.patch \
            file://0001-tftp-Mark-toplevel-definition-as-external.patch \
+           file://0001-tftp-Remove-double-inclusion-of-signal.h.patch \
            file://tftpd-hpa.socket \
            file://tftpd-hpa.service \
 "
diff --git a/meta-openembedded/meta-networking/recipes-devtools/libcoap/libcoap/0001-coap_session.c-Balance-SESSIONS_ADD-and-SESSIONS_DEL.patch b/meta-openembedded/meta-networking/recipes-devtools/libcoap/libcoap/0001-coap_session.c-Balance-SESSIONS_ADD-and-SESSIONS_DEL.patch
deleted file mode 100644
index c8ac848..0000000
--- a/meta-openembedded/meta-networking/recipes-devtools/libcoap/libcoap/0001-coap_session.c-Balance-SESSIONS_ADD-and-SESSIONS_DEL.patch
+++ /dev/null
@@ -1,51 +0,0 @@
-From c56a64ff4df7eecb6c88ff929497bcd0d65934f2 Mon Sep 17 00:00:00 2001
-From: Jon Shallow <supjps-libcoap@jpshallow.com>
-Date: Fri, 22 Apr 2022 13:15:39 +0100
-Subject: [PATCH] coap_session.c: Balance SESSIONS_ADD and SESSIONS_DELETE
- usage
-
-Upstream-Status: Backport [https://github.com/obgm/libcoap/commit/7e20aa9ef17277f39203334404e6c776b1171a7d]
-Signed-off-by: Alex Kiernan <alex.kiernan@gmail.com>
-Signed-off-by: Alex Kiernan <alexk@zuma.ai>
----
- src/coap_session.c | 12 ++++++++++++
- 1 file changed, 12 insertions(+)
-
-diff --git a/src/coap_session.c b/src/coap_session.c
-index 77cae598f0af..536e57573361 100644
---- a/src/coap_session.c
-+++ b/src/coap_session.c
-@@ -813,6 +813,12 @@ coap_session_create_client(
-   return session;
- 
- error:
-+  /*
-+   * Need to add in the session as coap_session_release()
-+   * will call SESSIONS_DELETE in coap_session_free().
-+   */
-+  if (session)
-+    SESSIONS_ADD(ctx->sessions, session);
-   coap_session_release(session);
-   return NULL;
- }
-@@ -1133,11 +1139,17 @@ coap_session_t *coap_new_server_session(
-   if (session) {
-     coap_log(LOG_DEBUG, "***%s: new incoming session\n",
-              coap_session_str(session));
-+    /* Returned session may already have been released and is now NULL */
-     session = coap_session_accept(session);
-   }
-   return session;
- 
- error:
-+  /*
-+   * Need to add in the session as coap_session_release()
-+   * will call SESSIONS_DELETE in coap_session_free().
-+   */
-+  SESSIONS_ADD(ep->sessions, session);
-   coap_session_free(session);
-   return NULL;
- }
--- 
-2.35.1
-
diff --git a/meta-openembedded/meta-networking/recipes-devtools/libcoap/libcoap/0001-libcoap-Fix-gnu-configize-error.patch b/meta-openembedded/meta-networking/recipes-devtools/libcoap/libcoap/0001-libcoap-Fix-gnu-configize-error.patch
deleted file mode 100644
index 64f8d3a..0000000
--- a/meta-openembedded/meta-networking/recipes-devtools/libcoap/libcoap/0001-libcoap-Fix-gnu-configize-error.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From 25f1bebf1bc4c8da47f976d24a7a424253744e2e Mon Sep 17 00:00:00 2001
-From: Alex Kiernan <alex.kiernan@gmail.com>
-Date: Wed, 24 Mar 2021 09:10:17 +0000
-Subject: [PATCH] libcoap: Fix gnu-configize error
-
-Fix:
-
-  autoreconf: running: gnu-configize
-  gnu-configize: 'configure.ac' or 'configure.in' is required
-  autoreconf: gnu-configize failed with exit status: 1
-
-We're not pulling in the ext/tinydtls submodule, so this fails.
-
-Upstream-Status: Inappropriate [oe-specific]
-Signed-off-by: Alex Kiernan <alex.kiernan@gmail.com>
-Signed-off-by: Alex Kiernan <alexk@zuma.ai>
----
- configure.ac | 13 -------------
- 1 file changed, 13 deletions(-)
-
-diff --git a/configure.ac b/configure.ac
-index 9f51f4c67557..559808e03aa2 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -472,19 +472,6 @@ if test "x$build_dtls" = "xyes"; then
-         have_tinydtls="no" # don't confuse AC_MSG_RESULT at the end of the script
-     fi
- 
--    # The user wants to use explicit TinyDTLS if '--with-tinydtls was set'.
--    if test "x$with_tinydtls" = "xyes" ; then
--        if test -d "$srcdir/ext/tinydtls"; then
--           AC_CONFIG_SUBDIRS([ext/tinydtls])
--           have_tinydtls="yes"
--         else
--           have_tinydtls="no" # don't confuse AC_MSG_RESULT at the end of the script
--         fi
--         have_gnutls="no" # don't confuse AC_MSG_RESULT at the end of the script
--         have_openssl="no" # don't confuse AC_MSG_RESULT at the end of the script
--         have_mbedtls="no" # don't confuse AC_MSG_RESULT at the end of the script
--    fi
--
-     if test "$TLSCOUNT" -eq 0; then
-       # The user hasn't requested the use of a specific cryptography library
-       # we try first GnuTLS for usability ...
diff --git a/meta-openembedded/meta-networking/recipes-devtools/libcoap/libcoap_4.3.0.bb b/meta-openembedded/meta-networking/recipes-devtools/libcoap/libcoap_4.3.1.bb
similarity index 83%
rename from meta-openembedded/meta-networking/recipes-devtools/libcoap/libcoap_4.3.0.bb
rename to meta-openembedded/meta-networking/recipes-devtools/libcoap/libcoap_4.3.1.bb
index aba81d1..0fc3425 100644
--- a/meta-openembedded/meta-networking/recipes-devtools/libcoap/libcoap_4.3.0.bb
+++ b/meta-openembedded/meta-networking/recipes-devtools/libcoap/libcoap_4.3.1.bb
@@ -5,14 +5,12 @@
 HOMEPAGE ="https://libcoap.net/"
 
 LICENSE = "BSD-2-Clause & BSD-1-Clause"
-LIC_FILES_CHKSUM = "file://LICENSE;md5=e44b3af4925ec58e9f49b9ff143b5493"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=0fbe4435d52b2d27a16f980ffc8ffc80"
 
 SRC_URI = "git://github.com/obgm/libcoap.git;branch=main;protocol=https \
-           file://0001-libcoap-Fix-gnu-configize-error.patch \
-           file://0001-coap_session.c-Balance-SESSIONS_ADD-and-SESSIONS_DEL.patch \
            file://run-ptest \
            "
-SRCREV = "1da37b9abbe871675d5939395b498324ccc8ecfe"
+SRCREV = "02b76470ab9168947152c78ad50835bf043d7c84"
 
 S = "${WORKDIR}/git"
 
@@ -23,17 +21,17 @@
     ${@bb.utils.contains('PTEST_ENABLED', '1', 'tests', '', d)} \
 "
 PACKAGECONFIG[async] = "--enable-async,--disable-async"
-PACKAGECONFIG[gnutls] = "--with-gnutls,--without-gnutls,gnutls,,openssl mbedtls"
+PACKAGECONFIG[gnutls] = "--with-gnutls,--without-gnutls,gnutls,,,openssl mbedtls"
 PACKAGECONFIG[manpages] = "--enable-documentation --enable-doxygen --enable-manpages,--disable-documentation,asciidoc-native doxygen-native graphviz-native"
-PACKAGECONFIG[mbedtls] = "--with-mbedtls,--without-mbedtls,mbedtls,,gnutls openssl"
-PACKAGECONFIG[openssl] = "--with-openssl,--without-openssl,openssl,,gnutls mbedtls"
+PACKAGECONFIG[mbedtls] = "--with-mbedtls,--without-mbedtls,mbedtls,,,gnutls openssl"
+PACKAGECONFIG[openssl] = "--with-openssl,--without-openssl,openssl,,,gnutls mbedtls"
 PACKAGECONFIG[small-stack] = "--enable-small-stack,--disable-small-stack"
 PACKAGECONFIG[tcp] = "--enable-tcp,--disable-tcp"
 PACKAGECONFIG[tests] = "--enable-tests,--disable-tests,cunit"
 
 EXTRA_OECONF = "\
     --with-epoll --enable-add-default-names \
-    --without-tinydtls \
+    --without-tinydtls --without-submodule-tinydtls \
     ${@bb.utils.contains_any('PACKAGECONFIG', 'gnutls openssl mbedtls', '--enable-dtls', '--disable-dtls', d)} \
 "
 
diff --git a/meta-openembedded/meta-networking/recipes-extended/corosync/corosync_3.0.3.bb b/meta-openembedded/meta-networking/recipes-extended/corosync/corosync_3.1.6.bb
similarity index 96%
rename from meta-openembedded/meta-networking/recipes-extended/corosync/corosync_3.0.3.bb
rename to meta-openembedded/meta-networking/recipes-extended/corosync/corosync_3.1.6.bb
index ec2a785..8fca576 100644
--- a/meta-openembedded/meta-networking/recipes-extended/corosync/corosync_3.0.3.bb
+++ b/meta-openembedded/meta-networking/recipes-extended/corosync/corosync_3.1.6.bb
@@ -10,7 +10,7 @@
 SRC_URI = "https://github.com/${BPN}/${BPN}/releases/download/v${PV}/${BP}.tar.gz \
            file://corosync.conf \
           "
-SRC_URI[sha256sum] = "20eb903eb984f6a728282c199825e442e8bba869acefd22390076ef3a33a4ded"
+SRC_URI[sha256sum] = "ca6ed32b4d7f33ed614afce8760fe58d0de92c68b575d4969ebacd892f3d1e27"
 UPSTREAM_CHECK_REGEX = "(?P<pver>\d+\.(?!99)\d+(\.\d+)+)"
 
 LICENSE = "BSD-3-Clause"
diff --git a/meta-openembedded/meta-networking/recipes-extended/dlm/dlm/0001-dlm-fix-compile-error-since-xml2-config-should-not-b.patch b/meta-openembedded/meta-networking/recipes-extended/dlm/dlm/0001-dlm-fix-compile-error-since-xml2-config-should-not-b.patch
index f56359a..3d15515 100644
--- a/meta-openembedded/meta-networking/recipes-extended/dlm/dlm/0001-dlm-fix-compile-error-since-xml2-config-should-not-b.patch
+++ b/meta-openembedded/meta-networking/recipes-extended/dlm/dlm/0001-dlm-fix-compile-error-since-xml2-config-should-not-b.patch
@@ -1,4 +1,4 @@
-From ad207a6f83dac390b2d95e3a8262d27292921863 Mon Sep 17 00:00:00 2001
+From da08f5ec5e553bd43f92a0b0f7476179b0b74502 Mon Sep 17 00:00:00 2001
 From: Changqing Li <changqing.li@windriver.com>
 Date: Wed, 26 Jun 2019 11:49:33 +0800
 Subject: [PATCH] dlm: fix compile error since xml2-config should not be used
@@ -9,28 +9,27 @@
 Upstream-Status: Inappropriate [oe-specific]
 
 Signed-off-by: Changqing Li <changqing.li@windriver.com>
+
 ---
  fence/Makefile | 4 ++--
  1 file changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/fence/Makefile b/fence/Makefile
-index b927879..6e16078 100644
+index 2b080468..ff2eda3f 100644
 --- a/fence/Makefile
 +++ b/fence/Makefile
-@@ -18,11 +18,11 @@ CFLAGS += -D_GNU_SOURCE -O2 -ggdb \
+@@ -18,12 +18,12 @@ CFLAGS += -D_GNU_SOURCE -O2 -ggdb \
  	-fstack-clash-protection -Wl,-z,now
  
  CFLAGS += -fPIE -DPIE
 -CFLAGS += `xml2-config --cflags`
 +CFLAGS += `pkg-config libxml-2.0 --cflags`
  CFLAGS += -I../include
+ CFLAGS += $(shell pkg-config --cflags pacemaker-fencing)
  
- LDFLAGS += -Wl,-z,relro -pie
+ LDFLAGS += -Wl,-z,relro -Wl,-z,defs -pie
 -LDFLAGS += `xml2-config --libs`
 +LDFLAGS += `pkg-config libxml-2.0 --libs`
  LDFLAGS += -ldl
  
  all: $(BIN_TARGET)
--- 
-2.7.4
-
diff --git a/meta-openembedded/meta-networking/recipes-extended/dlm/dlm/0004-include-string.h-for-memset-prototype.patch b/meta-openembedded/meta-networking/recipes-extended/dlm/dlm/0004-include-string.h-for-memset-prototype.patch
new file mode 100644
index 0000000..257c5d0
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-extended/dlm/dlm/0004-include-string.h-for-memset-prototype.patch
@@ -0,0 +1,23 @@
+From 9652e6b3c43b4c051f2ff0e000d7ebf5fbab418e Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 10:54:51 -0700
+Subject: [PATCH] include string.h for memset prototype
+
+Upstream-Status: Submitted [https://pagure.io/dlm/pull-request/3]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ dlm_controld/lib.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/dlm_controld/lib.c b/dlm_controld/lib.c
+index 8cbdd27f..a7502fcd 100644
+--- a/dlm_controld/lib.c
++++ b/dlm_controld/lib.c
+@@ -10,6 +10,7 @@
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <stdint.h>
++#include <string.h>
+ #include <errno.h>
+ #include <time.h>
+ #include <sys/types.h>
diff --git a/meta-openembedded/meta-networking/recipes-extended/dlm/dlm_4.0.9.bb b/meta-openembedded/meta-networking/recipes-extended/dlm/dlm_4.1.1.bb
similarity index 91%
rename from meta-openembedded/meta-networking/recipes-extended/dlm/dlm_4.0.9.bb
rename to meta-openembedded/meta-networking/recipes-extended/dlm/dlm_4.1.1.bb
index 3e699d2..bb33890 100644
--- a/meta-openembedded/meta-networking/recipes-extended/dlm/dlm_4.0.9.bb
+++ b/meta-openembedded/meta-networking/recipes-extended/dlm/dlm_4.1.1.bb
@@ -9,10 +9,10 @@
            file://0001-dlm-fix-compile-error-since-xml2-config-should-not-b.patch \
            file://0001-Include-sys-sysmacros.h-for-major-minor-macros-in-gl.patch \
            file://0001-make-Replace-cp-a-with-mode-preserving-options.patch \
+           file://0004-include-string.h-for-memset-prototype.patch \
            "
 
-SRC_URI[md5sum] = "4c57a941a15547859cd38fd55f66388e"
-SRC_URI[sha256sum] = "d59142e067cbd603aaf66151a04e9fa34330219680b8827c953d20821b951991"
+SRC_URI[sha256sum] = "f12c0056b9196dfcecbec2fa8930feb87c605a86ef0f3d7bd6fb0b77cd7f45ca"
 
 UPSTREAM_CHECK_URI = "https://pagure.io/dlm/releases"
 UPSTREAM_CHECK_REGEX = "dlm-(?P<pver>\d+(\.\d+)+)"
diff --git a/meta-openembedded/meta-networking/recipes-netkit/netkit-telnet/files/0001-utility-Include-time.h-form-time-and-strftime-protot.patch b/meta-openembedded/meta-networking/recipes-netkit/netkit-telnet/files/0001-utility-Include-time.h-form-time-and-strftime-protot.patch
new file mode 100644
index 0000000..a5e634c
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-netkit/netkit-telnet/files/0001-utility-Include-time.h-form-time-and-strftime-protot.patch
@@ -0,0 +1,27 @@
+From 23b068e695881be0e8205ecccadf775fc3d5889d Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 6 Sep 2022 21:25:50 -0700
+Subject: [PATCH] utility: Include time.h for time() and strftime() prototypes
+
+Upstream-Status: Pending
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ telnetd/utility.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/telnetd/utility.c b/telnetd/utility.c
+index 4811f14..b539777 100644
+--- a/telnetd/utility.c
++++ b/telnetd/utility.c
+@@ -40,6 +40,7 @@ char util_rcsid[] =
+ #define PRINTOPTIONS
+ 
+ #include <stdarg.h>
++#include <time.h> /* for time() and strftime() */
+ #include <sys/utsname.h>
+ 
+ #ifdef AUTHENTICATE
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-networking/recipes-netkit/netkit-telnet/netkit-telnet_0.17.bb b/meta-openembedded/meta-networking/recipes-netkit/netkit-telnet/netkit-telnet_0.17.bb
index 56860ea..6d60c6f 100644
--- a/meta-openembedded/meta-networking/recipes-netkit/netkit-telnet/netkit-telnet_0.17.bb
+++ b/meta-openembedded/meta-networking/recipes-netkit/netkit-telnet/netkit-telnet_0.17.bb
@@ -14,6 +14,7 @@
            file://0001-telnet-telnetd-Fix-deadlock-on-cleanup.patch \
            file://CVE-2020-10188.patch \
            file://0001-telnetd-utility.c-Fix-buffer-overflow-in-netoprintf.patch \
+           file://0001-utility-Include-time.h-form-time-and-strftime-protot.patch \
            "
 
 UPSTREAM_CHECK_URI = "${DEBIAN_MIRROR}/main/n/netkit-telnet/"
diff --git a/meta-openembedded/meta-networking/recipes-protocols/dante/dante_1.4.1.bb b/meta-openembedded/meta-networking/recipes-protocols/dante/dante_1.4.3.bb
similarity index 82%
rename from meta-openembedded/meta-networking/recipes-protocols/dante/dante_1.4.1.bb
rename to meta-openembedded/meta-networking/recipes-protocols/dante/dante_1.4.3.bb
index 48f9708..746c08b 100644
--- a/meta-openembedded/meta-networking/recipes-protocols/dante/dante_1.4.1.bb
+++ b/meta-openembedded/meta-networking/recipes-protocols/dante/dante_1.4.3.bb
@@ -9,12 +9,11 @@
 HOMEPAGE = "http://www.inet.no/dante/"
 
 LICENSE = "BSD-3-Clause"
-LIC_FILES_CHKSUM = "file://LICENSE;md5=221118dda731fe93a85d0ed973467249"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=edd508404db7339042dfc861a3a690ad"
 
 SRC_URI = "https://www.inet.no/dante/files/dante-${PV}.tar.gz \
           "
-SRC_URI[md5sum] = "68c2ce12119e12cea11a90c7a80efa8f"
-SRC_URI[sha256sum] = "b6d232bd6fefc87d14bf97e447e4fcdeef4b28b16b048d804b50b48f261c4f53"
+SRC_URI[sha256sum] = "418a065fe1a4b8ace8fbf77c2da269a98f376e7115902e76cda7e741e4846a5d"
 
 # without --without-gssapi, config.log will contain reference to /usr/lib
 # as a consequence of GSSAPI path being set to /usr by default.
@@ -23,16 +22,17 @@
 # --enable-release        build prerelease as full release
 EXTRA_OECONF += "--without-gssapi --sbindir=${bindir}"
 
-DEPENDS += "flex-native bison-native libpam"
-
+DEPENDS += "flex-native bison-native libpam libtirpc"
 inherit autotools-brokensep features_check
 
+CFLAGS += "-I${STAGING_INCDIR}/tirpc"
+LIBS += "-ltirpc"
+
 REQUIRED_DISTRO_FEATURES = "pam"
 
 EXTRA_AUTORECONF = "-I ${S}"
 
 PACKAGECONFIG[libwrap] = ",--disable-libwrap,tcp-wrappers,libwrap"
-PACKAGECONFIG[krb5] = ",--without-krb5,krb5"
 
 PACKAGECONFIG ??= ""
 
diff --git a/meta-openembedded/meta-networking/recipes-protocols/frr/frr/0001-configure-Check-for-readline-function-instead-of-mai.patch b/meta-openembedded/meta-networking/recipes-protocols/frr/frr/0001-configure-Check-for-readline-function-instead-of-mai.patch
new file mode 100644
index 0000000..4b218a6
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-protocols/frr/frr/0001-configure-Check-for-readline-function-instead-of-mai.patch
@@ -0,0 +1,30 @@
+From 9399d58c13257849179d3c2b3698a2b43bc1b2a0 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 10:39:23 -0700
+Subject: [PATCH] configure: Check for readline() function instead of main
+
+main is not a function found in libreadline, it's better to check for a
+function that's provided by it.
+
+Upstream-Status: Submitted [https://github.com/FRRouting/frr/pull/11893]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.ac | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/configure.ac b/configure.ac
+index b7e17d356..8c1fab0ea 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -1372,7 +1372,7 @@ case "${enable_vtysh}" in
+   AC_DEFINE([VTYSH], [1], [VTY shell])
+ 
+   prev_libs="$LIBS"
+-  AC_CHECK_LIB([readline], [main], [
++  AC_CHECK_LIB([readline], [readline], [
+     LIBREADLINE="-lreadline"
+   ], [
+     dnl readline failed - it might be incorrectly linked and missing its
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-networking/recipes-protocols/frr/frr_8.2.2.bb b/meta-openembedded/meta-networking/recipes-protocols/frr/frr_8.2.2.bb
index f0d0dbf..607ed8b 100644
--- a/meta-openembedded/meta-networking/recipes-protocols/frr/frr_8.2.2.bb
+++ b/meta-openembedded/meta-networking/recipes-protocols/frr/frr_8.2.2.bb
@@ -10,6 +10,7 @@
                     file://COPYING-LGPLv2.1;md5=4fbd65380cdd255951079008b364516c"
 
 SRC_URI = "git://github.com/FRRouting/frr.git;protocol=https;branch=stable/8.2 \
+           file://0001-configure-Check-for-readline-function-instead-of-mai.patch \
            file://frr.pam \
 	      "
 
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openflow/openflow/0001-socket-util-Include-sys-stat.h-for-fchmod.patch b/meta-openembedded/meta-networking/recipes-protocols/openflow/openflow/0001-socket-util-Include-sys-stat.h-for-fchmod.patch
new file mode 100644
index 0000000..97e3422
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-protocols/openflow/openflow/0001-socket-util-Include-sys-stat.h-for-fchmod.patch
@@ -0,0 +1,23 @@
+From b875c6e264eaf7350ad4e4ebf427692d8fd3cd72 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 12:58:53 -0700
+Subject: [PATCH] socket-util: Include sys/stat.h for fchmod
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ lib/socket-util.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/lib/socket-util.c b/lib/socket-util.c
+index c7b5d6d..5b3d602 100644
+--- a/lib/socket-util.c
++++ b/lib/socket-util.c
+@@ -42,6 +42,7 @@
+ #include <stdio.h>
+ #include <string.h>
+ #include <sys/resource.h>
++#include <sys/stat.h>
+ #include <sys/un.h>
+ #include <unistd.h>
+ #include "fatal-signal.h"
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openflow/openflow_git.bb b/meta-openembedded/meta-networking/recipes-protocols/openflow/openflow_git.bb
index b6cef07..1ac5dd4 100644
--- a/meta-openembedded/meta-networking/recipes-protocols/openflow/openflow_git.bb
+++ b/meta-openembedded/meta-networking/recipes-protocols/openflow/openflow_git.bb
@@ -3,8 +3,8 @@
 SRCREV = "c84f33f09d5dbcfc9b489f64cb30475bf36f653a"
 PV = "1.0+git${SRCPV}"
 
-SRC_URI += "\
-           file://0001-Check-and-use-strlcpy-from-libc-before-defining-own.patch \
+SRC_URI += "file://0001-Check-and-use-strlcpy-from-libc-before-defining-own.patch \
            file://0002-lib-netdev-Adjust-header-include-sequence.patch \
            file://0001-generate-not-static-get_dh-functions.patch \
+           file://0001-socket-util-Include-sys-stat.h-for-fchmod.patch \
            "
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openl2tp/openl2tp/0001-l2tp_rpc_server.c-Add-missing-prototype-for-l2tp_api.patch b/meta-openembedded/meta-networking/recipes-protocols/openl2tp/openl2tp/0001-l2tp_rpc_server.c-Add-missing-prototype-for-l2tp_api.patch
new file mode 100644
index 0000000..d9aed88
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-protocols/openl2tp/openl2tp/0001-l2tp_rpc_server.c-Add-missing-prototype-for-l2tp_api.patch
@@ -0,0 +1,28 @@
+From ded84ed583e9b0617bc35ab1798032d18b873144 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 23:30:02 -0700
+Subject: [PATCH] l2tp_rpc_server.c: Add missing prototype for
+ l2tp_api_rpc_check_request
+
+Upstream-Status: Inappropriate [no upstream]
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ Makefile | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/Makefile b/Makefile
+index 0815b31..2fa5b2f 100644
+--- a/Makefile
++++ b/Makefile
+@@ -236,6 +236,7 @@ endif
+ 			-$(RM) $@ $@.tmp
+ 			$(RPCGEN) $(RPCGENFLAGS) -m -o $@.tmp $<
+ 			cat $@.tmp | sed -e 's/switch (rqstp->rq_proc) {/if (l2tp_api_rpc_check_request(transp) < 0) return; switch (rqstp->rq_proc) {/' > $@
++			sed -i '21i int l2tp_api_rpc_check_request(SVCXPRT *xprt);' $@
+ 
+ %_client.c:		%.x
+ 			-$(RM) $@
+-- 
+2.37.2
+
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openl2tp/openl2tp/0001-lex-yacc-Add-missing-function-prototypes.patch b/meta-openembedded/meta-networking/recipes-protocols/openl2tp/openl2tp/0001-lex-yacc-Add-missing-function-prototypes.patch
new file mode 100644
index 0000000..8c21a74
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-protocols/openl2tp/openl2tp/0001-lex-yacc-Add-missing-function-prototypes.patch
@@ -0,0 +1,45 @@
+From 2bfdd02d288de92ff118bf41b54c135a6a318c19 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 23:42:11 -0700
+Subject: [PATCH] lex/yacc: Add missing function prototypes
+
+Fixes build with clang15
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ l2tp_config_parse.y | 3 +++
+ l2tp_config_token.l | 3 +++
+ 2 files changed, 6 insertions(+)
+
+diff --git a/l2tp_config_parse.y b/l2tp_config_parse.y
+index 4baf1e0..15392d0 100644
+--- a/l2tp_config_parse.y
++++ b/l2tp_config_parse.y
+@@ -29,6 +29,9 @@ static struct l2tp_api_session_msg_data session;
+ 
+ extern void l2tp_log(int level, char *fmt, ...);
+ extern void yyfatal(const char *s);
++extern const char *l2tp_strerror(int error);
++extern int yylex (void);
++extern void yyerror(const char *s);
+ 
+ %}
+ 
+diff --git a/l2tp_config_token.l b/l2tp_config_token.l
+index 9016af6..43b8f0a 100644
+--- a/l2tp_config_token.l
++++ b/l2tp_config_token.l
+@@ -15,6 +15,9 @@
+ #include "l2tp_config_types.h"
+ #include "l2tp_config_parse.h"
+ 
++extern void l2tp_log(int level, char *fmt, ...);
++extern const char *l2tp_strerror(int error);
++
+ void yyfatal(const char *s);
+ void yyerror(const char *s);
+ 
+-- 
+2.37.2
+
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openl2tp/openl2tp_1.8.bb b/meta-openembedded/meta-networking/recipes-protocols/openl2tp/openl2tp_1.8.bb
index 15cebf4..183c006 100644
--- a/meta-openembedded/meta-networking/recipes-protocols/openl2tp/openl2tp_1.8.bb
+++ b/meta-openembedded/meta-networking/recipes-protocols/openl2tp/openl2tp_1.8.bb
@@ -35,6 +35,8 @@
            file://run-ptest \
            file://fix_linux_4.15_compile.patch \
            file://0002-user-ipv6-structures.patch \
+           file://0001-l2tp_rpc_server.c-Add-missing-prototype-for-l2tp_api.patch \
+           file://0001-lex-yacc-Add-missing-function-prototypes.patch \
            "
 SRC_URI[md5sum] = "e3d08dedfb9e6a9a1e24f6766f6dadd0"
 SRC_URI[sha256sum] = "1c97704d4b963a87fbc0e741668d4530933991515ae9ab0dffd11b5444f4860f"
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0001-cmds-fix-enum-conversion.patch b/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0001-cmds-fix-enum-conversion.patch
deleted file mode 100644
index 680f6b6..0000000
--- a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0001-cmds-fix-enum-conversion.patch
+++ /dev/null
@@ -1,91 +0,0 @@
-From 090854e09fe18ebf1ba428864895a690086f78ee Mon Sep 17 00:00:00 2001
-From: Aaron Conole <aconole@redhat.com>
-Date: Mon, 19 Aug 2019 11:41:13 -0400
-Subject: [PATCH 1/9] cmds: fix enum conversion
-
-Multiple modules use implicit conversion between enum types, but
-this triggers warnings with some compilers.  ex:
-
-  qbg/vdp_cmds.c:110:39: error: implicit conversion from enumeration type
-      'lldp_cmd' to different enumeration type 'cmd_status'
-      [-Werror,-Wenum-conversion]
-        cmd_status good_cmd = vdp_cmdok(cmd, cmd_gettlv);
-
-Reported-at: https://github.com/intel/openlldp/issues/53
-Signed-off-by: Aaron Conole <aconole@redhat.com>
----
- lldp_evb22_cmds.c | 2 +-
- lldp_evb_cmds.c   | 2 +-
- qbg/vdp22_cmds.c  | 2 +-
- qbg/vdp_cmds.c    | 2 +-
- vdptool.c         | 2 +-
- 5 files changed, 5 insertions(+), 5 deletions(-)
-
-diff --git a/lldp_evb22_cmds.c b/lldp_evb22_cmds.c
-index cebfeb2..51810bc 100644
---- a/lldp_evb22_cmds.c
-+++ b/lldp_evb22_cmds.c
-@@ -225,7 +225,7 @@ int evb22_conf_enabletx(char *ifname, enum agent_type type)
- 				TLVID(OUI_IEEE_8021Qbg22, LLDP_EVB22_SUBTYPE));
- }
- 
--static int evb22_cmdok(struct cmd *cmd, cmd_status expected)
-+static int evb22_cmdok(struct cmd *cmd, int expected)
- {
- 	if (cmd->cmd != expected)
- 		return cmd_invalid;
-diff --git a/lldp_evb_cmds.c b/lldp_evb_cmds.c
-index eec4f33..e6af03b 100644
---- a/lldp_evb_cmds.c
-+++ b/lldp_evb_cmds.c
-@@ -163,7 +163,7 @@ int evb_conf_enabletx(char *ifname, enum agent_type type)
- 	return is_tlv_txenabled(ifname, type, TLVID_8021Qbg(LLDP_EVB_SUBTYPE));
- }
- 
--static int evb_cmdok(struct cmd *cmd, cmd_status expected)
-+static int evb_cmdok(struct cmd *cmd, int expected)
- {
- 	if (cmd->cmd != expected)
- 		return cmd_invalid;
-diff --git a/qbg/vdp22_cmds.c b/qbg/vdp22_cmds.c
-index 479b1b0..2e1bbbd 100644
---- a/qbg/vdp22_cmds.c
-+++ b/qbg/vdp22_cmds.c
-@@ -296,7 +296,7 @@ int vdp22_sendevent(struct vdpnl_vsi *p)
- 	return 0;
- }
- 
--static int vdp22_cmdok(struct cmd *cmd, cmd_status expected)
-+static int vdp22_cmdok(struct cmd *cmd, int expected)
- {
- 	if (cmd->cmd != expected)
- 		return cmd_invalid;
-diff --git a/qbg/vdp_cmds.c b/qbg/vdp_cmds.c
-index 95bcfb1..50f2781 100644
---- a/qbg/vdp_cmds.c
-+++ b/qbg/vdp_cmds.c
-@@ -85,7 +85,7 @@ static char *print_mode(char *s, size_t length, struct vsi_profile *p)
- 	return s;
- }
- 
--static int vdp_cmdok(struct cmd *cmd, cmd_status expected)
-+static int vdp_cmdok(struct cmd *cmd, int expected)
- {
- 	if (cmd->cmd != expected)
- 		return cmd_invalid;
-diff --git a/vdptool.c b/vdptool.c
-index 9872348..8f36277 100644
---- a/vdptool.c
-+++ b/vdptool.c
-@@ -141,7 +141,7 @@ static char *print_status(cmd_status status)
- 		str = "TLV does not support agent type";
- 		break;
- 	default:
--		str = print_vdp_status(status);
-+		str = print_vdp_status((enum vdp22_cmd_status)status);
- 		break;
- 	}
- 	return str;
--- 
-2.28.0
-
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0002-lldp_head-rename-and-make-extern.patch b/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0002-lldp_head-rename-and-make-extern.patch
deleted file mode 100644
index 8f65b79..0000000
--- a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0002-lldp_head-rename-and-make-extern.patch
+++ /dev/null
@@ -1,500 +0,0 @@
-From 07a83c583b9d508c7040dc6254a6a7113b2ce55f Mon Sep 17 00:00:00 2001
-From: Aaron Conole <aconole@redhat.com>
-Date: Mon, 3 Aug 2020 15:05:42 -0400
-Subject: [PATCH 2/9] lldp_head: rename and make extern
-
-Try to resolve this silly mod issue
-
-Signed-off-by: Aaron Conole <aconole@redhat.com>
----
- config.c           |  4 ++--
- event_iface.c      |  4 ++--
- include/lldp_mod.h |  2 +-
- lldp/agent.c       |  2 +-
- lldp/rx.c          |  4 ++--
- lldp/tx.c          |  4 ++--
- lldp_dcbx.c        |  8 ++++----
- lldp_mand_cmds.c   |  8 ++++----
- lldpad.c           | 12 +++++++-----
- lldptool.c         | 14 ++++++++------
- lldptool_cmds.c    |  2 +-
- qbg/ecp22.c        |  8 ++++----
- qbg/vdp.c          |  4 ++--
- qbg/vdp22.c        |  6 +++---
- qbg/vdp22_cmds.c   |  2 +-
- vdptool.c          | 16 +++++++++-------
- 16 files changed, 53 insertions(+), 47 deletions(-)
-
-diff --git a/config.c b/config.c
-index be458fd..bc3fbae 100644
---- a/config.c
-+++ b/config.c
-@@ -185,7 +185,7 @@ void scan_port(UNUSED void *eloop_data, UNUSED void *user_ctx)
- 		LIST_FOREACH(agent, &port->agent_head, entry) {
- 			LLDPAD_DBG("%s: calling ifdown for agent %p.\n",
- 				   __func__, agent);
--			LIST_FOREACH(np, &lldp_head, lldp) {
-+			LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 				ops = np->ops;
- 				if (ops->lldp_mod_ifdown)
- 					ops->lldp_mod_ifdown(ifname, agent);
-@@ -394,7 +394,7 @@ void init_ports(void)
- 		LIST_FOREACH(agent, &port->agent_head, entry) {
- 			LLDPAD_DBG("%s: calling ifup for agent %p.\n",
- 				   __func__, agent);
--			LIST_FOREACH(np, &lldp_head, lldp) {
-+			LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 				if (np->ops->lldp_mod_ifup)
- 					np->ops->lldp_mod_ifup(p->if_name, agent);
- 			}
-diff --git a/event_iface.c b/event_iface.c
-index 43a95c7..1be2963 100644
---- a/event_iface.c
-+++ b/event_iface.c
-@@ -216,7 +216,7 @@ int oper_add_device(char *device_name)
- 	LIST_FOREACH(agent, &port->agent_head, entry) {
- 		LLDPAD_DBG("%s: calling ifup for agent %p.\n",
- 			   __func__, agent);
--		LIST_FOREACH(np, &lldp_head, lldp) {
-+		LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 			if (np->ops->lldp_mod_ifup)
- 				np->ops->lldp_mod_ifup(device_name, agent);
- 		}
-@@ -283,7 +283,7 @@ static void event_if_decode_nlmsg(int route_type, void *data, int len)
- 			LIST_FOREACH(agent, &port->agent_head, entry) {
- 				LLDPAD_DBG("%s: calling ifdown for agent %p.\n",
- 					   __func__, agent);
--				LIST_FOREACH(np, &lldp_head, lldp) {
-+				LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 					ops = np->ops;
- 					if (ops->lldp_mod_ifdown)
- 						ops->lldp_mod_ifdown(device_name,
-diff --git a/include/lldp_mod.h b/include/lldp_mod.h
-index 49a50e4..9733595 100644
---- a/include/lldp_mod.h
-+++ b/include/lldp_mod.h
-@@ -96,7 +96,7 @@ struct lldp_module {
- };
- 
- LIST_HEAD(lldp_head, lldp_module);
--struct lldp_head lldp_head;
-+extern struct lldp_head lldp_mod_head;
- 
- static inline struct lldp_module *find_module_by_id(struct lldp_head *head, int id)
- {
-diff --git a/lldp/agent.c b/lldp/agent.c
-index 333929a..aa4a8d1 100644
---- a/lldp/agent.c
-+++ b/lldp/agent.c
-@@ -163,7 +163,7 @@ static void timer(UNUSED void *eloop_data, UNUSED void *user_ctx)
- 			run_rx_sm(port, agent);
- 			update_rx_timers(agent);
- 
--			LIST_FOREACH(n, &lldp_head, lldp) {
-+			LIST_FOREACH(n, &lldp_mod_head, lldp) {
- 				if (n->ops && n->ops->timer)
- 					n->ops->timer(port, agent);
- 			}
-diff --git a/lldp/rx.c b/lldp/rx.c
-index 12d07bc..43aeeba 100644
---- a/lldp/rx.c
-+++ b/lldp/rx.c
-@@ -359,7 +359,7 @@ void rxProcessFrame(struct port *port, struct lldp_agent *agent)
- 		}
- 
- 		/* rx per lldp module */
--		LIST_FOREACH(np, &lldp_head, lldp) {
-+		LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 			if (!np->ops || !np->ops->lldp_mod_rchange)
- 				continue;
- 
-@@ -402,7 +402,7 @@ u8 mibDeleteObjects(struct port *port, struct lldp_agent *agent)
- {
- 	struct lldp_module *np;
- 
--	LIST_FOREACH(np, &lldp_head, lldp) {
-+	LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 		if (!np->ops || !np->ops->lldp_mod_mibdelete)
- 			continue;
- 		np->ops->lldp_mod_mibdelete(port, agent);
-diff --git a/lldp/tx.c b/lldp/tx.c
-index c3a5c62..0746e34 100644
---- a/lldp/tx.c
-+++ b/lldp/tx.c
-@@ -71,7 +71,7 @@ bool mibConstrInfoLLDPDU(struct port *port, struct lldp_agent *agent)
- 	fb_offset += sizeof(struct l2_ethhdr);
- 
- 	/* Generic TLV Pack */
--	LIST_FOREACH(np, &lldp_head, lldp) {
-+	LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 		if (!np->ops || !np->ops->lldp_mod_gettlv)
- 			continue;
- 
-@@ -206,7 +206,7 @@ bool mibConstrShutdownLLDPDU(struct port *port, struct lldp_agent *agent)
- 	memcpy(agent->tx.frameout, (void *)&eth, sizeof(struct l2_ethhdr));
- 	fb_offset += sizeof(struct l2_ethhdr);
- 
--	np = find_module_by_id(&lldp_head, LLDP_MOD_MAND);
-+	np = find_module_by_id(&lldp_mod_head, LLDP_MOD_MAND);
- 	if (!np)
- 		goto error;
- 	if (!np->ops || !np->ops->lldp_mod_gettlv)
-diff --git a/lldp_dcbx.c b/lldp_dcbx.c
-index 7e53c0f..809086f 100644
---- a/lldp_dcbx.c
-+++ b/lldp_dcbx.c
-@@ -129,7 +129,7 @@ struct dcbx_tlvs *dcbx_data(const char *ifname)
- 	struct dcbd_user_data *dud;
- 	struct dcbx_tlvs *tlv = NULL;
- 
--	dud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_DCBX);
-+	dud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_DCBX);
- 	if (dud) {
- 		LIST_FOREACH(tlv, &dud->head, entry) {
- 			if (!strncmp(tlv->ifname, ifname, IFNAMSIZ))
-@@ -148,7 +148,7 @@ int dcbx_tlvs_rxed(const char *ifname, struct lldp_agent *agent)
- 	if (agent->type != NEAREST_BRIDGE)
- 		return 0;
- 
--	dud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_DCBX);
-+	dud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_DCBX);
- 	if (dud) {
- 		LIST_FOREACH(tlv, &dud->head, entry) {
- 			if (!strncmp(tlv->ifname, ifname, IFNAMSIZ))
-@@ -172,7 +172,7 @@ int dcbx_check_active(const char *ifname)
- 	struct dcbd_user_data *dud;
- 	struct dcbx_tlvs *tlv = NULL;
- 
--	dud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_DCBX);
-+	dud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_DCBX);
- 	if (dud) {
- 		LIST_FOREACH(tlv, &dud->head, entry) {
- 			if (!strncmp(tlv->ifname, ifname, IFNAMSIZ))
-@@ -507,7 +507,7 @@ void dcbx_ifup(char *ifname, struct lldp_agent *agent)
- 	ifindex = get_ifidx(ifname);
- 	port = port_find_by_ifindex(ifindex);
- 
--	dud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_DCBX);
-+	dud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_DCBX);
- 	tlvs = dcbx_data(ifname);
- 
- 	if (!port)
-diff --git a/lldp_mand_cmds.c b/lldp_mand_cmds.c
-index 8a88177..1c43bf4 100644
---- a/lldp_mand_cmds.c
-+++ b/lldp_mand_cmds.c
-@@ -466,7 +466,7 @@ int handle_get_args(struct cmd *cmd, UNUSED char *arg, char *argvalue,
- 	nbuf = obuf;
- 	nbuf_len = obuf_len;
- 
--	LIST_FOREACH(np, &lldp_head, lldp) {
-+	LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 		if (!np->ops->get_arg_handler)
- 			continue;
- 		if (!(ah = np->ops->get_arg_handler()))
-@@ -496,7 +496,7 @@ int handle_get_arg(struct cmd *cmd, char *arg, char *argvalue,
- 	struct arg_handlers *ah;
- 	int rval, status = cmd_not_applicable;
- 
--	LIST_FOREACH(np, &lldp_head, lldp) {
-+	LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 		if (!np->ops->get_arg_handler)
- 			continue;
- 		if (!(ah = np->ops->get_arg_handler()))
-@@ -593,7 +593,7 @@ int handle_test_arg(struct cmd *cmd, char *arg, char *argvalue,
- 	struct arg_handlers *ah;
- 	int rval, status = cmd_not_applicable;
- 
--	LIST_FOREACH(np, &lldp_head, lldp) {
-+	LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 		if (!np->ops->get_arg_handler)
- 			continue;
- 		if (!(ah = np->ops->get_arg_handler()))
-@@ -626,7 +626,7 @@ int handle_set_arg(struct cmd *cmd, char *arg, char *argvalue,
- 	struct arg_handlers *ah;
- 	int rval, status = cmd_not_applicable;
- 
--	LIST_FOREACH(np, &lldp_head, lldp) {
-+	LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 		if (!np->ops->get_arg_handler)
- 			continue;
- 		if (!(ah = np->ops->get_arg_handler()))
-diff --git a/lldpad.c b/lldpad.c
-index 01fb588..65e92c7 100644
---- a/lldpad.c
-+++ b/lldpad.c
-@@ -80,6 +80,8 @@ struct lldp_module *(*register_tlv_table[])(void) = {
- 	NULL,
- };
- 
-+struct lldp_head lldp_mod_head;
-+
- char *cfg_file_name = NULL;
- bool daemonize = 0;
- int loglvl = LOG_WARNING;
-@@ -98,7 +100,7 @@ static void init_modules(void)
- 	struct lldp_module *premod = NULL;
- 	int i = 0;
- 
--	LIST_INIT(&lldp_head);
-+	LIST_INIT(&lldp_mod_head);
- 	for (i = 0; register_tlv_table[i]; i++) {
- 		module = register_tlv_table[i]();
- 		if (!module)
-@@ -106,7 +108,7 @@ static void init_modules(void)
- 		if (premod)
- 			LIST_INSERT_AFTER(premod, module, lldp);
- 		else
--			LIST_INSERT_HEAD(&lldp_head, module, lldp);
-+			LIST_INSERT_HEAD(&lldp_mod_head, module, lldp);
- 		premod = module;
- 	}
- }
-@@ -115,9 +117,9 @@ void deinit_modules(void)
- {
- 	struct lldp_module *module;
- 
--	while (lldp_head.lh_first != NULL) {
--		module = lldp_head.lh_first;
--		LIST_REMOVE(lldp_head.lh_first, lldp);
-+	while (lldp_mod_head.lh_first != NULL) {
-+		module = lldp_mod_head.lh_first;
-+		LIST_REMOVE(lldp_mod_head.lh_first, lldp);
- 		module->ops->lldp_mod_unregister(module);
- 	}
- }
-diff --git a/lldptool.c b/lldptool.c
-index 2b14f61..664a248 100644
---- a/lldptool.c
-+++ b/lldptool.c
-@@ -64,6 +64,8 @@
- #include "lldp_util.h"
- #include "lldpad_status.h"
- 
-+struct lldp_head lldp_mod_head;
-+
- static int show_raw;
- 
- static const char *cli_version =
-@@ -199,7 +201,7 @@ static void init_modules(void)
- 		if (premod)
- 			LIST_INSERT_AFTER(premod, module, lldp);
- 		else
--			LIST_INSERT_HEAD(&lldp_head, module, lldp);
-+			LIST_INSERT_HEAD(&lldp_mod_head, module, lldp);
- 		premod = module;
- 	}
- }
-@@ -208,9 +210,9 @@ void deinit_modules(void)
- {
- 	struct lldp_module *module;
- 
--	while (lldp_head.lh_first != NULL) {
--		module = lldp_head.lh_first;
--		LIST_REMOVE(lldp_head.lh_first, lldp);
-+	while (lldp_mod_head.lh_first != NULL) {
-+		module = lldp_mod_head.lh_first;
-+		LIST_REMOVE(lldp_mod_head.lh_first, lldp);
- 		module->ops->lldp_mod_unregister(module);
- 	}
- }
-@@ -346,7 +348,7 @@ cli_cmd_help(UNUSED struct clif *clif, UNUSED int argc, UNUSED char *argv[],
- 	printf("%s\n%s\n%s", commands_usage, commands_options, commands_help);
- 
- 	printf("\nTLV identifiers:\n");
--	LIST_FOREACH(np, &lldp_head, lldp)
-+	LIST_FOREACH(np, &lldp_mod_head, lldp)
- 		if (np->ops->print_help)
- 			np->ops->print_help();
- 	return 0;
-@@ -406,7 +408,7 @@ u32 lookup_tlvid(char *tlvid_str)
- 	struct lldp_module *np;
- 	u32 tlvid = INVALID_TLVID;
- 
--	LIST_FOREACH(np, &lldp_head, lldp) {
-+	LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 		if (np->ops->lookup_tlv_name) {
- 			tlvid = np->ops->lookup_tlv_name(tlvid_str);
- 			if (tlvid != INVALID_TLVID)
-diff --git a/lldptool_cmds.c b/lldptool_cmds.c
-index daef8c8..70b7b0d 100644
---- a/lldptool_cmds.c
-+++ b/lldptool_cmds.c
-@@ -464,7 +464,7 @@ static void print_tlvs(struct cmd *cmd, char *ibuf)
- 			offset += 8;
- 		
- 		printed = 0;
--		LIST_FOREACH(np, &lldp_head, lldp) {
-+		LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 			if (np->ops->print_tlv(tlvid, tlv_len, ibuf+offset)) {
- 					printed = 1;
- 					break;
-diff --git a/qbg/ecp22.c b/qbg/ecp22.c
-index 6561d14..825392b 100644
---- a/qbg/ecp22.c
-+++ b/qbg/ecp22.c
-@@ -774,7 +774,7 @@ void ecp22_start(char *ifname)
- 	struct ecp22 *ecp;
- 
- 	LLDPAD_DBG("%s:%s start ecp\n", __func__, ifname);
--	eud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_ECP22);
-+	eud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_ECP22);
- 	if (!eud) {
- 		LLDPAD_DBG("%s:%s no ECP module\n", __func__, ifname);
- 		return;
-@@ -837,7 +837,7 @@ void ecp22_stop(char *ifname)
- 	struct ecp22 *ecp;
- 
- 	LLDPAD_DBG("%s:%s stop ecp\n", __func__, ifname);
--	eud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_ECP22);
-+	eud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_ECP22);
- 	ecp = find_ecpdata(ifname, eud);
- 	if (ecp)
- 		ecp22_remove(ecp);
-@@ -852,7 +852,7 @@ static int ecp22_data_from_evb(char *ifname, struct evb22_to_ecp22 *ptr)
- 	struct ecp22_user_data *eud;
- 	struct ecp22 *ecp;
- 
--	eud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_ECP22);
-+	eud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_ECP22);
- 	ecp = find_ecpdata(ifname, eud);
- 	if (ecp) {
- 		ecp->max_rte = ptr->max_rte;
-@@ -930,7 +930,7 @@ static int ecp22_req2send(char *ifname, unsigned short subtype,
- 
- 	LLDPAD_DBG("%s:%s subtype:%d\n", __func__, ifname, subtype);
- 
--	eud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_ECP22);
-+	eud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_ECP22);
- 	ecp = find_ecpdata(ifname, eud);
- 	if (!ecp) {
- 		rc = -ENODEV;
-diff --git a/qbg/vdp.c b/qbg/vdp.c
-index d131560..c26bbd6 100644
---- a/qbg/vdp.c
-+++ b/qbg/vdp.c
-@@ -188,7 +188,7 @@ struct vdp_data *vdp_data(char *ifname)
- 	struct vdp_user_data *ud;
- 	struct vdp_data *vd = NULL;
- 
--	ud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_VDP02);
-+	ud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_VDP02);
- 	if (ud) {
- 		LIST_FOREACH(vd, &ud->head, entry) {
- 			if (!strncmp(ifname, vd->ifname, IFNAMSIZ))
-@@ -1624,7 +1624,7 @@ void vdp_ifup(char *ifname, struct lldp_agent *agent)
- 
- 	LIST_INIT(&vd->profile_head);
- 
--	ud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_VDP02);
-+	ud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_VDP02);
- 	LIST_INSERT_HEAD(&ud->head, vd, entry);
- 
- out_start_again:
-diff --git a/qbg/vdp22.c b/qbg/vdp22.c
-index cf02310..81ea3a8 100644
---- a/qbg/vdp22.c
-+++ b/qbg/vdp22.c
-@@ -694,7 +694,7 @@ static struct vdp22 *vdp22_findif(const char *ifname,
- 	struct vdp22 *vdp = 0;
- 
- 	if (!ud) {
--		ud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_VDP22);
-+		ud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_VDP22);
- 		if (!ud)
- 			LLDPAD_DBG("%s:%s no VDP22 module\n", __func__,
- 				   ifname);
-@@ -794,7 +794,7 @@ void vdp22_stop(char *ifname)
- 	struct vsi22 *vsi;
- 
- 	LLDPAD_DBG("%s:%s stop vdp\n", __func__, ifname);
--	vud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_VDP22);
-+	vud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_VDP22);
- 	if (!vud) {
- 		LLDPAD_ERR("%s:%s no VDP22 module\n", __func__, ifname);
- 		return;
-@@ -874,7 +874,7 @@ void vdp22_start(const char *ifname, int role)
- 	struct vsi22 *vsi;
- 
- 	LLDPAD_DBG("%s:%s start vdp\n", __func__, ifname);
--	vud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_VDP22);
-+	vud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_VDP22);
- 	if (!vud) {
- 		LLDPAD_ERR("%s:%s no VDP22 module\n", __func__, ifname);
- 		return;
-diff --git a/qbg/vdp22_cmds.c b/qbg/vdp22_cmds.c
-index 2e1bbbd..a8025ee 100644
---- a/qbg/vdp22_cmds.c
-+++ b/qbg/vdp22_cmds.c
-@@ -57,7 +57,7 @@ static struct lldp_module *get_my_module(int thisid)
- {
- 	struct lldp_module *np = NULL;
- 
--	LIST_FOREACH(np, &lldp_head, lldp)
-+	LIST_FOREACH(np, &lldp_mod_head, lldp)
- 		if (thisid == np->id)
- 			break;
- 	return np;
-diff --git a/vdptool.c b/vdptool.c
-index 8f36277..f1d946c 100644
---- a/vdptool.c
-+++ b/vdptool.c
-@@ -61,6 +61,8 @@
- #define OUI_ENCODE_HNDLR(name) name##_oui_encode_hndlr
- #define OUI_PRNT_DECODE_HNDLR(name) name##_oui_print_decode_hndlr
- 
-+struct lldp_head lldp_mod_head;
-+
- #define EXTERN_OUI_FN(name) \
- 	extern bool name##_oui_encode_hndlr(char *, char *, size_t); \
- 	extern void name##_oui_print_decode_hndlr(char *)
-@@ -796,13 +798,13 @@ static void init_modules(void)
- 	struct lldp_module *premod = NULL;
- 	int i = 0;
- 
--	LIST_INIT(&lldp_head);
-+	LIST_INIT(&lldp_mod_head);
- 	for (i = 0; register_tlv_table[i]; i++) {
- 		module = register_tlv_table[i]();
- 		if (premod)
- 			LIST_INSERT_AFTER(premod, module, lldp);
- 		else
--			LIST_INSERT_HEAD(&lldp_head, module, lldp);
-+			LIST_INSERT_HEAD(&lldp_mod_head, module, lldp);
- 		premod = module;
- 	}
- }
-@@ -811,9 +813,9 @@ void deinit_modules(void)
- {
- 	struct lldp_module *module;
- 
--	while (lldp_head.lh_first != NULL) {
--		module = lldp_head.lh_first;
--		LIST_REMOVE(lldp_head.lh_first, lldp);
-+	while (lldp_mod_head.lh_first != NULL) {
-+		module = lldp_mod_head.lh_first;
-+		LIST_REMOVE(lldp_mod_head.lh_first, lldp);
- 		module->ops->lldp_mod_unregister(module);
- 	}
- }
-@@ -953,7 +955,7 @@ cli_cmd_help(UNUSED struct clif *clif, UNUSED int argc, UNUSED char *argv[],
- 	printf("%s\n%s\n%s", commands_usage, commands_options, commands_help);
- 
- 	printf("\nTLV identifiers:\n");
--	LIST_FOREACH(np, &lldp_head, lldp)
-+	LIST_FOREACH(np, &lldp_mod_head, lldp)
- 		if (np->ops->print_help)
- 			np->ops->print_help();
- 	return 0;
-@@ -1006,7 +1008,7 @@ u32 lookup_tlvid(char *tlvid_str)
- 	struct lldp_module *np;
- 	u32 tlvid = INVALID_TLVID;
- 
--	LIST_FOREACH(np, &lldp_head, lldp) {
-+	LIST_FOREACH(np, &lldp_mod_head, lldp) {
- 		if (np->ops->lookup_tlv_name) {
- 			tlvid = np->ops->lookup_tlv_name(tlvid_str);
- 			if (tlvid != INVALID_TLVID)
--- 
-2.28.0
-
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0003-lldp-add-packed-struct-definition.patch b/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0003-lldp-add-packed-struct-definition.patch
deleted file mode 100644
index 4d0594c..0000000
--- a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0003-lldp-add-packed-struct-definition.patch
+++ /dev/null
@@ -1,49 +0,0 @@
-From 702dd20f43d9ca7e0bcb917e8acfec3f1acdcb5c Mon Sep 17 00:00:00 2001
-From: Aaron Conole <aconole@redhat.com>
-Date: Mon, 3 Aug 2020 15:14:50 -0400
-Subject: [PATCH 3/9] lldp: add packed struct definition
-
-Signed-off-by: Aaron Conole <aconole@redhat.com>
----
- include/lldp.h   | 6 ++++++
- lldp/l2_packet.h | 5 ++---
- 2 files changed, 8 insertions(+), 3 deletions(-)
-
-diff --git a/include/lldp.h b/include/lldp.h
-index fb5ee93..de6a4ad 100644
---- a/include/lldp.h
-+++ b/include/lldp.h
-@@ -255,5 +255,11 @@ enum {
- #define LLDP_EVB_DEFAULT_RTE				15
- #define LLDP_EVB_DEFAULT_MAX_RTE			31
- 
-+#ifndef _MSC_VER
-+#define STRUCT_PACKED(STRUCT) STRUCT __attribute__((__packed__))
-+#else
-+#define STRUCT_PACKED(STRUCT) __pragma(pack(push, 1)) STRUCT __pragma(pack(pop))
-+#endif
-+
- void somethingChangedLocal(const char *ifname, int type);
- #endif /* _LLDP_H */
-diff --git a/lldp/l2_packet.h b/lldp/l2_packet.h
-index 607b8a3..831958c 100644
---- a/lldp/l2_packet.h
-+++ b/lldp/l2_packet.h
-@@ -58,12 +58,11 @@
-  */
- struct l2_packet_data;
- 
--
--struct l2_ethhdr {
-+STRUCT_PACKED(struct l2_ethhdr {
- 	u8 h_dest[ETH_ALEN];
- 	u8 h_source[ETH_ALEN];
- 	u16 h_proto;
--} STRUCT_PACKED;
-+});
- 
- /**
-  * l2_packet_init - Initialize l2_packet interface
--- 
-2.28.0
-
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0004-clif-Include-string.h-for-mem-function-prototypes.patch b/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0004-clif-Include-string.h-for-mem-function-prototypes.patch
new file mode 100644
index 0000000..b6d9cf7
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0004-clif-Include-string.h-for-mem-function-prototypes.patch
@@ -0,0 +1,23 @@
+From d88aae2230683517b6d5b62cced51da81317a722 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 22:37:14 -0700
+Subject: [PATCH] clif: Include string.h for mem* function prototypes
+
+Upstream-Status: Submitted [https://github.com/intel/openlldp/pull/86]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ clif.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/clif.c b/clif.c
+index cad6f75..0758a7e 100644
+--- a/clif.c
++++ b/clif.c
+@@ -32,6 +32,7 @@
+ 
+ #include <stdlib.h>
+ #include <stdio.h>
++#include <string.h>
+ #include <unistd.h>
+ #include <sys/types.h>
+ #include <sys/socket.h>
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0004-lldptool-make-extern.patch b/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0004-lldptool-make-extern.patch
deleted file mode 100644
index 16b7def..0000000
--- a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0004-lldptool-make-extern.patch
+++ /dev/null
@@ -1,41 +0,0 @@
-From 8229f4fb700ba4fcb2ec3e9956491bf5ee8c0ae2 Mon Sep 17 00:00:00 2001
-From: Aaron Conole <aconole@redhat.com>
-Date: Mon, 3 Aug 2020 15:17:31 -0400
-Subject: [PATCH 4/9] lldptool: make extern
-
-This should only exist per final linked object.
-
-Signed-off-by: Aaron Conole <aconole@redhat.com>
----
- include/lldptool.h | 2 +-
- lldptool.c         | 1 +
- 2 files changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/include/lldptool.h b/include/lldptool.h
-index c919873..a190009 100644
---- a/include/lldptool.h
-+++ b/include/lldptool.h
-@@ -29,7 +29,7 @@
- 
- #include "clif.h"
- 
--struct lldp_head lldp_cli_head;
-+extern struct lldp_head lldp_cli_head;
- 
- int clif_command(struct clif *clif, char *cmd, int raw);
- void print_raw_message(char *msg, int print);
-diff --git a/lldptool.c b/lldptool.c
-index 664a248..d76cc48 100644
---- a/lldptool.c
-+++ b/lldptool.c
-@@ -64,6 +64,7 @@
- #include "lldp_util.h"
- #include "lldpad_status.h"
- 
-+struct lldp_head lldp_cli_head;
- struct lldp_head lldp_mod_head;
- 
- static int show_raw;
--- 
-2.28.0
-
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0005-cisco_oui-match-encode-handler-prototypes.patch b/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0005-cisco_oui-match-encode-handler-prototypes.patch
deleted file mode 100644
index 4bdc5f2..0000000
--- a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0005-cisco_oui-match-encode-handler-prototypes.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-From 9203dec731e53fb72e1c0d62639e6e54378f66cc Mon Sep 17 00:00:00 2001
-From: Aaron Conole <aconole@redhat.com>
-Date: Mon, 3 Aug 2020 15:20:59 -0400
-Subject: [PATCH 5/9] cisco_oui: match encode handler prototypes
-
-The EXTERN_FN prototype generated requires size_t be the third parameter.
-
-Signed-off-by: Aaron Conole <aconole@redhat.com>
----
- vdptool_cisco_oui.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/vdptool_cisco_oui.c b/vdptool_cisco_oui.c
-index 7003521..3f88c76 100644
---- a/vdptool_cisco_oui.c
-+++ b/vdptool_cisco_oui.c
-@@ -28,7 +28,7 @@
- #include "lldp_util.h"
- #include "vdp_cisco.h"
- 
--bool cisco_oui_encode_hndlr(char *dst, char *src, int len)
-+bool cisco_oui_encode_hndlr(char *dst, char *src, size_t len)
- {
- 	char *src_temp = strdup(src);
- 	char *key, *data;
--- 
-2.28.0
-
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0006-ecp22-make-enum-a-type-rather-than-instance.patch b/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0006-ecp22-make-enum-a-type-rather-than-instance.patch
deleted file mode 100644
index 2b0289d..0000000
--- a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0006-ecp22-make-enum-a-type-rather-than-instance.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From 2723219c08726efa08a6bad04ffb775f850a96bc Mon Sep 17 00:00:00 2001
-From: Aaron Conole <aconole@redhat.com>
-Date: Mon, 3 Aug 2020 15:23:28 -0400
-Subject: [PATCH 6/9] ecp22: make enum a type rather than instance
-
-The enum defined in the qbg header is setup as a discreet instance
-rather than a type.  Fix this.
-
-Signed-off-by: Aaron Conole <aconole@redhat.com>
----
- include/qbg_ecp22.h | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/include/qbg_ecp22.h b/include/qbg_ecp22.h
-index 567f6df..fe66bb3 100644
---- a/include/qbg_ecp22.h
-+++ b/include/qbg_ecp22.h
-@@ -49,10 +49,10 @@ enum {					/* ECP Transmit states */
- 	ECP22_TX_ERROR
- };
- 
--enum {
-+enum ecp22_mode {
- 	ECP22_REQUEST = 0,
- 	ECP22_ACK
--} ecp22_mode;
-+};
- 
- struct ecp22_hdr {		/* ECP22 header */
- 	u16 ver_op_sub;		/* ECP22 version, operation, subtype */
--- 
-2.28.0
-
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0007-lldp_8021qaz-extern-config-object.patch b/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0007-lldp_8021qaz-extern-config-object.patch
deleted file mode 100644
index 36b575c..0000000
--- a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0007-lldp_8021qaz-extern-config-object.patch
+++ /dev/null
@@ -1,29 +0,0 @@
-From 275fe9da663193a843de450f03e810daedc06955 Mon Sep 17 00:00:00 2001
-From: Aaron Conole <aconole@redhat.com>
-Date: Mon, 3 Aug 2020 15:25:38 -0400
-Subject: [PATCH 7/9] lldp_8021qaz: extern config object
-
-The config object only exists as part of the config translation unit
-so remove the extra config object in the 8021qaz module.
-
-Signed-off-by: Aaron Conole <aconole@redhat.com>
----
- lldp_8021qaz.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/lldp_8021qaz.c b/lldp_8021qaz.c
-index a42fd11..673d758 100644
---- a/lldp_8021qaz.c
-+++ b/lldp_8021qaz.c
-@@ -49,7 +49,7 @@
- 
- 
- struct lldp_head lldp_head;
--struct config_t lldpad_cfg;
-+extern config_t lldpad_cfg;
- extern bool read_only_8021qaz;
- 
- static int ieee8021qaz_check_pending(struct port *port, struct lldp_agent *);
--- 
-2.28.0
-
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0008-stringops-fix-some-string-copy-errors.patch b/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0008-stringops-fix-some-string-copy-errors.patch
deleted file mode 100644
index 501b666..0000000
--- a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0008-stringops-fix-some-string-copy-errors.patch
+++ /dev/null
@@ -1,61 +0,0 @@
-From 61291de03cb6dd1aea2a633eb72951f3fe453e7f Mon Sep 17 00:00:00 2001
-From: Aaron Conole <aconole@redhat.com>
-Date: Mon, 3 Aug 2020 15:33:08 -0400
-Subject: [PATCH 8/9] stringops: fix some string copy errors
-
-Reported when using gcc-10.
-
-Signed-off-by: Aaron Conole <aconole@redhat.com>
----
- dcb_protocol.c | 13 ++++---------
- lldp/ports.c   |  2 +-
- 2 files changed, 5 insertions(+), 10 deletions(-)
-
-diff --git a/dcb_protocol.c b/dcb_protocol.c
-index 75ca139..930251b 100644
---- a/dcb_protocol.c
-+++ b/dcb_protocol.c
-@@ -2257,13 +2257,8 @@ cmd_status get_bwg_descrpt(char *device_name, u8 bwgid, char **name)
- 
- 	if ((it != NULL) &&
- 		(bwgid < it->second->max_pgid_desc)) {
--		size = (int)strlen(it->second->pgid_desc[bwgid]) +
--			sizeof(char);  /* Localization OK */
--		*name = (char*)malloc(size);
--		if (*name != NULL) {
--			strncpy(*name, it->second->pgid_desc[bwgid],
--					size); /* Localization OK */
--		} else {
-+		*name = strdup(it->second->pgid_desc[bwgid]);
-+		if (*name == NULL) {
- 			goto Error;
- 		}
- 	} else {
-@@ -2272,9 +2267,9 @@ cmd_status get_bwg_descrpt(char *device_name, u8 bwgid, char **name)
- 			size = (int)strlen(
- 				attribs.descript.pgid_desc[bwgid]) +
- 				sizeof(char);
--			*name = (char*)malloc(size);
-+			*name = (char*)calloc(size, sizeof(char));
- 			if (*name != NULL) {
--				memcpy(*name, attribs.descript.pgid_desc[bwgid], size); /* Localization OK */
-+				memcpy(*name, attribs.descript.pgid_desc[bwgid], size - 1); /* Localization OK */
- 			} else {
- 				goto Error;
- 			}
-diff --git a/lldp/ports.c b/lldp/ports.c
-index 6384f14..9b681f7 100644
---- a/lldp/ports.c
-+++ b/lldp/ports.c
-@@ -264,7 +264,7 @@ struct port *add_port(int ifindex, const char *ifname)
- 	memset(newport, 0, sizeof(*newport));
- 	newport->ifindex = ifindex;
- 	newport->next = NULL;
--	strncpy(newport->ifname, ifname, IFNAMSIZ);
-+	strncpy(newport->ifname, ifname, IFNAMSIZ - 1);
- 
- 	newport->bond_master = is_bond(ifname);
- 	/* Initialize relevant port variables */
--- 
-2.28.0
-
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0009-8021qaz-mark-prio-map-functions-static.patch b/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0009-8021qaz-mark-prio-map-functions-static.patch
deleted file mode 100644
index 0f84426..0000000
--- a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/0009-8021qaz-mark-prio-map-functions-static.patch
+++ /dev/null
@@ -1,39 +0,0 @@
-From 7ae79b0dff53a23fa0a964f77b9e3bb387a293c4 Mon Sep 17 00:00:00 2001
-From: Aaron Conole <aconole@redhat.com>
-Date: Tue, 4 Aug 2020 09:17:50 -0400
-Subject: [PATCH 9/9] 8021qaz: mark prio map functions static
-
-Inline is not the correct way to mark a function for inclusion
-in a single translation unit.  Use 'static' to restrict export
-of these functions.
-
-Signed-off-by: Aaron Conole <aconole@redhat.com>
----
- lldp_8021qaz.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/lldp_8021qaz.c b/lldp_8021qaz.c
-index 673d758..16ae167 100644
---- a/lldp_8021qaz.c
-+++ b/lldp_8021qaz.c
-@@ -397,7 +397,7 @@ static int read_cfg_file(char *ifname, struct lldp_agent *agent,
- 	return 0;
- }
- 
--inline int get_prio_map(u32 prio_map, int prio)
-+static int get_prio_map(u32 prio_map, int prio)
- {
- 	if (prio > 7)
- 		return 0;
-@@ -405,7 +405,7 @@ inline int get_prio_map(u32 prio_map, int prio)
- 	return (prio_map >> (4 * (7-prio))) & 0xF;
- }
- 
--inline void set_prio_map(u32 *prio_map, u8 prio, int tc)
-+static void set_prio_map(u32 *prio_map, u8 prio, int tc)
- {
- 	u32 mask = ~(0xffffffff & (0xF << (4 * (7-prio))));
- 	*prio_map &= mask;
--- 
-2.28.0
-
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/lldp_head-remove-all-references.patch b/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/lldp_head-remove-all-references.patch
deleted file mode 100644
index bb3400c..0000000
--- a/meta-openembedded/meta-networking/recipes-protocols/openlldp/files/lldp_head-remove-all-references.patch
+++ /dev/null
@@ -1,331 +0,0 @@
-From ed6a8e5a75f56b7034a46294a0bf2a9a7fd14fbc Mon Sep 17 00:00:00 2001
-From: Aaron Conole <aconole@redhat.com>
-Date: Fri, 23 Oct 2020 14:40:32 -0400
-Subject: [PATCH] lldp_head: remove all references
-
-There were a number of references missed during the module cleanup.  This hits the remaining
-ones.
-
-Fixes: 07a83c583b9d ("lldp_head: rename and make extern")
-Signed-off-by: Aaron Conole <aconole@redhat.com>
-
-Reference to upstream patch:
-https://github.com/intel/openlldp/commit/ed6a8e5a75f56b7034a46294a0bf2a9a7fd14fbc
-
-Signed-off-by: Li Wang <li.wang@windriver.com>
----
- ctrl_iface.c   |  4 +---
- lldp_8021qaz.c | 11 +++++------
- lldp_8023.c    |  6 ++----
- lldp_basman.c  |  6 ++----
- lldp_evb.c     |  6 ++----
- lldp_evb22.c   |  6 ++----
- lldp_mand.c    | 10 ++++------
- lldp_med.c     |  6 ++----
- qbg_utils.c    |  3 +--
- 9 files changed, 21 insertions(+), 37 deletions(-)
-
-diff --git a/ctrl_iface.c b/ctrl_iface.c
-index 1734f49..666f7c8 100644
---- a/ctrl_iface.c
-+++ b/ctrl_iface.c
-@@ -53,8 +53,6 @@
- #include "lldp_util.h"
- #include "messages.h"
- 
--extern struct lldp_head lldp_head;
--
- struct ctrl_dst {
- 	struct ctrl_dst *next;
- 	struct sockaddr_un addr;
-@@ -116,7 +114,7 @@ int clif_iface_module(struct clif_data *clifd,
- 		return cmd_invalid;
- 	}
- 
--	mod = find_module_by_id(&lldp_head, module_id);
-+	mod = find_module_by_id(&lldp_mod_head, module_id);
- 	if (mod && mod->ops && mod->ops->client_cmd)
- 		return  (mod->ops->client_cmd)(clifd, from, fromlen,
- 			 cmd_start, cmd_len, rbuf+strlen(rbuf), rlen);
-diff --git a/lldp_8021qaz.c b/lldp_8021qaz.c
-index 16ae167..e747710 100644
---- a/lldp_8021qaz.c
-+++ b/lldp_8021qaz.c
-@@ -48,7 +48,6 @@
- #include "lldp_dcbx.h"
- 
- 
--struct lldp_head lldp_head;
- extern config_t lldpad_cfg;
- extern bool read_only_8021qaz;
- 
-@@ -84,7 +83,7 @@ static int ieee8021qaz_check_pending(struct port *port,
- 	if (!port->portEnabled)
- 		return 0;
- 
--	iud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_8021QAZ);
-+	iud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_8021QAZ);
- 	if (iud) {
- 		LIST_FOREACH(tlv, &iud->head, entry) {
- 			if (!strncmp(port->ifname, tlv->ifname, IFNAMSIZ)) {
-@@ -143,7 +142,7 @@ struct ieee8021qaz_tlvs *ieee8021qaz_data(const char *ifname)
- 	struct ieee8021qaz_user_data *iud;
- 	struct ieee8021qaz_tlvs *tlv = NULL;
- 
--	iud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_8021QAZ);
-+	iud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_8021QAZ);
- 	if (iud) {
- 		LIST_FOREACH(tlv, &iud->head, entry) {
- 			if (!strncmp(tlv->ifname, ifname, IFNAMSIZ))
-@@ -629,7 +628,7 @@ void ieee8021qaz_ifup(char *ifname, struct lldp_agent *agent)
- 	LIST_INIT(&tlvs->app_head);
- 	read_cfg_file(port->ifname, agent, tlvs);
- 
--	iud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_8021QAZ);
-+	iud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_8021QAZ);
- 	LIST_INSERT_HEAD(&iud->head, tlvs, entry);
- 
- initialized:
-@@ -2179,7 +2178,7 @@ int ieee8021qaz_tlvs_rxed(const char *ifname)
- 	struct ieee8021qaz_user_data *iud;
- 	struct ieee8021qaz_tlvs *tlv = NULL;
- 
--	iud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_8021QAZ);
-+	iud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_8021QAZ);
- 	if (iud) {
- 		LIST_FOREACH(tlv, &iud->head, entry) {
- 			if (!strncmp(tlv->ifname, ifname, IFNAMSIZ))
-@@ -2198,7 +2197,7 @@ int ieee8021qaz_check_active(const char *ifname)
- 	struct ieee8021qaz_user_data *iud;
- 	struct ieee8021qaz_tlvs *tlv = NULL;
- 
--	iud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_8021QAZ);
-+	iud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_8021QAZ);
- 	if (iud) {
- 		LIST_FOREACH(tlv, &iud->head, entry) {
- 			if (!strncmp(tlv->ifname, ifname, IFNAMSIZ))
-diff --git a/lldp_8023.c b/lldp_8023.c
-index 422026e..8a03211 100644
---- a/lldp_8023.c
-+++ b/lldp_8023.c
-@@ -39,8 +39,6 @@
- #include "lldp_8023_clif.h"
- #include "lldp_8023_cmds.h"
- 
--extern struct lldp_head lldp_head;
--
- struct tlv_info_8023_maccfg {
- 	u8 oui[3];
- 	u8 sub;
-@@ -84,7 +82,7 @@ static struct ieee8023_data *ieee8023_data(const char *ifname, enum agent_type t
- 	struct ieee8023_user_data *ud;
- 	struct ieee8023_data *bd = NULL;
- 
--	ud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_8023);
-+	ud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_8023);
- 	if (ud) {
- 		LIST_FOREACH(bd, &ud->head, entry) {
- 			if (!strncmp(ifname, bd->ifname, IFNAMSIZ) &&
-@@ -456,7 +454,7 @@ void ieee8023_ifup(char *ifname, struct lldp_agent *agent)
- 		goto out_err;
- 	}
- 
--	ud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_8023);
-+	ud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_8023);
- 	LIST_INSERT_HEAD(&ud->head, bd, entry);
- 	LLDPAD_INFO("%s:port %s added\n", __func__, ifname);
- 	return;
-diff --git a/lldp_basman.c b/lldp_basman.c
-index a4f69c1..614e2a2 100644
---- a/lldp_basman.c
-+++ b/lldp_basman.c
-@@ -75,8 +75,6 @@ struct tlv_info_manaddr {
- 	struct tlv_info_maoid o;
- } __attribute__ ((__packed__));
- 
--extern struct lldp_head lldp_head;
--
- static const struct lldp_mod_ops basman_ops =  {
- 	.lldp_mod_register 	= basman_register,
- 	.lldp_mod_unregister 	= basman_unregister,
-@@ -91,7 +89,7 @@ static struct basman_data *basman_data(const char *ifname, enum agent_type type)
- 	struct basman_user_data *bud;
- 	struct basman_data *bd = NULL;
- 
--	bud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_BASIC);
-+	bud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_BASIC);
- 	if (bud) {
- 		LIST_FOREACH(bd, &bud->head, entry) {
- 			if (!strncmp(ifname, bd->ifname, IFNAMSIZ) &&
-@@ -688,7 +686,7 @@ void basman_ifup(char *ifname, struct lldp_agent *agent)
- 		goto out_err;
- 	}
- 
--	bud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_BASIC);
-+	bud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_BASIC);
- 	LIST_INSERT_HEAD(&bud->head, bd, entry);
- 	LLDPAD_DBG("%s:port %s added\n", __func__, ifname);
- 	return;
-diff --git a/lldp_evb.c b/lldp_evb.c
-index dcdcc7e..a8f3965 100644
---- a/lldp_evb.c
-+++ b/lldp_evb.c
-@@ -36,14 +36,12 @@
- #include "messages.h"
- #include "config.h"
- 
--extern struct lldp_head lldp_head;
--
- struct evb_data *evb_data(char *ifname, enum agent_type type)
- {
- 	struct evb_user_data *ud;
- 	struct evb_data *ed = NULL;
- 
--	ud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_EVB);
-+	ud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_EVB);
- 	if (ud) {
- 		LIST_FOREACH(ed, &ud->head, entry) {
- 			if (!strncmp(ifname, ed->ifname, IFNAMSIZ) &&
-@@ -347,7 +345,7 @@ static void evb_ifup(char *ifname, struct lldp_agent *agent)
- 
- 	evb_init_tlv(ed, agent);
- 
--	ud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_EVB);
-+	ud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_EVB);
- 	LIST_INSERT_HEAD(&ud->head, ed, entry);
- 	LLDPAD_DBG("%s:%s agent %d added\n", __func__, ifname, agent->type);
- }
-diff --git a/lldp_evb22.c b/lldp_evb22.c
-index 76ba883..6e92d9d 100644
---- a/lldp_evb22.c
-+++ b/lldp_evb22.c
-@@ -37,14 +37,12 @@
- #include "messages.h"
- #include "config.h"
- 
--extern struct lldp_head lldp_head;
--
- struct evb22_data *evb22_data(char *ifname, enum agent_type type)
- {
- 	struct evb22_user_data *ud;
- 	struct evb22_data *ed = NULL;
- 
--	ud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_EVB22);
-+	ud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_EVB22);
- 	if (ud) {
- 		LIST_FOREACH(ed, &ud->head, entry) {
- 			if (!strncmp(ifname, ed->ifname, IFNAMSIZ) &&
-@@ -453,7 +451,7 @@ static void evb22_ifup(char *ifname, struct lldp_agent *agent)
- 	STRNCPY_TERMINATED(ed->ifname, ifname, IFNAMSIZ);
- 	ed->agenttype = agent->type;
- 	evb22_init_tlv(ed, agent);
--	ud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_EVB22);
-+	ud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_EVB22);
- 	LIST_INSERT_HEAD(&ud->head, ed, entry);
- 	LLDPAD_DBG("%s:%s agent %d added\n", __func__, ifname, agent->type);
- }
-diff --git a/lldp_mand.c b/lldp_mand.c
-index 0db63cb..b857a88 100644
---- a/lldp_mand.c
-+++ b/lldp_mand.c
-@@ -42,8 +42,6 @@
- #include "lldp/l2_packet.h"
- #include "lldp_tlv.h"
- 
--extern struct lldp_head lldp_head;
--
- static const struct lldp_mod_ops mand_ops = {
- 	.lldp_mod_register 	= mand_register,
- 	.lldp_mod_unregister 	= mand_unregister,
-@@ -59,7 +57,7 @@ struct mand_data *mand_data(const char *ifname, enum agent_type type)
- 	struct mand_user_data *mud;
- 	struct mand_data *md = NULL;
- 
--	mud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_MAND);
-+	mud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_MAND);
- 	if (mud) {
- 		LIST_FOREACH(md, &mud->head, entry) {
- 			if (!strncmp(ifname, md->ifname, IFNAMSIZ) &&
-@@ -608,7 +606,7 @@ void mand_ifup(char *ifname, struct lldp_agent *agent)
- 		STRNCPY_TERMINATED(md->ifname, ifname, IFNAMSIZ);
- 		md->agenttype = agent->type;
- 
--		mud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_MAND);
-+		mud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_MAND);
- 		LIST_INSERT_HEAD(&mud->head, md, entry);
- 	}
- 
-@@ -636,7 +634,7 @@ struct lldp_module *mand_register(void)
- 		LLDPAD_ERR("failed to malloc LLDP Mandatory module data\n");
- 		goto out_err;
- 	}
--	mud = malloc(sizeof(struct mand_user_data));
-+    mud = malloc(sizeof(struct mand_user_data));
- 	if (!mud) {
- 		free(mod);
- 		LLDPAD_ERR("failed to malloc LLDP Mandatory module user data\n");
-@@ -644,8 +642,8 @@ struct lldp_module *mand_register(void)
- 	}
- 	LIST_INIT(&mud->head);
-  	mod->id = LLDP_MOD_MAND;
-+    mod->data = mud;
- 	mod->ops = &mand_ops;
--	mod->data = mud;
- 	LLDPAD_INFO("%s:done\n", __func__);
- 	return mod;
- out_err:
-diff --git a/lldp_med.c b/lldp_med.c
-index f6c373e..7b6996e 100644
---- a/lldp_med.c
-+++ b/lldp_med.c
-@@ -40,8 +40,6 @@
- #include "lldp_mand_clif.h"
- #include "lldp_med_cmds.h"
- 
--extern struct lldp_head lldp_head;
--
- struct tlv_info_medcaps {
- 	u8 oui[OUI_SIZE];
- 	u8 subtype;
-@@ -95,7 +93,7 @@ static struct med_data *med_data(const char *ifname, enum agent_type type)
- 	struct med_user_data *mud;
- 	struct med_data *md = NULL;
- 
--	mud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_MED);
-+	mud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_MED);
- 	if (mud) {
- 		LIST_FOREACH(md, &mud->head, entry) {
- 			if (!strncmp(ifname, md->ifname, IFNAMSIZ) &&
-@@ -914,7 +912,7 @@ void med_ifup(char *ifname, struct lldp_agent *agent)
- 		free(md);
- 		goto out_err;
- 	}
--	mud = find_module_user_data_by_id(&lldp_head, LLDP_MOD_MED);
-+	mud = find_module_user_data_by_id(&lldp_mod_head, LLDP_MOD_MED);
- 	LIST_INSERT_HEAD(&mud->head, md, entry);
- 	LLDPAD_INFO("%s:port %s added\n", __func__, ifname);
- 	return;
-diff --git a/qbg_utils.c b/qbg_utils.c
-index 9daeade..0d40c5b 100644
---- a/qbg_utils.c
-+++ b/qbg_utils.c
-@@ -36,7 +36,6 @@
- #include "qbg_utils.h"
- 
- extern int loglvl;			/* Global lldpad log level */
--extern struct lldp_head lldp_head;
- 
- /*
-  * hexdump_frame - print raw evb/ecp/vdp frame
-@@ -73,7 +72,7 @@ void hexdump_frame(const char *ifname, char *txt, const unsigned char *buf,
-  */
- int modules_notify(int id, int sender_id, char *ifname, void *data)
- {
--	struct lldp_module *mp = find_module_by_id(&lldp_head, id);
-+	struct lldp_module *mp = find_module_by_id(&lldp_mod_head, id);
- 	int rc = 0;
- 
- 	if (mp && mp->ops->lldp_mod_notify)
--- 
-2.18.1
-
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openlldp/openlldp_1.0.1.bb b/meta-openembedded/meta-networking/recipes-protocols/openlldp/openlldp_1.0.1.bb
deleted file mode 100644
index acde9c3..0000000
--- a/meta-openembedded/meta-networking/recipes-protocols/openlldp/openlldp_1.0.1.bb
+++ /dev/null
@@ -1,44 +0,0 @@
-SUMMARY = "Open-LLDP"
-DESCRIPTION = "Link Layer Discovery Protocol for Linux that includes support for DCBX"
-HOMEPAGE = "http://open-lldp.org/start"
-
-LICENSE = "GPL-2.0-only"
-LIC_FILES_CHKSUM = "file://COPYING;md5=8c2bc283e65df398ced5f5b747e78162"
-
-S = "${WORKDIR}/git"
-
-inherit pkgconfig autotools
-inherit ${@bb.utils.contains('DISTRO_FEATURES', 'systemd', 'systemd', '', d)}
-
-DEPENDS = "libnl libconfig readline"
-
-SRCREV = "b71bfb87fefb31c4b1a6a7ae351791c90966c3a8"
-PV .= "+git${SRCPV}"
-SRC_URI = "git://github.com/intel/openlldp.git;protocol=https;branch=master \
-           file://0001-Fix-musl-libc-build-issue.patch \
-           file://0001-autotools-Add-include-path-to-generated-version.h.patch \
-           file://0001-autotools-Add-option-to-disable-installation-of-syst.patch \
-           file://0001-cmds-fix-enum-conversion.patch \
-           file://0002-lldp_head-rename-and-make-extern.patch \
-           file://0003-lldp-add-packed-struct-definition.patch \
-           file://0004-lldptool-make-extern.patch \
-           file://0005-cisco_oui-match-encode-handler-prototypes.patch \
-           file://0006-ecp22-make-enum-a-type-rather-than-instance.patch \
-           file://0007-lldp_8021qaz-extern-config-object.patch \
-           file://0008-stringops-fix-some-string-copy-errors.patch \
-           file://0009-8021qaz-mark-prio-map-functions-static.patch \
-           file://lldp_head-remove-all-references.patch \
-           "
-
-# Makefile.am adds -Werror to AM_CFLAGS. There are warnings so disable it.
-TARGET_CFLAGS += "-Wno-error"
-
-# Enable install of systemd conf files.
-EXTRA_OECONF += "${@bb.utils.contains('DISTRO_FEATURES', 'systemd', '--with-systemdsystemunitdir=${systemd_system_unitdir}', '', d)}"
-
-SYSTEMD_SERVICE:${PN} = "lldpad.service lldpad.socket"
-
-# To enable service at boot set to enable in local.conf.
-SYSTEMD_AUTO_ENABLE ?= "disable"
-
-RRECOMMENDS:${PN} = "iproute2-tc"
diff --git a/meta-openembedded/meta-networking/recipes-protocols/openlldp/openlldp_1.1.0.bb b/meta-openembedded/meta-networking/recipes-protocols/openlldp/openlldp_1.1.0.bb
new file mode 100644
index 0000000..812f4fb
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-protocols/openlldp/openlldp_1.1.0.bb
@@ -0,0 +1,32 @@
+SUMMARY = "Open-LLDP"
+DESCRIPTION = "Link Layer Discovery Protocol for Linux that includes support for DCBX"
+HOMEPAGE = "http://open-lldp.org/start"
+
+LICENSE = "GPL-2.0-only"
+LIC_FILES_CHKSUM = "file://COPYING;md5=8c2bc283e65df398ced5f5b747e78162"
+
+S = "${WORKDIR}/git"
+
+inherit pkgconfig autotools
+inherit ${@bb.utils.contains('DISTRO_FEATURES', 'systemd', 'systemd', '', d)}
+
+DEPENDS = "libnl libconfig readline"
+
+SRCREV = "85e55837a81d710e5baa7da47f7ed0d205c8ede5"
+PV .= "+git${SRCPV}"
+SRC_URI = "git://github.com/intel/openlldp.git;protocol=https;branch=branch-1.1 \
+           file://0001-Fix-musl-libc-build-issue.patch \
+           file://0001-autotools-Add-include-path-to-generated-version.h.patch \
+           file://0001-autotools-Add-option-to-disable-installation-of-syst.patch \
+           file://0004-clif-Include-string.h-for-mem-function-prototypes.patch \
+           "
+
+# Enable install of systemd conf files.
+EXTRA_OECONF += "${@bb.utils.contains('DISTRO_FEATURES', 'systemd', '--with-systemdsystemunitdir=${systemd_system_unitdir}', '', d)}"
+
+SYSTEMD_SERVICE:${PN} = "lldpad.service lldpad.socket"
+
+# To enable service at boot set to enable in local.conf.
+SYSTEMD_AUTO_ENABLE ?= "disable"
+
+RRECOMMENDS:${PN} = "iproute2-tc"
diff --git a/meta-openembedded/meta-networking/recipes-protocols/quagga/files/0001-configure-Check-for-readline-instead-of-main-in-libr.patch b/meta-openembedded/meta-networking/recipes-protocols/quagga/files/0001-configure-Check-for-readline-instead-of-main-in-libr.patch
new file mode 100644
index 0000000..776c095
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-protocols/quagga/files/0001-configure-Check-for-readline-instead-of-main-in-libr.patch
@@ -0,0 +1,32 @@
+From 675b35b7ed416c837267e493b157167319e8f5fa Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 11:01:53 -0700
+Subject: [PATCH] configure: Check for readline() instead of main() in
+ libreadline
+
+While checking for the presence of libreadline, probe for a function that is
+provided by libreadline; main is not provided by it, so modern compiler
+toolchains may complain about it.
+
+Upstream-Status: Submitted [https://github.com/Quagga/quagga/pull/9]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.ac | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/configure.ac b/configure.ac
+index ed279f48..d444ab4a 100755
+--- a/configure.ac
++++ b/configure.ac
+@@ -754,7 +754,7 @@ dnl	 [TODO] on Linux, and in [TODO] on Solaris.
+ 	      )]
+ 	    )]
+ 	  )
+-         AC_CHECK_LIB(readline, main, LIBREADLINE="-lreadline $LIBREADLINE",,
++         AC_CHECK_LIB(readline, readline, LIBREADLINE="-lreadline $LIBREADLINE",,
+                       "$LIBREADLINE")
+          if test $ac_cv_lib_readline_main = no; then
+            AC_MSG_ERROR([vtysh needs libreadline but was not found and usable on your system.])
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-networking/recipes-protocols/quagga/quagga_1.2.4.bb b/meta-openembedded/meta-networking/recipes-protocols/quagga/quagga_1.2.4.bb
index 984264a..37cfc13 100644
--- a/meta-openembedded/meta-networking/recipes-protocols/quagga/quagga_1.2.4.bb
+++ b/meta-openembedded/meta-networking/recipes-protocols/quagga/quagga_1.2.4.bb
@@ -1,5 +1,7 @@
 require quagga.inc
 
+SRC_URI += "file://0001-configure-Check-for-readline-instead-of-main-in-libr.patch"
+
 SRC_URI[md5sum] = "eced21b054d71c9e1b7c6ac43286a166"
 SRC_URI[sha256sum] = "e364c082c3309910e1eb7b068bf39ee298e2f2f3f31a6431a5c115193bd653d3"
 
diff --git a/meta-openembedded/meta-networking/recipes-support/chrony/chrony/arm_eabi.patch b/meta-openembedded/meta-networking/recipes-support/chrony/chrony/arm_eabi.patch
index 4a2769b..5ba2081 100644
--- a/meta-openembedded/meta-networking/recipes-support/chrony/chrony/arm_eabi.patch
+++ b/meta-openembedded/meta-networking/recipes-support/chrony/chrony/arm_eabi.patch
@@ -1,4 +1,4 @@
-From f35e07aceb4a16121d83b47ee77990018bec98ea Mon Sep 17 00:00:00 2001
+From 28b9f115e36e2133301b02fa02ad71a8efbed9b9 Mon Sep 17 00:00:00 2001
 From: Joe Slater <jslater@windriver.com>
 Date: Thu, 9 Mar 2017 10:58:06 -0800
 Subject: [PATCH] chrony: fix build failure for arma9
@@ -22,12 +22,13 @@
     Refreshed for 4.0
 
     Signed-off-by: Khem Raj <raj.khem@gmail.com>
+
 ---
  sys_linux.c | 15 +++++++++------
  1 file changed, 9 insertions(+), 6 deletions(-)
 
 diff --git a/sys_linux.c b/sys_linux.c
-index 9cab2ef..8104b8f 100644
+index f2baab1..14a9241 100644
 --- a/sys_linux.c
 +++ b/sys_linux.c
 @@ -484,7 +484,6 @@ SYS_Linux_EnableSystemCallFilter(int level, SYS_ProcessContext context)
@@ -45,8 +46,8 @@
 -    SCMP_SYS(getrlimit),
      SCMP_SYS(getuid),
      SCMP_SYS(getuid32),
-     SCMP_SYS(rt_sigaction),
-@@ -508,7 +506,6 @@ SYS_Linux_EnableSystemCallFilter(int level, SYS_ProcessContext context)
+ #ifdef __NR_rseq
+@@ -511,7 +509,6 @@ SYS_Linux_EnableSystemCallFilter(int level, SYS_ProcessContext context)
      /* Memory */
      SCMP_SYS(brk),
      SCMP_SYS(madvise),
@@ -54,7 +55,7 @@
      SCMP_SYS(mmap2),
      SCMP_SYS(mprotect),
      SCMP_SYS(mremap),
-@@ -568,8 +565,6 @@ SYS_Linux_EnableSystemCallFilter(int level, SYS_ProcessContext context)
+@@ -571,8 +568,6 @@ SYS_Linux_EnableSystemCallFilter(int level, SYS_ProcessContext context)
      SCMP_SYS(sendmsg),
      SCMP_SYS(sendto),
      SCMP_SYS(shutdown),
@@ -63,7 +64,7 @@
  
      /* General I/O */
      SCMP_SYS(_newselect),
-@@ -593,7 +588,6 @@ SYS_Linux_EnableSystemCallFilter(int level, SYS_ProcessContext context)
+@@ -596,7 +591,6 @@ SYS_Linux_EnableSystemCallFilter(int level, SYS_ProcessContext context)
  #ifdef __NR_futex_time64
      SCMP_SYS(futex_time64),
  #endif
@@ -71,7 +72,7 @@
      SCMP_SYS(set_robust_list),
      SCMP_SYS(write),
  
-@@ -601,6 +595,15 @@ SYS_Linux_EnableSystemCallFilter(int level, SYS_ProcessContext context)
+@@ -604,6 +598,15 @@ SYS_Linux_EnableSystemCallFilter(int level, SYS_ProcessContext context)
      SCMP_SYS(getrandom),
      SCMP_SYS(sysinfo),
      SCMP_SYS(uname),
@@ -87,6 +88,3 @@
    };
  
    const int denied_any[] = {
--- 
-2.25.1
-
diff --git a/meta-openembedded/meta-networking/recipes-support/chrony/chrony_4.2.bb b/meta-openembedded/meta-networking/recipes-support/chrony/chrony_4.3.bb
similarity index 98%
rename from meta-openembedded/meta-networking/recipes-support/chrony/chrony_4.2.bb
rename to meta-openembedded/meta-networking/recipes-support/chrony/chrony_4.3.bb
index 8ce9e1d..083d2cb 100644
--- a/meta-openembedded/meta-networking/recipes-support/chrony/chrony_4.2.bb
+++ b/meta-openembedded/meta-networking/recipes-support/chrony/chrony_4.3.bb
@@ -39,7 +39,7 @@
 SRC_URI:append:libc-musl = " \
     file://0001-Fix-compilation-with-musl.patch \
 "
-SRC_URI[sha256sum] = "273f9fd15c328ed6f3a5f6ba6baec35a421a34a73bb725605329b1712048db9a"
+SRC_URI[sha256sum] = "9d0da889a865f089a5a21610ffb6713e3c9438ce303a63b49c2fb6eaff5b8804"
 
 DEPENDS = "pps-tools"
 
diff --git a/meta-openembedded/meta-networking/recipes-support/dnsmasq/dnsmasq.inc b/meta-openembedded/meta-networking/recipes-support/dnsmasq/dnsmasq.inc
index 136c65d..a8ff21a 100644
--- a/meta-openembedded/meta-networking/recipes-support/dnsmasq/dnsmasq.inc
+++ b/meta-openembedded/meta-networking/recipes-support/dnsmasq/dnsmasq.inc
@@ -3,8 +3,9 @@
 SECTION = "net"
 # GPLv3 was added in version 2.41 as license option
 LICENSE = "GPL-2.0-only | GPL-3.0-only"
-LIC_FILES_CHKSUM = "file://COPYING;md5=0636e73ff0215e8d672dc4c32c317bb3 \
-                    file://COPYING-v3;md5=d32239bcb673463ab874e80d47fae504"
+LIC_FILES_CHKSUM = "file://COPYING;md5=b234ee4d69f5fce4486a80fdaf4a4263 \
+                    file://COPYING-v3;md5=d32239bcb673463ab874e80d47fae504 \
+                    "
 
 #at least versions 2.69 and prior are moved to the archive folder on the server
 SRC_URI = "http://www.thekelleys.org.uk/dnsmasq/${@['archive/', ''][float(d.getVar('PV').split('.')[1]) > 69]}dnsmasq-${PV}.tar.gz;name=dnsmasq-${PV} \
diff --git a/meta-openembedded/meta-networking/recipes-support/dnsmasq/dnsmasq/CVE-2022-0934.patch b/meta-openembedded/meta-networking/recipes-support/dnsmasq/dnsmasq/CVE-2022-0934.patch
deleted file mode 100644
index 6bd734d..0000000
--- a/meta-openembedded/meta-networking/recipes-support/dnsmasq/dnsmasq/CVE-2022-0934.patch
+++ /dev/null
@@ -1,191 +0,0 @@
-From 3cdecc159e0f417a2f8d43d99632af26beea630f Mon Sep 17 00:00:00 2001
-From: Simon Kelley <simon@thekelleys.org.uk>
-Date: Thu, 31 Mar 2022 21:35:20 +0100
-Subject: [PATCH] Fix write-after-free error in DHCPv6 code. CVE-2022-0934
- refers.
-
-CVE: CVE-2022-0934
-
-Upstream-Status: Backport
-[https://thekelleys.org.uk/gitweb/?p=dnsmasq.git;a=commitdiff;h=03345ecefe]
-
-Signed-off-by: Yi Zhao <yi.zhao@windriver.com>
----
- CHANGELOG     |  3 +++
- src/rfc3315.c | 48 +++++++++++++++++++++++++++---------------------
- 2 files changed, 30 insertions(+), 21 deletions(-)
-
-diff --git a/CHANGELOG b/CHANGELOG
-index 5e54df9..a28da2a 100644
---- a/CHANGELOG
-+++ b/CHANGELOG
-@@ -1,4 +1,7 @@
- version 2.86
-+	Fix write-after-free error in DHCPv6 server code.
-+	CVE-2022-0934 refers.
-+
- 	Handle DHCPREBIND requests in the DHCPv6 server code.
- 	Thanks to Aichun Li for spotting this omission, and the initial
- 	patch.
-diff --git a/src/rfc3315.c b/src/rfc3315.c
-index 5c2ff97..6ecfeeb 100644
---- a/src/rfc3315.c
-+++ b/src/rfc3315.c
-@@ -33,9 +33,9 @@ struct state {
-   unsigned int mac_len, mac_type;
- };
- 
--static int dhcp6_maybe_relay(struct state *state, void *inbuff, size_t sz, 
-+static int dhcp6_maybe_relay(struct state *state, unsigned char *inbuff, size_t sz, 
- 			     struct in6_addr *client_addr, int is_unicast, time_t now);
--static int dhcp6_no_relay(struct state *state, int msg_type, void *inbuff, size_t sz, int is_unicast, time_t now);
-+static int dhcp6_no_relay(struct state *state, int msg_type, unsigned char *inbuff, size_t sz, int is_unicast, time_t now);
- static void log6_opts(int nest, unsigned int xid, void *start_opts, void *end_opts);
- static void log6_packet(struct state *state, char *type, struct in6_addr *addr, char *string);
- static void log6_quiet(struct state *state, char *type, struct in6_addr *addr, char *string);
-@@ -104,12 +104,12 @@ unsigned short dhcp6_reply(struct dhcp_context *context, int interface, char *if
- }
- 
- /* This cost me blood to write, it will probably cost you blood to understand - srk. */
--static int dhcp6_maybe_relay(struct state *state, void *inbuff, size_t sz, 
-+static int dhcp6_maybe_relay(struct state *state, unsigned char *inbuff, size_t sz, 
- 			     struct in6_addr *client_addr, int is_unicast, time_t now)
- {
-   void *end = inbuff + sz;
-   void *opts = inbuff + 34;
--  int msg_type = *((unsigned char *)inbuff);
-+  int msg_type = *inbuff;
-   unsigned char *outmsgtypep;
-   void *opt;
-   struct dhcp_vendor *vendor;
-@@ -259,15 +259,15 @@ static int dhcp6_maybe_relay(struct state *state, void *inbuff, size_t sz,
-   return 1;
- }
- 
--static int dhcp6_no_relay(struct state *state, int msg_type, void *inbuff, size_t sz, int is_unicast, time_t now)
-+static int dhcp6_no_relay(struct state *state, int msg_type, unsigned char *inbuff, size_t sz, int is_unicast, time_t now)
- {
-   void *opt;
--  int i, o, o1, start_opts;
-+  int i, o, o1, start_opts, start_msg;
-   struct dhcp_opt *opt_cfg;
-   struct dhcp_netid *tagif;
-   struct dhcp_config *config = NULL;
-   struct dhcp_netid known_id, iface_id, v6_id;
--  unsigned char *outmsgtypep;
-+  unsigned char outmsgtype;
-   struct dhcp_vendor *vendor;
-   struct dhcp_context *context_tmp;
-   struct dhcp_mac *mac_opt;
-@@ -296,12 +296,13 @@ static int dhcp6_no_relay(struct state *state, int msg_type, void *inbuff, size_
-   v6_id.next = state->tags;
-   state->tags = &v6_id;
- 
--  /* copy over transaction-id, and save pointer to message type */
--  if (!(outmsgtypep = put_opt6(inbuff, 4)))
-+  start_msg = save_counter(-1);
-+  /* copy over transaction-id */
-+  if (!put_opt6(inbuff, 4))
-     return 0;
-   start_opts = save_counter(-1);
--  state->xid = outmsgtypep[3] | outmsgtypep[2] << 8 | outmsgtypep[1] << 16;
--   
-+  state->xid = inbuff[3] | inbuff[2] << 8 | inbuff[1] << 16;
-+    
-   /* We're going to be linking tags from all context we use. 
-      mark them as unused so we don't link one twice and break the list */
-   for (context_tmp = state->context; context_tmp; context_tmp = context_tmp->current)
-@@ -347,7 +348,7 @@ static int dhcp6_no_relay(struct state *state, int msg_type, void *inbuff, size_
-       (msg_type == DHCP6REQUEST || msg_type == DHCP6RENEW || msg_type == DHCP6RELEASE || msg_type == DHCP6DECLINE))
-     
-     {  
--      *outmsgtypep = DHCP6REPLY;
-+      outmsgtype = DHCP6REPLY;
-       o1 = new_opt6(OPTION6_STATUS_CODE);
-       put_opt6_short(DHCP6USEMULTI);
-       put_opt6_string("Use multicast");
-@@ -619,11 +620,11 @@ static int dhcp6_no_relay(struct state *state, int msg_type, void *inbuff, size_
- 	struct dhcp_netid *solicit_tags;
- 	struct dhcp_context *c;
- 	
--	*outmsgtypep = DHCP6ADVERTISE;
-+	outmsgtype = DHCP6ADVERTISE;
- 	
- 	if (opt6_find(state->packet_options, state->end, OPTION6_RAPID_COMMIT, 0))
- 	  {
--	    *outmsgtypep = DHCP6REPLY;
-+	    outmsgtype = DHCP6REPLY;
- 	    state->lease_allocate = 1;
- 	    o = new_opt6(OPTION6_RAPID_COMMIT);
- 	    end_opt6(o);
-@@ -809,7 +810,7 @@ static int dhcp6_no_relay(struct state *state, int msg_type, void *inbuff, size_
- 	int start = save_counter(-1);
- 
- 	/* set reply message type */
--	*outmsgtypep = DHCP6REPLY;
-+	outmsgtype = DHCP6REPLY;
- 	state->lease_allocate = 1;
- 
- 	log6_quiet(state, "DHCPREQUEST", NULL, ignore ? _("ignored") : NULL);
-@@ -924,7 +925,7 @@ static int dhcp6_no_relay(struct state *state, int msg_type, void *inbuff, size_
- 	int address_assigned = 0;
- 
- 	/* set reply message type */
--	*outmsgtypep = DHCP6REPLY;
-+	outmsgtype = DHCP6REPLY;
- 	
- 	log6_quiet(state, msg_type == DHCP6RENEW ? "DHCPRENEW" : "DHCPREBIND", NULL, NULL);
- 
-@@ -1057,7 +1058,7 @@ static int dhcp6_no_relay(struct state *state, int msg_type, void *inbuff, size_
- 	int good_addr = 0;
- 
- 	/* set reply message type */
--	*outmsgtypep = DHCP6REPLY;
-+	outmsgtype = DHCP6REPLY;
- 	
- 	log6_quiet(state, "DHCPCONFIRM", NULL, NULL);
- 	
-@@ -1121,7 +1122,7 @@ static int dhcp6_no_relay(struct state *state, int msg_type, void *inbuff, size_
- 	log6_quiet(state, "DHCPINFORMATION-REQUEST", NULL, ignore ? _("ignored") : state->hostname);
- 	if (ignore)
- 	  return 0;
--	*outmsgtypep = DHCP6REPLY;
-+	outmsgtype = DHCP6REPLY;
- 	tagif = add_options(state, 1);
- 	break;
-       }
-@@ -1130,7 +1131,7 @@ static int dhcp6_no_relay(struct state *state, int msg_type, void *inbuff, size_
-     case DHCP6RELEASE:
-       {
- 	/* set reply message type */
--	*outmsgtypep = DHCP6REPLY;
-+	outmsgtype = DHCP6REPLY;
- 
- 	log6_quiet(state, "DHCPRELEASE", NULL, NULL);
- 
-@@ -1195,7 +1196,7 @@ static int dhcp6_no_relay(struct state *state, int msg_type, void *inbuff, size_
-     case DHCP6DECLINE:
-       {
- 	/* set reply message type */
--	*outmsgtypep = DHCP6REPLY;
-+	outmsgtype = DHCP6REPLY;
- 	
- 	log6_quiet(state, "DHCPDECLINE", NULL, NULL);
- 
-@@ -1275,7 +1276,12 @@ static int dhcp6_no_relay(struct state *state, int msg_type, void *inbuff, size_
-       }
- 
-     }
--  
-+
-+  /* Fill in the message type. Note that we store the offset,
-+     not a direct pointer, since the packet memory may have been 
-+     reallocated. */
-+  ((unsigned char *)(daemon->outpacket.iov_base))[start_msg] = outmsgtype;
-+
-   log_tags(tagif, state->xid);
-   log6_opts(0, state->xid, daemon->outpacket.iov_base + start_opts, daemon->outpacket.iov_base + save_counter(-1));
-   
--- 
-2.25.1
-
diff --git a/meta-openembedded/meta-networking/recipes-support/dnsmasq/dnsmasq_2.86.bb b/meta-openembedded/meta-networking/recipes-support/dnsmasq/dnsmasq_2.86.bb
deleted file mode 100644
index 0f7880c..0000000
--- a/meta-openembedded/meta-networking/recipes-support/dnsmasq/dnsmasq_2.86.bb
+++ /dev/null
@@ -1,8 +0,0 @@
-require dnsmasq.inc
-
-SRC_URI[dnsmasq-2.86.sha256sum] = "ef15f608a83ee2b1d1d2c1f11d089a7e0ac401ffb0991de73fc01ce5f290e512"
-SRC_URI += "\
-    file://lua.patch \
-    file://CVE-2022-0934.patch \
-"
-
diff --git a/meta-openembedded/meta-networking/recipes-support/dnsmasq/dnsmasq_2.87.bb b/meta-openembedded/meta-networking/recipes-support/dnsmasq/dnsmasq_2.87.bb
new file mode 100644
index 0000000..793b61d
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-support/dnsmasq/dnsmasq_2.87.bb
@@ -0,0 +1,7 @@
+require dnsmasq.inc
+
+SRC_URI[dnsmasq-2.87.sha256sum] = "ae39bffde9c37e4d64849b528afeb060be6bad6d1044a3bd94a49fce41357284"
+SRC_URI += "\
+    file://lua.patch \
+"
+
diff --git a/meta-openembedded/meta-networking/recipes-support/ettercap/ettercap_0.8.3.1.bb b/meta-openembedded/meta-networking/recipes-support/ettercap/ettercap_0.8.3.1.bb
index 99fcacd..7d37f41 100644
--- a/meta-openembedded/meta-networking/recipes-support/ettercap/ettercap_0.8.3.1.bb
+++ b/meta-openembedded/meta-networking/recipes-support/ettercap/ettercap_0.8.3.1.bb
@@ -32,6 +32,7 @@
     -DENABLE_GTK=OFF \
 "
 
+CFLAGS += "-D_GNU_SOURCE"
 # Replaces default encoding set (ISO-8859-1) with UTF-8 in ettercap
 # configuration file installed by the package.
 # It ensures that all characters are properly decoded and avoids
diff --git a/meta-openembedded/meta-networking/recipes-support/fping/fping/0001-fping-Initialize-msghdr-struct-in-a-portable-way.patch b/meta-openembedded/meta-networking/recipes-support/fping/fping/0001-fping-Initialize-msghdr-struct-in-a-portable-way.patch
new file mode 100644
index 0000000..2a5b3e6
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-support/fping/fping/0001-fping-Initialize-msghdr-struct-in-a-portable-way.patch
@@ -0,0 +1,42 @@
+From c3f476a763412be51b4df0e748af04d4150a2c71 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 15:41:51 -0700
+Subject: [PATCH] fping: Initialize msghdr struct in a portable way
+
+Initializing the structure assuming glibc layout results in
+compile errors on musl, therefore do partial initialization and then
+assign the members individually.
+
+Upstream-Status: Submitted [https://github.com/schweikert/fping/pull/263]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/fping.c | 16 +++++++---------
+ 1 file changed, 7 insertions(+), 9 deletions(-)
+
+diff --git a/src/fping.c b/src/fping.c
+index e26b216..81a61d9 100644
+--- a/src/fping.c
++++ b/src/fping.c
+@@ -1951,15 +1951,13 @@ int receive_packet(int64_t wait_time,
+         reply_buf,
+         reply_buf_len
+     };
+-    struct msghdr recv_msghdr = {
+-        reply_src_addr,
+-        reply_src_addr_len,
+-        &msg_iov,
+-        1,
+-        &msg_control,
+-        sizeof(msg_control),
+-        0
+-    };
++    struct msghdr recv_msghdr = {0};
++    recv_msghdr.msg_name = reply_src_addr;
++    recv_msghdr.msg_namelen = reply_src_addr_len;
++    recv_msghdr.msg_iov = &msg_iov;
++    recv_msghdr.msg_iovlen = 1;
++    recv_msghdr.msg_control = &msg_control;
++    recv_msghdr.msg_controllen = sizeof(msg_control);
+ #if HAVE_SO_TIMESTAMPNS
+     struct cmsghdr* cmsg;
+ #endif
diff --git a/meta-openembedded/meta-networking/recipes-support/fping/fping_5.1.bb b/meta-openembedded/meta-networking/recipes-support/fping/fping_5.1.bb
index f2dd897..0682eaf 100644
--- a/meta-openembedded/meta-networking/recipes-support/fping/fping_5.1.bb
+++ b/meta-openembedded/meta-networking/recipes-support/fping/fping_5.1.bb
@@ -14,7 +14,9 @@
 NO_GENERIC_LICENSE[fping] = "COPYING"
 LIC_FILES_CHKSUM = "file://COPYING;md5=c6170fbadddfcd74f011515291d96901"
 
-SRC_URI = "http://www.fping.org/dist/fping-${PV}.tar.gz"
+SRC_URI = "http://www.fping.org/dist/fping-${PV}.tar.gz \
+           file://0001-fping-Initialize-msghdr-struct-in-a-portable-way.patch \
+           "
 SRC_URI[sha256sum] = "1ee5268c063d76646af2b4426052e7d81a42b657e6a77d8e7d3d2e60fd7409fe"
 
 S = "${WORKDIR}/fping-${PV}"
diff --git a/meta-openembedded/meta-networking/recipes-support/libesmtp/libesmtp_1.1.0.bb b/meta-openembedded/meta-networking/recipes-support/libesmtp/libesmtp_1.1.0.bb
index 164c8c2..3d0764a 100644
--- a/meta-openembedded/meta-networking/recipes-support/libesmtp/libesmtp_1.1.0.bb
+++ b/meta-openembedded/meta-networking/recipes-support/libesmtp/libesmtp_1.1.0.bb
@@ -32,5 +32,9 @@
 
 CFLAGS += "-D_GNU_SOURCE"
 
+do_configure:prepend:libc-glibc() {
+    sed -i -e "s/conf.set('HAVE_WORKING_STRERROR_R', 0)/conf.set('HAVE_WORKING_STRERROR_R', 1)/g" ${S}/meson.build
+}
+
 FILES:${PN} = "${libdir}/lib*${SOLIBS} \
                ${libdir}/esmtp-plugins-6.2.0/*${SOLIBSDEV}"
diff --git a/meta-openembedded/meta-networking/recipes-support/linux-atm/linux-atm/0001-configure-Check-for-symbol-from-libresolv-instead-of.patch b/meta-openembedded/meta-networking/recipes-support/linux-atm/linux-atm/0001-configure-Check-for-symbol-from-libresolv-instead-of.patch
new file mode 100644
index 0000000..abae366
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-support/linux-atm/linux-atm/0001-configure-Check-for-symbol-from-libresolv-instead-of.patch
@@ -0,0 +1,28 @@
+From a2a2e1b7a3f4f90e32912b5ba9b79e1a02275775 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 11:17:13 -0700
+Subject: [PATCH] configure: Check for symbol from libresolv instead of main
+
+This check will fail with modern autoconf and compilers
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.in | 5 -----
+ 1 file changed, 5 deletions(-)
+
+--- a/configure.in
++++ b/configure.in
+@@ -45,12 +45,7 @@ AC_CHECK_HEADER(linux/atmsap.h, ,
+ )
+ 
+ dnl Check for libraries
+-dnl libresolv is required
+-AC_CHECK_LIB(resolv, main, ,
+-	AC_MSG_ERROR([*** Unable to find libresolv!!!])
+-)
+ 
+-dnl We don't want libresolv everywhere, just with libatm
+ LIBS=""
+ 
+ INCLUDES="-I\$(top_srcdir)/src/include"
diff --git a/meta-openembedded/meta-networking/recipes-support/linux-atm/linux-atm/0001-include-string-h-from-memcpy-and-strcpy-function-pro.patch b/meta-openembedded/meta-networking/recipes-support/linux-atm/linux-atm/0001-include-string-h-from-memcpy-and-strcpy-function-pro.patch
new file mode 100644
index 0000000..81c5efd
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-support/linux-atm/linux-atm/0001-include-string-h-from-memcpy-and-strcpy-function-pro.patch
@@ -0,0 +1,87 @@
+From 5217cb7c829cf87771096c4ce41fd4648dca47cb Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 16:36:21 -0700
+Subject: [PATCH] include string.h for memcpy and strcpy function prototypes
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/led/address.c  | 1 +
+ src/led/display.c  | 1 +
+ src/lib/unix.c     | 1 +
+ src/maint/hediag.c | 1 +
+ src/sigd/kernel.c  | 1 +
+ src/sigd/policy.c  | 1 +
+ 6 files changed, 6 insertions(+)
+
+diff --git a/src/led/address.c b/src/led/address.c
+index 574e881..b5c5fbb 100644
+--- a/src/led/address.c
++++ b/src/led/address.c
+@@ -31,6 +31,7 @@
+ #endif
+ 
+ #include <sys/ioctl.h>
++#include <string.h>
+ #include <unistd.h>
+ #include <errno.h>
+ 
+diff --git a/src/led/display.c b/src/led/display.c
+index d78a15d..b835e89 100644
+--- a/src/led/display.c
++++ b/src/led/display.c
+@@ -5,6 +5,7 @@
+ #if HAVE_CONFIG_H
+ #include <config.h>
+ #endif
++#include <string.h>
+ 
+ #include <atm.h>
+ #include <atmd.h>
+diff --git a/src/lib/unix.c b/src/lib/unix.c
+index 34aa465..d5bef54 100644
+--- a/src/lib/unix.c
++++ b/src/lib/unix.c
+@@ -8,6 +8,7 @@
+ 
+ #include <stdlib.h>
+ #include <stdio.h>
++#include <string.h>
+ #include <unistd.h>
+ #include <errno.h>
+ #include <sys/types.h>
+diff --git a/src/maint/hediag.c b/src/maint/hediag.c
+index 8a4312a..a4f792f 100644
+--- a/src/maint/hediag.c
++++ b/src/maint/hediag.c
+@@ -1,5 +1,6 @@
+ #include <stdio.h>
+ #include <stdlib.h>
++#include <string.h>
+ #include <unistd.h>
+ #include <sys/ioctl.h>
+ #include <sys/types.h>
+diff --git a/src/sigd/kernel.c b/src/sigd/kernel.c
+index 9ee74b1..2491626 100644
+--- a/src/sigd/kernel.c
++++ b/src/sigd/kernel.c
+@@ -8,6 +8,7 @@
+ 
+ #include <stdlib.h>
+ #include <stdio.h>
++#include <string.h>
+ #include <errno.h>
+ #include <assert.h>
+ 
+diff --git a/src/sigd/policy.c b/src/sigd/policy.c
+index 2cfb42d..87223a7 100644
+--- a/src/sigd/policy.c
++++ b/src/sigd/policy.c
+@@ -6,6 +6,7 @@
+ #include <config.h>
+ #endif
+ 
++#include <string.h>
+ #include <atm.h>
+ #include <atmd.h>
+ 
diff --git a/meta-openembedded/meta-networking/recipes-support/linux-atm/linux-atm_2.5.2.bb b/meta-openembedded/meta-networking/recipes-support/linux-atm/linux-atm_2.5.2.bb
index bb79b78..86b9112 100644
--- a/meta-openembedded/meta-networking/recipes-support/linux-atm/linux-atm_2.5.2.bb
+++ b/meta-openembedded/meta-networking/recipes-support/linux-atm/linux-atm_2.5.2.bb
@@ -14,7 +14,9 @@
            file://0001-IFNAMSIZ-is-defined-in-net-if.h.patch \
            file://0001-saaldump-atmdump-Include-linux-sockios.h-for-SIOCGST.patch \
            file://0001-make-Add-PREFIX-knob.patch \
-"
+           file://0001-include-string-h-from-memcpy-and-strcpy-function-pro.patch \
+           file://0001-configure-Check-for-symbol-from-libresolv-instead-of.patch \
+           "
 
 SRC_URI:append:libc-musl = " file://musl-no-on_exit.patch"
 
diff --git a/meta-openembedded/meta-networking/recipes-support/mctp/mctp_git.bb b/meta-openembedded/meta-networking/recipes-support/mctp/mctp_git.bb
index 602724d..516f3e2 100644
--- a/meta-openembedded/meta-networking/recipes-support/mctp/mctp_git.bb
+++ b/meta-openembedded/meta-networking/recipes-support/mctp/mctp_git.bb
@@ -7,7 +7,7 @@
 
 PV = "1.0+git${SRCPV}"
 
-SRCREV = "669740432af525c19a6a41cec777406fbbc24836"
+SRCREV = "ae3a9162d6c5df0fa50abc34fc951dfd0e3d23c5"
 
 SRC_URI = "git://github.com/CodeConstruct/mctp;branch=main;protocol=https"
 
diff --git a/meta-openembedded/meta-networking/recipes-support/mdio-tools/mdio-netlink_git.bb b/meta-openembedded/meta-networking/recipes-support/mdio-tools/mdio-netlink_1.2.0.bb
similarity index 100%
rename from meta-openembedded/meta-networking/recipes-support/mdio-tools/mdio-netlink_git.bb
rename to meta-openembedded/meta-networking/recipes-support/mdio-tools/mdio-netlink_1.2.0.bb
diff --git a/meta-openembedded/meta-networking/recipes-support/mdio-tools/mdio-tools.inc b/meta-openembedded/meta-networking/recipes-support/mdio-tools/mdio-tools.inc
index a8a435f..7afe1c5 100644
--- a/meta-openembedded/meta-networking/recipes-support/mdio-tools/mdio-tools.inc
+++ b/meta-openembedded/meta-networking/recipes-support/mdio-tools/mdio-tools.inc
@@ -5,5 +5,4 @@
 LIC_FILES_CHKSUM = "file://${WORKDIR}/git/COPYING;md5=b234ee4d69f5fce4486a80fdaf4a4263"
 
 SRC_URI = "git://github.com/wkz/mdio-tools.git;protocol=https;branch=master"
-SRCREV = "07cbff2d5e2de05037e5e7edd5044d678394c8d1"
-PV = "1.1.1"
+SRCREV = "ee47c32d958ae0dcb9900b3b06654a8c08001331"
diff --git a/meta-openembedded/meta-networking/recipes-support/mdio-tools/mdio-tools_git.bb b/meta-openembedded/meta-networking/recipes-support/mdio-tools/mdio-tools_1.2.0.bb
similarity index 100%
rename from meta-openembedded/meta-networking/recipes-support/mdio-tools/mdio-tools_git.bb
rename to meta-openembedded/meta-networking/recipes-support/mdio-tools/mdio-tools_1.2.0.bb
diff --git a/meta-openembedded/meta-networking/recipes-support/memcached/memcached/0001-Fix-function-protypes.patch b/meta-openembedded/meta-networking/recipes-support/memcached/memcached/0001-Fix-function-protypes.patch
new file mode 100644
index 0000000..15ef54f
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-support/memcached/memcached/0001-Fix-function-protypes.patch
@@ -0,0 +1,110 @@
+From 6021d3d60e64d9174f41515d2d962df9b5d7645e Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 17:15:28 -0700
+Subject: [PATCH] Fix function prototypes
+
+clang-15+ has started diagnosing them as errors
+
+thread.c:925:18: error: a function declaration without a prototype is deprecated in all versions of C [-Werror,-Wstrict-prototypes]
+| void STATS_UNLOCK() {
+|                  ^
+|                   void
+
+Upstream-Status: Submitted [https://github.com/memcached/memcached/pull/928]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ assoc.c     | 4 ++--
+ memcached.c | 4 ++--
+ slabs.c     | 2 +-
+ testapp.c   | 2 +-
+ thread.c    | 4 ++--
+ 5 files changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/assoc.c b/assoc.c
+index bc68695..01063a9 100644
+--- a/assoc.c
++++ b/assoc.c
+@@ -261,7 +261,7 @@ static void *assoc_maintenance_thread(void *arg) {
+ 
+ static pthread_t maintenance_tid;
+ 
+-int start_assoc_maintenance_thread() {
++int start_assoc_maintenance_thread(void) {
+     int ret;
+     char *env = getenv("MEMCACHED_HASH_BULK_MOVE");
+     if (env != NULL) {
+@@ -279,7 +279,7 @@ int start_assoc_maintenance_thread() {
+     return 0;
+ }
+ 
+-void stop_assoc_maintenance_thread() {
++void stop_assoc_maintenance_thread(void) {
+     mutex_lock(&maintenance_lock);
+     do_run_maintenance_thread = 0;
+     pthread_cond_signal(&maintenance_cond);
+diff --git a/memcached.c b/memcached.c
+index 7871fe8..4d3b54a 100644
+--- a/memcached.c
++++ b/memcached.c
+@@ -84,7 +84,7 @@ static int try_read_command_udp(conn *c);
+ static enum try_read_result try_read_network(conn *c);
+ static enum try_read_result try_read_udp(conn *c);
+ 
+-static int start_conn_timeout_thread();
++static int start_conn_timeout_thread(void);
+ 
+ /* stats */
+ static void stats_init(void);
+@@ -374,7 +374,7 @@ static void *conn_timeout_thread(void *arg) {
+     return NULL;
+ }
+ 
+-static int start_conn_timeout_thread() {
++static int start_conn_timeout_thread(void) {
+     int ret;
+ 
+     if (settings.idle_timeout == 0)
+diff --git a/slabs.c b/slabs.c
+index 3c78d8a..0dadd35 100644
+--- a/slabs.c
++++ b/slabs.c
+@@ -638,7 +638,7 @@ static void *memory_allocate(size_t size) {
+ }
+ 
+ /* Must only be used if all pages are item_size_max */
+-static void memory_release() {
++static void memory_release(void) {
+     void *p = NULL;
+     if (mem_base != NULL)
+         return;
+diff --git a/testapp.c b/testapp.c
+index 5face54..387a847 100644
+--- a/testapp.c
++++ b/testapp.c
+@@ -80,7 +80,7 @@ static struct conn *con = NULL;
+ static bool allow_closed_read = false;
+ static bool enable_ssl = false;
+ 
+-static void close_conn() {
++static void close_conn(void) {
+     if (con == NULL) return;
+ #ifdef TLS
+     if (con->ssl) {
+diff --git a/thread.c b/thread.c
+index d5ed052..f5efdc3 100644
+--- a/thread.c
++++ b/thread.c
+@@ -918,11 +918,11 @@ enum store_item_type store_item(item *item, int comm, conn* c) {
+ 
+ /******************************* GLOBAL STATS ******************************/
+ 
+-void STATS_LOCK() {
++void STATS_LOCK(void) {
+     pthread_mutex_lock(&stats_lock);
+ }
+ 
+-void STATS_UNLOCK() {
++void STATS_UNLOCK(void) {
+     pthread_mutex_unlock(&stats_lock);
+ }
+ 
diff --git a/meta-openembedded/meta-networking/recipes-support/memcached/memcached_1.6.15.bb b/meta-openembedded/meta-networking/recipes-support/memcached/memcached_1.6.17.bb
similarity index 83%
rename from meta-openembedded/meta-networking/recipes-support/memcached/memcached_1.6.15.bb
rename to meta-openembedded/meta-networking/recipes-support/memcached/memcached_1.6.17.bb
index b28bfe7..270ad54 100644
--- a/meta-openembedded/meta-networking/recipes-support/memcached/memcached_1.6.15.bb
+++ b/meta-openembedded/meta-networking/recipes-support/memcached/memcached_1.6.17.bb
@@ -12,7 +12,7 @@
 
 LIC_FILES_CHKSUM = "file://COPYING;md5=7e5ded7363d335e1bb18013ca08046ff"
 
-inherit autotools
+inherit autotools pkgconfig
 
 DEPENDS += "libevent"
 RDEPENDS:${PN} += "perl perl-module-posix perl-module-autoloader \
@@ -21,12 +21,16 @@
 
 SRC_URI = "http://www.memcached.org/files/${BP}.tar.gz \
            file://memcached-add-hugetlbfs-check.patch \
+           file://0001-Fix-function-protypes.patch \
            "
-SRC_URI[sha256sum] = "8d7abe3d649378edbba16f42ef1d66ca3f2ac075f2eb97145ce164388e6ed515"
+SRC_URI[sha256sum] = "2055e373613d8fc21529aff9f0adce3e23b9ce01ba0478d30e7941d9f2bd1224"
 
 # set the same COMPATIBLE_HOST as libhugetlbfs
 COMPATIBLE_HOST = "(i.86|x86_64|powerpc|powerpc64|aarch64|arm).*-linux*"
 
+# assoc.c:83:9: error: variable 'depth' set but not used [-Werror,-Wunused-but-set-variable]
+CFLAGS:append:toolchain-clang = " -Wno-error=unused-but-set-variable"
+
 python __anonymous () {
     endianness = d.getVar('SITEINFO_ENDIANNESS')
     if endianness == 'le':
diff --git a/meta-openembedded/meta-networking/recipes-support/nis/ypbind-mt/0001-Add-configure-time-check-for-gettid-API.patch b/meta-openembedded/meta-networking/recipes-support/nis/ypbind-mt/0001-Add-configure-time-check-for-gettid-API.patch
deleted file mode 100644
index 9484d0b..0000000
--- a/meta-openembedded/meta-networking/recipes-support/nis/ypbind-mt/0001-Add-configure-time-check-for-gettid-API.patch
+++ /dev/null
@@ -1,45 +0,0 @@
-From 25d5daf30aa2dc451ba528712f668036d8506054 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Sat, 27 Jul 2019 07:50:26 -0700
-Subject: [PATCH] Add configure time check for gettid API
-
-glibc 2.30 has added this syscalls wrapper
-
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- configure.ac  | 1 +
- src/log_msg.c | 2 ++
- 2 files changed, 3 insertions(+)
-
-diff --git a/configure.ac b/configure.ac
-index 9e32a4a..6d1067b 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -33,6 +33,7 @@ if test -n "$TIRPC_LIBS"; then
-   AC_DEFINE(HAVE_TIRPC, 1, [Define to 1 if TIRPC is available])
- fi
- 
-+AC_CHECK_FUNCS(gettid)
- dnl The difficult part: could we reconfigure NSS or must we use DNS ?
- AC_CHECK_FUNCS(__nss_configure_lookup)
- if eval "test \"`echo '$ac_cv_func___nss_configure_lookup'`\" != yes"; then
-diff --git a/src/log_msg.c b/src/log_msg.c
-index 49f0905..37da311 100644
---- a/src/log_msg.c
-+++ b/src/log_msg.c
-@@ -26,6 +26,7 @@
- #include "log_msg.h"
- 
- #include <sys/syscall.h>
-+#if !HAVE_GETTID
- #ifdef __NR_gettid
- static pid_t
- gettid (void)
-@@ -39,6 +40,7 @@ gettid (void)
-     return getpid ();
- }
- #endif
-+#endif
- 
- int debug_flag = 0;
- int logfile_flag = 0;
diff --git a/meta-openembedded/meta-networking/recipes-support/nis/ypbind-mt_2.6.bb b/meta-openembedded/meta-networking/recipes-support/nis/ypbind-mt_2.7.2.bb
similarity index 89%
rename from meta-openembedded/meta-networking/recipes-support/nis/ypbind-mt_2.6.bb
rename to meta-openembedded/meta-networking/recipes-support/nis/ypbind-mt_2.7.2.bb
index 2c5e2bd..a95062b 100644
--- a/meta-openembedded/meta-networking/recipes-support/nis/ypbind-mt_2.6.bb
+++ b/meta-openembedded/meta-networking/recipes-support/nis/ypbind-mt_2.7.2.bb
@@ -29,13 +29,11 @@
 PROVIDES += "ypbind"
 
 SRC_URI = "https://github.com/thkukuk/ypbind-mt/releases/download/v${PV}/${BPN}-${PV}.tar.xz \
+           file://0001-dns_hosts-Fix-build-with-musl.patch \
            file://ypbind.init \
            file://ypbind.service \
-           file://0001-dns_hosts-Fix-build-with-musl.patch \
-           file://0001-Add-configure-time-check-for-gettid-API.patch \
            "
-SRC_URI[md5sum] = "7cf89641fdc128d0919207e4b7caaf1d"
-SRC_URI[sha256sum] = "0696c0263c4fd48a4ff2ce6c109f05f37aab0f71646d81cb22c7c28591bf80eb"
+SRC_URI[sha256sum] = "064f2f185673c5493df83f6400b799f3a359de56118b6ba37c4327111f2fcd8b"
 
 inherit systemd update-rc.d
 
@@ -57,6 +55,8 @@
     install -m 0644 ${WORKDIR}/ypbind.service ${D}${systemd_unitdir}/system
 }
 
+# uses glibc internal APIs e.g. _hostalias
+COMPATIBLE_HOST:libc-musl = "null"
 
 RPROVIDES:${PN} += "${PN}-systemd"
 RREPLACES:${PN} += "${PN}-systemd"
diff --git a/meta-openembedded/meta-networking/recipes-support/ntp/ntp/0001-sntp-Fix-types-in-check-for-pthread_detach.patch b/meta-openembedded/meta-networking/recipes-support/ntp/ntp/0001-sntp-Fix-types-in-check-for-pthread_detach.patch
new file mode 100644
index 0000000..ca15470
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-support/ntp/ntp/0001-sntp-Fix-types-in-check-for-pthread_detach.patch
@@ -0,0 +1,33 @@
+From 864f43ae09d18b1114d5c894e836698743e4e44c Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 6 Sep 2022 21:36:35 -0700
+Subject: [PATCH] sntp: Fix types in check for pthread_detach
+
+New compilers are stricter and flag passing NULL as a pthread_t as an error,
+therefore use a pthread_t value constructed from -1 instead
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ sntp/m4/openldap-thread-check.m4 | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/sntp/m4/openldap-thread-check.m4 b/sntp/m4/openldap-thread-check.m4
+index 7768a5c..b9e54ad 100644
+--- a/sntp/m4/openldap-thread-check.m4
++++ b/sntp/m4/openldap-thread-check.m4
+@@ -262,10 +262,7 @@ pthread_rwlock_t rwlock;
+ 				dnl save the flags
+ 				AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+ #include <pthread.h>
+-#ifndef NULL
+-#define NULL (void*)0
+-#endif
+-]], [[pthread_detach(NULL);]])],[ol_cv_func_pthread_detach=yes],[ol_cv_func_pthread_detach=no])
++]], [[pthread_detach((pthread_t)-1);]])],[ol_cv_func_pthread_detach=yes],[ol_cv_func_pthread_detach=no])
+ 			])
+ 
+ 			if test $ol_cv_func_pthread_detach = no ; then
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-networking/recipes-support/ntp/ntp_4.2.8p15.bb b/meta-openembedded/meta-networking/recipes-support/ntp/ntp_4.2.8p15.bb
index a30f720..2ae53dc 100644
--- a/meta-openembedded/meta-networking/recipes-support/ntp/ntp_4.2.8p15.bb
+++ b/meta-openembedded/meta-networking/recipes-support/ntp/ntp_4.2.8p15.bb
@@ -15,6 +15,7 @@
            file://reproducibility-fixed-path-to-posix-shell.patch \
            file://0001-libntp-Do-not-use-PTHREAD_STACK_MIN-on-glibc.patch \
            file://0001-test-Fix-build-with-new-compiler-defaults-to-fno-com.patch \
+           file://0001-sntp-Fix-types-in-check-for-pthread_detach.patch \
            file://ntpd \
            file://ntp.conf \
            file://ntpdate \
@@ -138,8 +139,9 @@
 # ntp originally includes tickadj. It's split off for inclusion in small firmware images on platforms
 # with wonky clocks (e.g. OpenSlug)
 RDEPENDS:${PN} = "${PN}-tickadj"
-# ntpd require libgcc for execution
+# ntpd & sntp require libgcc for execution due to pthread_cancel/pthread_exit calls
 RDEPENDS:${PN} += "libgcc"
+RDEPENDS:sntp += "libgcc"
 # Handle move from bin to utils package
 RPROVIDES:${PN}-utils = "${PN}-bin"
 RREPLACES:${PN}-utils = "${PN}-bin"
diff --git a/meta-openembedded/meta-networking/recipes-support/open-isns/open-isns_0.101.bb b/meta-openembedded/meta-networking/recipes-support/open-isns/open-isns_0.102.bb
similarity index 96%
rename from meta-openembedded/meta-networking/recipes-support/open-isns/open-isns_0.101.bb
rename to meta-openembedded/meta-networking/recipes-support/open-isns/open-isns_0.102.bb
index f2a084d..24c5209 100644
--- a/meta-openembedded/meta-networking/recipes-support/open-isns/open-isns_0.101.bb
+++ b/meta-openembedded/meta-networking/recipes-support/open-isns/open-isns_0.102.bb
@@ -17,7 +17,7 @@
            file://0001-isnsd.socket-use-run-instead-of-var-run.patch \
            "
 
-SRCREV = "0d86dc31fae2e2d77a082ccea5aba95426b40c3c"
+SRCREV = "c0e6d9fedc5a7041260de477fe1a8455fa074113"
 
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-networking/recipes-support/rdma-core/rdma-core/0001-examples-Include-alloca.h-for-strdupa.patch b/meta-openembedded/meta-networking/recipes-support/rdma-core/rdma-core/0001-examples-Include-alloca.h-for-strdupa.patch
new file mode 100644
index 0000000..cb472d9
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-support/rdma-core/rdma-core/0001-examples-Include-alloca.h-for-strdupa.patch
@@ -0,0 +1,29 @@
+From a6c547e46bbadd2c08be9944a85308f6625263cb Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 23:15:44 -0700
+Subject: [PATCH] examples: Include alloca.h for strdupa
+
+musl defines strdupa via a macro which uses alloca() therefore include
+the header to get the prototype
+
+Upstream-Status: Submitted [https://github.com/linux-rdma/rdma-core/pull/1212]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ libibverbs/examples/asyncwatch.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/libibverbs/examples/asyncwatch.c b/libibverbs/examples/asyncwatch.c
+index 724796e58..842b94180 100644
+--- a/libibverbs/examples/asyncwatch.c
++++ b/libibverbs/examples/asyncwatch.c
+@@ -36,6 +36,7 @@
+ #include <endian.h>
+ #include <getopt.h>
+ #include <string.h>
++#include <alloca.h>
+ 
+ #include <util/compiler.h>
+ #include <infiniband/verbs.h>
+-- 
+2.37.2
+
diff --git a/meta-openembedded/meta-networking/recipes-support/rdma-core/rdma-core_42.0.bb b/meta-openembedded/meta-networking/recipes-support/rdma-core/rdma-core_42.0.bb
index e1123dc..86243a5 100644
--- a/meta-openembedded/meta-networking/recipes-support/rdma-core/rdma-core_42.0.bb
+++ b/meta-openembedded/meta-networking/recipes-support/rdma-core/rdma-core_42.0.bb
@@ -5,7 +5,9 @@
 DEPENDS = "libnl"
 RDEPENDS:${PN} = "bash perl"
 
-SRC_URI = "git://github.com/linux-rdma/rdma-core.git;branch=master;protocol=https"
+SRC_URI = "git://github.com/linux-rdma/rdma-core.git;branch=master;protocol=https \
+           file://0001-examples-Include-alloca.h-for-strdupa.patch \
+           "
 SRCREV = "196bad56ed060612e22674b668b5ec3d8659ade3"
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-networking/recipes-support/tcpreplay/tcpreplay/0001-libopts.m4-set-POSIX_SHELL-to-bin-sh.patch b/meta-openembedded/meta-networking/recipes-support/tcpreplay/tcpreplay/0001-libopts.m4-set-POSIX_SHELL-to-bin-sh.patch
new file mode 100644
index 0000000..448455f
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-support/tcpreplay/tcpreplay/0001-libopts.m4-set-POSIX_SHELL-to-bin-sh.patch
@@ -0,0 +1,45 @@
+From 769e96b60f631e8c208fd7f72900d0bb17760f88 Mon Sep 17 00:00:00 2001
+From: Yi Zhao <yi.zhao@windriver.com>
+Date: Tue, 30 Aug 2022 09:54:11 +0800
+Subject: [PATCH] libopts.m4: set POSIX_SHELL to /bin/sh
+
+POSIX_SHELL ends up set to a host tool path because configure searches the
+build host using `which`. Set it to the fixed path '/bin/sh' instead.
+
+Upstream-Status: Inappropriate [embedded specific]
+
+Signed-off-by: Yi Zhao <yi.zhao@windriver.com>
+---
+ m4/libopts.m4 | 16 +---------------
+ 1 file changed, 1 insertion(+), 15 deletions(-)
+
+diff --git a/m4/libopts.m4 b/m4/libopts.m4
+index cfbd477..c8047eb 100644
+--- a/m4/libopts.m4
++++ b/m4/libopts.m4
+@@ -111,21 +111,7 @@ AC_DEFUN([INVOKE_LIBOPTS_MACROS_FIRST],[
+   AC_CHECK_FUNCS([mmap canonicalize_file_name snprintf strdup strchr \
+                  strrchr strsignal fchmod fstat chmod])
+   AC_PROG_SED
+-  [while :
+-  do
+-      POSIX_SHELL=`which bash`
+-      test -x "$POSIX_SHELL" && break
+-      POSIX_SHELL=`which dash`
+-      test -x "$POSIX_SHELL" && break
+-      POSIX_SHELL=/usr/xpg4/bin/sh
+-      test -x "$POSIX_SHELL" && break
+-      POSIX_SHELL=`/bin/sh -c '
+-          exec 2>/dev/null
+-          if ! true ; then exit 1 ; fi
+-          echo /bin/sh'`
+-      test -x "$POSIX_SHELL" && break
+-      ]AC_MSG_ERROR([cannot locate a working POSIX shell])[
+-  done]
++  POSIX_SHELL='/bin/sh'
+   AC_DEFINE_UNQUOTED([POSIX_SHELL], ["${POSIX_SHELL}"],
+            [define to a working POSIX compliant shell])
+   AC_SUBST([POSIX_SHELL])
+-- 
+2.25.1
+
diff --git a/meta-openembedded/meta-networking/recipes-support/tcpreplay/tcpreplay_4.4.2.bb b/meta-openembedded/meta-networking/recipes-support/tcpreplay/tcpreplay_4.4.2.bb
index 165a0e7..7d2a6a1 100644
--- a/meta-openembedded/meta-networking/recipes-support/tcpreplay/tcpreplay_4.4.2.bb
+++ b/meta-openembedded/meta-networking/recipes-support/tcpreplay/tcpreplay_4.4.2.bb
@@ -7,7 +7,9 @@
 LICENSE = "GPL-3.0-only"
 LIC_FILES_CHKSUM = "file://docs/LICENSE;md5=10f0474a2f0e5dccfca20f69d6598ad8"
 
-SRC_URI = "https://github.com/appneta/tcpreplay/releases/download/v${PV}/tcpreplay-${PV}.tar.gz"
+SRC_URI = "https://github.com/appneta/tcpreplay/releases/download/v${PV}/tcpreplay-${PV}.tar.gz \
+           file://0001-libopts.m4-set-POSIX_SHELL-to-bin-sh.patch \
+          "
 
 SRC_URI[sha256sum] = "5b272cd83b67d6288a234ea15f89ecd93b4fadda65eddc44e7b5fcb2f395b615"
 
@@ -19,3 +21,6 @@
 
 inherit siteinfo autotools-brokensep
 
+do_install:append() {
+    sed -i -e 's:${RECIPE_SYSROOT}::g' ${S}/src/defines.h
+}
diff --git a/meta-openembedded/meta-networking/recipes-support/unbound/unbound_1.16.2.bb b/meta-openembedded/meta-networking/recipes-support/unbound/unbound_1.16.3.bb
similarity index 96%
rename from meta-openembedded/meta-networking/recipes-support/unbound/unbound_1.16.2.bb
rename to meta-openembedded/meta-networking/recipes-support/unbound/unbound_1.16.3.bb
index 63036f6..5e40d8c 100644
--- a/meta-openembedded/meta-networking/recipes-support/unbound/unbound_1.16.2.bb
+++ b/meta-openembedded/meta-networking/recipes-support/unbound/unbound_1.16.3.bb
@@ -12,7 +12,7 @@
 SRC_URI = "git://github.com/NLnetLabs/unbound.git;protocol=http;branch=master;protocol=https \
 	file://0001-contrib-add-yocto-compatible-init-script.patch \
 "
-SRCREV = "cbed768b8ff9bfcf11089a5f1699b7e5707f1ea5"
+SRCREV = "137719522a8ea5b380fbb6206d2466f402f5b554"
 
 inherit autotools pkgconfig systemd update-rc.d
 
diff --git a/meta-openembedded/meta-networking/recipes-support/wireshark/files/CVE-2022-3190.patch b/meta-openembedded/meta-networking/recipes-support/wireshark/files/CVE-2022-3190.patch
new file mode 100644
index 0000000..0b98770
--- /dev/null
+++ b/meta-openembedded/meta-networking/recipes-support/wireshark/files/CVE-2022-3190.patch
@@ -0,0 +1,145 @@
+From 4585d515b962f3b3a5e81caa64e13e8d9ed2e431 Mon Sep 17 00:00:00 2001
+From: Hitendra Prajapati <hprajapati@mvista.com>
+Date: Mon, 26 Sep 2022 12:47:00 +0530
+Subject: [PATCH] CVE-2022-3190
+
+Upstream-Status: Backport [https://gitlab.com/wireshark/wireshark/-/commit/67326401a595fffbc67eeed48eb6c55d66a55f67]
+CVE: CVE-2022-3190
+Signed-off-by: Hitendra Prajapati <hprajapati@mvista.com>
+---
+ epan/dissectors/packet-f5ethtrailer.c | 108 +++++++++++++-------------
+ 1 file changed, 56 insertions(+), 52 deletions(-)
+
+diff --git a/epan/dissectors/packet-f5ethtrailer.c b/epan/dissectors/packet-f5ethtrailer.c
+index ed77dfd..b15b0d4 100644
+--- a/epan/dissectors/packet-f5ethtrailer.c
++++ b/epan/dissectors/packet-f5ethtrailer.c
+@@ -2741,69 +2741,73 @@ dissect_dpt_trailer(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *d
+ static gint
+ dissect_old_trailer(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree, void *data)
+ {
+-    proto_tree *type_tree   = NULL;
+-    proto_item *ti          = NULL;
+     guint offset            = 0;
+-    guint processed         = 0;
+-    f5eth_tap_data_t *tdata = (f5eth_tap_data_t *)data;
+-    guint8 type;
+-    guint8 len;
+-    guint8 ver;
+ 
+     /* While we still have data in the trailer.  For old format trailers, this needs
+      * type, length, version (3 bytes) and for new format trailers, the magic header (4 bytes).
+      * All old format trailers are at least 4 bytes long, so just check for length of magic.
+      */
+-    while (tvb_reported_length_remaining(tvb, offset)) {
+-        type = tvb_get_guint8(tvb, offset);
+-        len = tvb_get_guint8(tvb, offset + F5_OFF_LENGTH) + F5_OFF_VERSION;
+-        ver = tvb_get_guint8(tvb, offset + F5_OFF_VERSION);
+-
+-        if (len <= tvb_reported_length_remaining(tvb, offset) && type >= F5TYPE_LOW
+-            && type <= F5TYPE_HIGH && len >= F5_MIN_SANE && len <= F5_MAX_SANE
+-            && ver <= F5TRAILER_VER_MAX) {
+-            /* Parse out the specified trailer. */
+-            switch (type) {
+-            case F5TYPE_LOW:
+-                ti        = proto_tree_add_item(tree, hf_low_id, tvb, offset, len, ENC_NA);
+-                type_tree = proto_item_add_subtree(ti, ett_f5ethtrailer_low);
+-
+-                processed = dissect_low_trailer(tvb, pinfo, type_tree, offset, len, ver, tdata);
+-                if (processed > 0) {
+-                    tdata->trailer_len += processed;
+-                    tdata->noise_low = 1;
+-                }
+-                break;
+-            case F5TYPE_MED:
+-                ti        = proto_tree_add_item(tree, hf_med_id, tvb, offset, len, ENC_NA);
+-                type_tree = proto_item_add_subtree(ti, ett_f5ethtrailer_med);
+-
+-                processed = dissect_med_trailer(tvb, pinfo, type_tree, offset, len, ver, tdata);
+-                if (processed > 0) {
+-                    tdata->trailer_len += processed;
+-                    tdata->noise_med = 1;
+-                }
+-                break;
+-            case F5TYPE_HIGH:
+-                ti        = proto_tree_add_item(tree, hf_high_id, tvb, offset, len, ENC_NA);
+-                type_tree = proto_item_add_subtree(ti, ett_f5ethtrailer_high);
+-
+-                processed =
+-                    dissect_high_trailer(tvb, pinfo, type_tree, offset, len, ver, tdata);
+-                if (processed > 0) {
+-                    tdata->trailer_len += processed;
+-                    tdata->noise_high = 1;
+-                }
+-                break;
++    while (tvb_reported_length_remaining(tvb, offset) >= F5_MIN_SANE) {
++        /* length field does not include the type and length bytes.  Add them back in */
++        guint8 len = tvb_get_guint8(tvb, offset + F5_OFF_LENGTH) + F5_OFF_VERSION;
++        if (len > tvb_reported_length_remaining(tvb, offset)
++            || len < F5_MIN_SANE || len > F5_MAX_SANE) {
++            /* Invalid length - either a malformed trailer, corrupt packet, or not f5ethtrailer */
++            return offset;
++        }
++        guint8 type = tvb_get_guint8(tvb, offset);
++        guint8 ver = tvb_get_guint8(tvb, offset + F5_OFF_VERSION);
++
++        /* Parse out the specified trailer. */
++        proto_tree *type_tree   = NULL;
++        proto_item *ti          = NULL;
++        f5eth_tap_data_t *tdata = (f5eth_tap_data_t *)data;
++        guint processed = 0;
++
++        switch (type) {
++        case F5TYPE_LOW:
++            ti        = proto_tree_add_item(tree, hf_low_id, tvb, offset, len, ENC_NA);
++            type_tree = proto_item_add_subtree(ti, ett_f5ethtrailer_low);
++
++            processed = dissect_low_trailer(tvb, pinfo, type_tree, offset, len, ver, tdata);
++            if (processed > 0) {
++                tdata->trailer_len += processed;
++                tdata->noise_low = 1;
+             }
+-            if (processed == 0) {
+-                proto_item_set_len(ti, 1);
+-                return offset;
++            break;
++        case F5TYPE_MED:
++            ti        = proto_tree_add_item(tree, hf_med_id, tvb, offset, len, ENC_NA);
++            type_tree = proto_item_add_subtree(ti, ett_f5ethtrailer_med);
++
++            processed = dissect_med_trailer(tvb, pinfo, type_tree, offset, len, ver, tdata);
++            if (processed > 0) {
++                tdata->trailer_len += processed;
++                tdata->noise_med = 1;
++            }
++            break;
++        case F5TYPE_HIGH:
++            ti        = proto_tree_add_item(tree, hf_high_id, tvb, offset, len, ENC_NA);
++            type_tree = proto_item_add_subtree(ti, ett_f5ethtrailer_high);
++
++            processed =
++                dissect_high_trailer(tvb, pinfo, type_tree, offset, len, ver, tdata);
++            if (processed > 0) {
++                tdata->trailer_len += processed;
++                tdata->noise_high = 1;
+             }
++            break;
++        default:
++            /* Unknown type - malformed trailer, corrupt packet, or not f5ethtrailer - bail out */
++            return offset;
++        }
++        if (processed == 0) {
++            /* couldn't process trailer - bail out */
++            proto_item_set_len(ti, 1);
++            return offset;
+         }
+         offset += processed;
+     }
+-return offset;
++    return offset;
+ } /* dissect_old_trailer() */
+ 
+ /*---------------------------------------------------------------------------*/
+-- 
+2.25.1
+
diff --git a/meta-openembedded/meta-networking/recipes-support/wireshark/wireshark_3.4.12.bb b/meta-openembedded/meta-networking/recipes-support/wireshark/wireshark_3.4.12.bb
index 38fdbce..1a4aedc 100644
--- a/meta-openembedded/meta-networking/recipes-support/wireshark/wireshark_3.4.12.bb
+++ b/meta-openembedded/meta-networking/recipes-support/wireshark/wireshark_3.4.12.bb
@@ -15,6 +15,7 @@
     file://0002-flex-Remove-line-directives.patch \
     file://0003-bison-Remove-line-directives.patch \
     file://0004-lemon-Remove-line-directives.patch \
+    file://CVE-2022-3190.patch \
 "
 
 UPSTREAM_CHECK_URI = "https://1.as.dl.wireshark.org/src"
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/lcdproc/lcdproc/0001-Fix-compilation-with-GCC-10.x.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/lcdproc/lcdproc/0001-Fix-compilation-with-GCC-10.x.patch
index 35634dd..cd8a69c 100644
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/lcdproc/lcdproc/0001-Fix-compilation-with-GCC-10.x.patch
+++ b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/lcdproc/lcdproc/0001-Fix-compilation-with-GCC-10.x.patch
@@ -16,8 +16,6 @@
  clients/lcdproc/iface.h | 2 +-
  2 files changed, 2 insertions(+), 1 deletion(-)
 
-diff --git a/clients/lcdproc/iface.c b/clients/lcdproc/iface.c
-index 40e50cb7..1ac355bd 100644
 --- a/clients/lcdproc/iface.c
 +++ b/clients/lcdproc/iface.c
 @@ -32,6 +32,7 @@
@@ -28,8 +26,6 @@
  
  static int iface_count = 0;	/* number of interfaces */
  static char unit_label[10] = "B";	/* default unit label is Bytes */
-diff --git a/clients/lcdproc/iface.h b/clients/lcdproc/iface.h
-index cc6dbaaf..c1bd6b5b 100644
 --- a/clients/lcdproc/iface.h
 +++ b/clients/lcdproc/iface.h
 @@ -18,7 +18,7 @@
@@ -41,6 +37,3 @@
  
  /** Update screen content */
  int iface_screen(int rep, int display, int *flags_ptr);
--- 
-2.28.0
-
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/lcdproc/lcdproc/0003-Fix-non-x86-platforms-on-musl.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/lcdproc/lcdproc/0003-Fix-non-x86-platforms-on-musl.patch
index e39e9bd..eed9cee 100644
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/lcdproc/lcdproc/0003-Fix-non-x86-platforms-on-musl.patch
+++ b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/lcdproc/lcdproc/0003-Fix-non-x86-platforms-on-musl.patch
@@ -17,19 +17,33 @@
  server/drivers/port.h | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
-diff --git a/server/drivers/port.h b/server/drivers/port.h
-index c584cd4e..bde235b3 100644
 --- a/server/drivers/port.h
 +++ b/server/drivers/port.h
-@@ -94,7 +94,7 @@ static inline int port_deny_multiple(unsigned short port, unsigned short count);
+@@ -94,7 +94,7 @@ static inline int port_deny_multiple(uns
  /*  ---------------------------- Linux ------------------------------------ */
  /*  Use ioperm, inb and outb in <sys/io.h> (Linux) */
  /*  And iopl for higher addresses of PCI LPT cards */
 -#if defined HAVE_IOPERM
-+#if defined(__GLIBC__) || (defined(__x86__) || defined(__x86_64__))
++#if HAVE_INB
  
  /* Glibc2 and Glibc1 */
  # ifdef HAVE_SYS_IO_H
--- 
-2.24.1
-
+@@ -333,7 +333,7 @@ static inline int port_deny_multiple (un
+ 	return i386_set_ioperm(port, count, 0);
+ }
+ 
+-#else
++#elif defined(__x86__) || defined(__x86_64__)
+ 
+ /*  ------------------------- Everything else ----------------------------- */
+ /*  Last chance! Use /dev/io and i386 ASM code (BSD4.3 ?) */
+--- a/configure.ac
++++ b/configure.ac
+@@ -144,6 +144,7 @@ AC_CHECK_LIB(i386, i386_get_ioperm,
+ 		[])]
+ )
+ 
++AC_CHECK_FUNCS(inb)
+ AC_CHECK_FUNCS(iopl)
+ AC_CHECK_FUNCS(ioperm)
+ AC_CHECK_HEADERS(sys/io.h)
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0001-rust.configure-Skip-all-target-manipulations.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0001-rust.configure-Skip-all-target-manipulations.patch
deleted file mode 100644
index 453174e..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0001-rust.configure-Skip-all-target-manipulations.patch
+++ /dev/null
@@ -1,66 +0,0 @@
-From b75661fbddd00ba9a43680c35b8c08aad8807d6b Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com>
-Date: Sun, 31 Oct 2021 16:49:55 +0100
-Subject: [PATCH] rust.configure: Skip all target manipulations
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Mozjs/rust targets are different from OE-rust targets. Use targets reported
-as is.
-
-Upstream-Status: Inappropriate [OE specific]
-
-Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
----
- build/moz.configure/rust.configure | 16 +---------------
- 1 file changed, 1 insertion(+), 15 deletions(-)
-
-diff --git a/build/moz.configure/rust.configure b/build/moz.configure/rust.configure
-index e5122d6..9f3cc91 100644
---- a/build/moz.configure/rust.configure
-+++ b/build/moz.configure/rust.configure
-@@ -81,9 +81,6 @@ def unwrap_rustup(prog, name):
- 
-     return unwrap
- 
--rustc = unwrap_rustup(rustc, 'rustc')
--cargo = unwrap_rustup(cargo, 'cargo')
--
- 
- set_config('CARGO', cargo)
- set_config('RUSTC', rustc)
-@@ -239,6 +236,7 @@ def rust_triple_alias(host_or_target, host_or_target_c_compiler):
-     @imports(_from='textwrap', _import='dedent')
-     def rust_target(rustc, host_or_target, compiler_info,
-                     rust_supported_targets, arm_target):
-+        return host_or_target.alias
-         # Rust's --target options are similar to, but not exactly the same
-         # as, the autoconf-derived targets we use.  An example would be that
-         # Rust uses distinct target triples for targetting the GNU C++ ABI
-@@ -401,22 +399,10 @@ def rust_triple_alias(host_or_target, host_or_target_c_compiler):
- 
-     return rust_target
- 
--
- rust_target_triple = rust_triple_alias(target, c_compiler)
- rust_host_triple = rust_triple_alias(host, host_c_compiler)
- 
- 
--@depends(host, rust_host_triple, rustc_info.host)
--def validate_rust_host_triple(host, rust_host, rustc_host):
--    if rust_host != rustc_host:
--        if host.alias == rust_host:
--            configure_host = host.alias
--        else:
--            configure_host = '{}/{}'.format(host.alias, rust_host)
--        die("The rust compiler host ({}) is not suitable for the configure host ({})."
--            .format(rustc_host, configure_host))
--
--
- set_config('RUST_TARGET', rust_target_triple)
- set_config('RUST_HOST_TARGET', rust_host_triple)
- 
--- 
-2.31.1
-
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0002-build-do-not-use-autoconf-s-config.sub-to-canonicali.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0002-build-do-not-use-autoconf-s-config.sub-to-canonicali.patch
deleted file mode 100644
index 21ad82e..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0002-build-do-not-use-autoconf-s-config.sub-to-canonicali.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From e5b95b3918588e2930c9af7ba304c57e871b2d55 Mon Sep 17 00:00:00 2001
-From: Alexander Kanavin <alex@linutronix.de>
-Date: Thu, 7 Oct 2021 12:44:18 +0200
-Subject: [PATCH] build: do not use autoconf's config.sub to 'canonicalize'
- names
-
-The outcome is that processed names no longer match our custom rust
-target definitions, and the build fails.
-
-Upstream-Status: Inappropriate [oespecific]
-
-Signed-off-by: Alexander Kanavin <alex@linutronix.de>
-Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
----
- build/moz.configure/init.configure | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/build/moz.configure/init.configure b/build/moz.configure/init.configure
-index b887153..0a6a33c 100644
---- a/build/moz.configure/init.configure
-+++ b/build/moz.configure/init.configure
-@@ -808,7 +808,7 @@ def help_host_target(help, host, target):
- def config_sub(shell, triplet):
-     config_sub = os.path.join(os.path.dirname(__file__), '..',
-                               'autoconf', 'config.sub')
--    return check_cmd_output(shell, config_sub, triplet).strip()
-+    return triplet
- 
- 
- @depends('--host', shell)
--- 
-2.31.1
-
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0003-Do-not-check-binaries-after-build.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0003-Do-not-check-binaries-after-build.patch
deleted file mode 100644
index a0f37f5..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0003-Do-not-check-binaries-after-build.patch
+++ /dev/null
@@ -1,55 +0,0 @@
-From 1a47eac590f57c765033c7797b0759dc314f2128 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com>
-Date: Mon, 1 Nov 2021 22:52:57 +0100
-Subject: [PATCH] Do not check binaries after build
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-* It buys us a dependency hard to fulfill in different layer setups
-* Mozjs-91 does not perform these checks when setting --enable-project=js. Here
-  for old configuration style --enable-project changes nothing and build wants
-  to check binaries created.
-
-So omit checks by not searching for llvm_objdump and making check_binary.py a
-stub to prevent errors by using unset LLVM_OBJDUMP.
-
-Upstream-Status: Inappropriate [oe specific]
-
-Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
----
- moz.configure                                   | 7 -------
- python/mozbuild/mozbuild/action/check_binary.py | 2 +-
- 2 files changed, 1 insertion(+), 8 deletions(-)
-
-diff --git a/moz.configure b/moz.configure
-index 9b0e784..41e3e4d 100755
---- a/moz.configure
-+++ b/moz.configure
-@@ -648,13 +648,6 @@ def llvm_objdump(host_c_compiler, c_compiler, bindgen_config_paths):
-     return (llvm_objdump,)
- 
- 
--llvm_objdump = check_prog('LLVM_OBJDUMP', llvm_objdump, what='llvm-objdump',
--                          when='--enable-compile-environment',
--                          paths=toolchain_search_path)
--
--add_old_configure_assignment('LLVM_OBJDUMP', llvm_objdump)
--
--
- js_option('--enable-dtrace', help='Build with dtrace support')
- 
- dtrace = check_header('sys/sdt.h', when='--enable-dtrace',
-diff --git a/python/mozbuild/mozbuild/action/check_binary.py b/python/mozbuild/mozbuild/action/check_binary.py
-index 57ccfa6..bd2c167 100644
---- a/python/mozbuild/mozbuild/action/check_binary.py
-+++ b/python/mozbuild/mozbuild/action/check_binary.py
-@@ -366,4 +366,4 @@ def main(args):
- 
- 
- if __name__ == '__main__':
--    sys.exit(main(sys.argv[1:]))
-+    sys.exit(0)
--- 
-2.31.1
-
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0004-Cargo.toml-do-not-abort-on-panic.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0004-Cargo.toml-do-not-abort-on-panic.patch
deleted file mode 100644
index 665eace..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0004-Cargo.toml-do-not-abort-on-panic.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From 9e37248870b2b955293754933c789ca00bca06ef Mon Sep 17 00:00:00 2001
-From: Alexander Kanavin <alex@linutronix.de>
-Date: Fri, 1 Oct 2021 13:00:24 +0200
-Subject: [PATCH] Cargo.toml: do not abort on panic
-
-OE's rust is configured to unwind, and this setting clashes with it/
-
-Upstream-Status: Inappropriate [OE specific]
-
-Signed-off-by: Alexander Kanavin <alex@linutronix.de>
-Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
----
- Cargo.toml | 2 --
- 1 file changed, 2 deletions(-)
-
-diff --git a/Cargo.toml b/Cargo.toml
-index 897daad41b..505454263e 100644
---- a/Cargo.toml
-+++ b/Cargo.toml
-@@ -56,13 +56,11 @@ opt-level = 1
- rpath = false
- lto = false
- debug-assertions = true
--panic = "abort"
- 
- [profile.release]
- opt-level = 2
- rpath = false
- debug-assertions = false
--panic = "abort"
- 
- [patch.crates-io]
- libudev-sys = { path = "dom/webauthn/libudev-sys" }
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0005-Fixup-compatibility-of-mozbuild-with-Python-3.10.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0005-Fixup-compatibility-of-mozbuild-with-Python-3.10.patch
deleted file mode 100644
index d069d00..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0005-Fixup-compatibility-of-mozbuild-with-Python-3.10.patch
+++ /dev/null
@@ -1,304 +0,0 @@
-From a88d0c8e27b48344942187c2611bb121bde9332d Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Franti=C5=A1ek=20Zatloukal?= <fzatlouk@redhat.com>
-Date: Tue, 13 Jul 2021 11:46:20 +0200
-Subject: [PATCH] Fixup compatibility of mozbuild with Python 3.10
-
-Stolen from [1]
-
-[1] https://src.fedoraproject.org/rpms/mozjs78/raw/rawhide/f/Fixup-compatibility-of-mozbuild-with-Python-3.10.patch
-
-Upstream-Status: Pending
-
----
- python/mach/mach/config.py                                    | 4 ++--
- python/mach/mach/decorators.py                                | 2 +-
- python/mozbuild/mozbuild/backend/configenvironment.py         | 3 ++-
- python/mozbuild/mozbuild/makeutil.py                          | 2 +-
- python/mozbuild/mozbuild/util.py                              | 2 +-
- testing/marionette/client/marionette_driver/wait.py           | 2 +-
- testing/mozbase/manifestparser/manifestparser/filters.py      | 3 ++-
- testing/mozbase/versioninfo.py                                | 2 +-
- testing/web-platform/tests/tools/manifest/vcs.py              | 2 +-
- .../web-platform/tests/tools/third_party/h2/h2/settings.py    | 2 +-
- .../tests/tools/third_party/html5lib/html5lib/_trie/_base.py  | 2 +-
- .../tools/third_party/html5lib/html5lib/treebuilders/dom.py   | 2 +-
- .../tests/tools/third_party/hyper/hyper/common/headers.py     | 2 +-
- .../tests/tools/third_party/hyper/hyper/h2/settings.py        | 2 +-
- .../tests/tools/third_party/hyper/hyper/http11/connection.py  | 4 ++--
- .../third_party/hyper/hyper/packages/hyperframe/flags.py      | 2 +-
- .../tests/tools/third_party/hyperframe/hyperframe/flags.py    | 2 +-
- testing/web-platform/tests/tools/wptserve/wptserve/config.py  | 3 ++-
- testing/web-platform/tests/webdriver/tests/support/sync.py    | 2 +-
- 19 files changed, 24 insertions(+), 21 deletions(-)
-
-diff --git a/python/mach/mach/config.py b/python/mach/mach/config.py
-index 7210eca82..edb4d2e93 100644
---- a/python/mach/mach/config.py
-+++ b/python/mach/mach/config.py
-@@ -144,7 +144,7 @@ def reraise_attribute_error(func):
-     return _
- 
- 
--class ConfigSettings(collections.Mapping):
-+class ConfigSettings(collections.abc.Mapping):
-     """Interface for configuration settings.
- 
-     This is the main interface to the configuration.
-@@ -190,7 +190,7 @@ class ConfigSettings(collections.Mapping):
-     will result in exceptions being raised.
-     """
- 
--    class ConfigSection(collections.MutableMapping, object):
-+    class ConfigSection(collections.abc.MutableMapping, object):
-         """Represents an individual config section."""
-         def __init__(self, config, name, settings):
-             object.__setattr__(self, '_config', config)
-diff --git a/python/mach/mach/decorators.py b/python/mach/mach/decorators.py
-index 27f7f34a6..5f63271a3 100644
---- a/python/mach/mach/decorators.py
-+++ b/python/mach/mach/decorators.py
-@@ -140,7 +140,7 @@ def CommandProvider(cls):
-               'Conditions argument must take a list ' + \
-               'of functions. Found %s instead.'
- 
--        if not isinstance(command.conditions, collections.Iterable):
-+        if not isinstance(command.conditions, collections.abc.Iterable):
-             msg = msg % (command.name, type(command.conditions))
-             raise MachError(msg)
- 
-diff --git a/python/mozbuild/mozbuild/backend/configenvironment.py b/python/mozbuild/mozbuild/backend/configenvironment.py
-index 20d1a9fa6..8747958bd 100644
---- a/python/mozbuild/mozbuild/backend/configenvironment.py
-+++ b/python/mozbuild/mozbuild/backend/configenvironment.py
-@@ -9,7 +9,8 @@ import six
- import sys
- import json
- 
--from collections import Iterable, OrderedDict
-+from collections import OrderedDict
-+from collections.abc import Iterable
- from types import ModuleType
- 
- import mozpack.path as mozpath
-diff --git a/python/mozbuild/mozbuild/makeutil.py b/python/mozbuild/mozbuild/makeutil.py
-index 4da1a3b26..4ce56848c 100644
---- a/python/mozbuild/mozbuild/makeutil.py
-+++ b/python/mozbuild/mozbuild/makeutil.py
-@@ -7,7 +7,7 @@ from __future__ import absolute_import, print_function, unicode_literals
- import os
- import re
- import six
--from collections import Iterable
-+from collections.abc import Iterable
- 
- 
- class Makefile(object):
-diff --git a/python/mozbuild/mozbuild/util.py b/python/mozbuild/mozbuild/util.py
-index 044cf645c..98ed3ef52 100644
---- a/python/mozbuild/mozbuild/util.py
-+++ b/python/mozbuild/mozbuild/util.py
-@@ -782,7 +782,7 @@ class HierarchicalStringList(object):
-         self._strings = StrictOrderingOnAppendList()
-         self._children = {}
- 
--    class StringListAdaptor(collections.Sequence):
-+    class StringListAdaptor(collections.abc.Sequence):
-         def __init__(self, hsl):
-             self._hsl = hsl
- 
-diff --git a/testing/marionette/client/marionette_driver/wait.py b/testing/marionette/client/marionette_driver/wait.py
-index eeaa1e23d..c147f463f 100644
---- a/testing/marionette/client/marionette_driver/wait.py
-+++ b/testing/marionette/client/marionette_driver/wait.py
-@@ -82,7 +82,7 @@ class Wait(object):
- 
-         exceptions = []
-         if ignored_exceptions is not None:
--            if isinstance(ignored_exceptions, collections.Iterable):
-+            if isinstance(ignored_exceptions, collections.abc.Iterable):
-                 exceptions.extend(iter(ignored_exceptions))
-             else:
-                 exceptions.append(ignored_exceptions)
-diff --git a/testing/mozbase/manifestparser/manifestparser/filters.py b/testing/mozbase/manifestparser/manifestparser/filters.py
-index 287ee033b..b1d608003 100644
---- a/testing/mozbase/manifestparser/manifestparser/filters.py
-+++ b/testing/mozbase/manifestparser/manifestparser/filters.py
-@@ -12,7 +12,8 @@ from __future__ import absolute_import
- 
- import itertools
- import os
--from collections import defaultdict, MutableSequence
-+from collections import defaultdict
-+from collections.abc import MutableSequence
- 
- import six
- from six import string_types
-diff --git a/testing/mozbase/versioninfo.py b/testing/mozbase/versioninfo.py
-index 91d1a0473..8c1680069 100755
---- a/testing/mozbase/versioninfo.py
-+++ b/testing/mozbase/versioninfo.py
-@@ -11,7 +11,7 @@ from commit messages.
- 
- from __future__ import absolute_import, print_function
- 
--from collections import Iterable
-+from collections.abc import Iterable
- from distutils.version import StrictVersion
- import argparse
- import os
-diff --git a/testing/web-platform/tests/tools/manifest/vcs.py b/testing/web-platform/tests/tools/manifest/vcs.py
-index 7c0feeb81..05ee19c7c 100644
---- a/testing/web-platform/tests/tools/manifest/vcs.py
-+++ b/testing/web-platform/tests/tools/manifest/vcs.py
-@@ -3,7 +3,7 @@ import json
- import os
- import stat
- from collections import deque
--from collections import MutableMapping
-+from collections.abc import MutableMapping
- 
- from six import with_metaclass, PY2
- 
-diff --git a/testing/web-platform/tests/tools/third_party/h2/h2/settings.py b/testing/web-platform/tests/tools/third_party/h2/h2/settings.py
-index 3da720329..e097630e9 100644
---- a/testing/web-platform/tests/tools/third_party/h2/h2/settings.py
-+++ b/testing/web-platform/tests/tools/third_party/h2/h2/settings.py
-@@ -88,7 +88,7 @@ class ChangedSetting:
-         )
- 
- 
--class Settings(collections.MutableMapping):
-+class Settings(collections.abc.MutableMapping):
-     """
-     An object that encapsulates HTTP/2 settings state.
- 
-diff --git a/testing/web-platform/tests/tools/third_party/html5lib/html5lib/_trie/_base.py b/testing/web-platform/tests/tools/third_party/html5lib/html5lib/_trie/_base.py
-index a1158bbbf..a9295a2ba 100644
---- a/testing/web-platform/tests/tools/third_party/html5lib/html5lib/_trie/_base.py
-+++ b/testing/web-platform/tests/tools/third_party/html5lib/html5lib/_trie/_base.py
-@@ -1,6 +1,6 @@
- from __future__ import absolute_import, division, unicode_literals
- 
--from collections import Mapping
-+from collections.abc import Mapping
- 
- 
- class Trie(Mapping):
-diff --git a/testing/web-platform/tests/tools/third_party/html5lib/html5lib/treebuilders/dom.py b/testing/web-platform/tests/tools/third_party/html5lib/html5lib/treebuilders/dom.py
-index dcfac220b..818a33433 100644
---- a/testing/web-platform/tests/tools/third_party/html5lib/html5lib/treebuilders/dom.py
-+++ b/testing/web-platform/tests/tools/third_party/html5lib/html5lib/treebuilders/dom.py
-@@ -1,7 +1,7 @@
- from __future__ import absolute_import, division, unicode_literals
- 
- 
--from collections import MutableMapping
-+from collections.abc import MutableMapping
- from xml.dom import minidom, Node
- import weakref
- 
-diff --git a/testing/web-platform/tests/tools/third_party/hyper/hyper/common/headers.py b/testing/web-platform/tests/tools/third_party/hyper/hyper/common/headers.py
-index 655a591ac..6454f550a 100644
---- a/testing/web-platform/tests/tools/third_party/hyper/hyper/common/headers.py
-+++ b/testing/web-platform/tests/tools/third_party/hyper/hyper/common/headers.py
-@@ -10,7 +10,7 @@ import collections
- from hyper.common.util import to_bytestring, to_bytestring_tuple
- 
- 
--class HTTPHeaderMap(collections.MutableMapping):
-+class HTTPHeaderMap(collections.abc.MutableMapping):
-     """
-     A structure that contains HTTP headers.
- 
-diff --git a/testing/web-platform/tests/tools/third_party/hyper/hyper/h2/settings.py b/testing/web-platform/tests/tools/third_party/hyper/hyper/h2/settings.py
-index fedc5e3c4..040afea92 100755
---- a/testing/web-platform/tests/tools/third_party/hyper/hyper/h2/settings.py
-+++ b/testing/web-platform/tests/tools/third_party/hyper/hyper/h2/settings.py
-@@ -151,7 +151,7 @@ class ChangedSetting:
-         )
- 
- 
--class Settings(collections.MutableMapping):
-+class Settings(collections.abc.MutableMapping):
-     """
-     An object that encapsulates HTTP/2 settings state.
- 
-diff --git a/testing/web-platform/tests/tools/third_party/hyper/hyper/http11/connection.py b/testing/web-platform/tests/tools/third_party/hyper/hyper/http11/connection.py
-index 61361c358..a214311d2 100644
---- a/testing/web-platform/tests/tools/third_party/hyper/hyper/http11/connection.py
-+++ b/testing/web-platform/tests/tools/third_party/hyper/hyper/http11/connection.py
-@@ -10,7 +10,7 @@ import os
- import socket
- import base64
- 
--from collections import Iterable, Mapping
-+from collections.abc import Iterable, Mapping
- 
- import collections
- from hyperframe.frame import SettingsFrame
-@@ -295,7 +295,7 @@ class HTTP11Connection(object):
-                 return
- 
-             # Iterables that set a specific content length.
--            elif isinstance(body, collections.Iterable):
-+            elif isinstance(body, collections.abc.Iterable):
-                 for item in body:
-                     try:
-                         self._sock.send(item)
-diff --git a/testing/web-platform/tests/tools/third_party/hyper/hyper/packages/hyperframe/flags.py b/testing/web-platform/tests/tools/third_party/hyper/hyper/packages/hyperframe/flags.py
-index e8f630056..8f2ea689b 100644
---- a/testing/web-platform/tests/tools/third_party/hyper/hyper/packages/hyperframe/flags.py
-+++ b/testing/web-platform/tests/tools/third_party/hyper/hyper/packages/hyperframe/flags.py
-@@ -11,7 +11,7 @@ import collections
- Flag = collections.namedtuple("Flag", ["name", "bit"])
- 
- 
--class Flags(collections.MutableSet):
-+class Flags(collections.abc.MutableSet):
-     """
-     A simple MutableSet implementation that will only accept known flags as elements.
- 
-diff --git a/testing/web-platform/tests/tools/third_party/hyperframe/hyperframe/flags.py b/testing/web-platform/tests/tools/third_party/hyperframe/hyperframe/flags.py
-index 05b35017e..14c352e10 100644
---- a/testing/web-platform/tests/tools/third_party/hyperframe/hyperframe/flags.py
-+++ b/testing/web-platform/tests/tools/third_party/hyperframe/hyperframe/flags.py
-@@ -11,7 +11,7 @@ import collections
- Flag = collections.namedtuple("Flag", ["name", "bit"])
- 
- 
--class Flags(collections.MutableSet):
-+class Flags(collections.abc.MutableSet):
-     """
-     A simple MutableSet implementation that will only accept known flags as
-     elements.
-diff --git a/testing/web-platform/tests/tools/wptserve/wptserve/config.py b/testing/web-platform/tests/tools/wptserve/wptserve/config.py
-index 7766565fe..3c1c36d6f 100644
---- a/testing/web-platform/tests/tools/wptserve/wptserve/config.py
-+++ b/testing/web-platform/tests/tools/wptserve/wptserve/config.py
-@@ -2,7 +2,8 @@ import copy
- import logging
- import os
- 
--from collections import defaultdict, Mapping
-+from collections import defaultdict
-+from collections.abc import Mapping
- from six import integer_types, iteritems, itervalues, string_types
- 
- from . import sslutils
-diff --git a/testing/web-platform/tests/webdriver/tests/support/sync.py b/testing/web-platform/tests/webdriver/tests/support/sync.py
-index 3fc77131c..8e8f6b819 100644
---- a/testing/web-platform/tests/webdriver/tests/support/sync.py
-+++ b/testing/web-platform/tests/webdriver/tests/support/sync.py
-@@ -81,7 +81,7 @@ class Poll(object):
- 
-         exceptions = []
-         if ignored_exceptions is not None:
--            if isinstance(ignored_exceptions, collections.Iterable):
-+            if isinstance(ignored_exceptions, collections.abc.Iterable):
-                 exceptions.extend(iter(ignored_exceptions))
-             else:
-                 exceptions.append(ignored_exceptions)
--- 
-2.31.1
-
-
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0006-use-asm-sgidefs.h.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0006-use-asm-sgidefs.h.patch
deleted file mode 100644
index b56f0b9..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0006-use-asm-sgidefs.h.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-From 65acc8800dba7e10da882871d4648241805c47ce Mon Sep 17 00:00:00 2001
-From: Andre McCurdy <amccurdy@gmail.com>
-Date: Sat, 30 Apr 2016 15:29:06 -0700
-Subject: [PATCH] use <asm/sgidefs.h>
-
-Build fix for MIPS with musl libc
-
-The MIPS specific header <sgidefs.h> is provided by glibc and uclibc
-but not by musl. Regardless of the libc, the kernel headers provide
-<asm/sgidefs.h> which provides the same definitions, so use that
-instead.
-
-Upstream-Status: Pending
-
-[Vincent:
-Taken from: https://sourceware.org/bugzilla/show_bug.cgi?id=21070]
-
-Signed-off-by: Andre McCurdy <armccurdy@gmail.com>
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
-Signed-off-by: Vicente Olivert Riera <Vincent.Riera@imgtec.com>
----
- gdb/mips-linux-nat.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
---- a/mfbt/RandomNum.cpp
-+++ b/mfbt/RandomNum.cpp
-@@ -52,7 +52,7 @@ extern "C" BOOLEAN NTAPI RtlGenRandom(PV
- #  elif defined(__s390__)
- #    define GETRANDOM_NR 349
- #  elif defined(__mips__)
--#    include <sgidefs.h>
-+#    include <asm/sgidefs.h>
- #    if _MIPS_SIM == _MIPS_SIM_ABI32
- #      define GETRANDOM_NR 4353
- #    elif _MIPS_SIM == _MIPS_SIM_ABI64
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0007-fix-musl-build.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0007-fix-musl-build.patch
deleted file mode 100644
index c0834af..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0007-fix-musl-build.patch
+++ /dev/null
@@ -1,15 +0,0 @@
-Upstream: No
-Reason: mozjs60 miscompiles on musl if built with HAVE_THREAD_TLS_KEYWORD:
-https://github.com/void-linux/void-packages/issues/2598
---- a/js/src/old-configure.in
-+++ b/js/src/old-configure.in
-@@ -1072,6 +1072,9 @@ if test "$ac_cv_thread_keyword" = yes; t
-     *-android*|*-linuxandroid*)
-       :
-       ;;
-+    *-musl*)
-+      :
-+      ;;
-     *)
-       AC_DEFINE(HAVE_THREAD_TLS_KEYWORD)
-       ;;
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0008-riscv.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0008-riscv.patch
deleted file mode 100644
index 70177d0..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0008-riscv.patch
+++ /dev/null
@@ -1,52 +0,0 @@
-Add RISCV32/64 support
-
-Upstream-Status: Pending
-
-Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
----
- build/moz.configure/init.configure                        | 6 ++++++
- python/mozbuild/mozbuild/configure/constants.py           | 2 ++
- 2 files changed, 8 insertions(+)
-
-diff --git a/build/moz.configure/init.configure b/build/moz.configure/init.configure
-index 0a6a33c..eeee87e 100644
---- a/build/moz.configure/init.configure
-+++ b/build/moz.configure/init.configure
-@@ -755,6 +755,12 @@ def split_triplet(triplet, allow_msvc=False):
-     elif cpu.startswith('aarch64'):
-         canonical_cpu = 'aarch64'
-         endianness = 'little'
-+    elif cpu in ("riscv32", "riscv32gc"):
-+        canonical_cpu = "riscv32"
-+        endianness = "little"
-+    elif cpu in ("riscv64", "riscv64gc"):
-+        canonical_cpu = "riscv64"
-+        endianness = "little"
-     elif cpu == 'sh4':
-         canonical_cpu = 'sh4'
-         endianness = 'little'
-diff --git a/python/mozbuild/mozbuild/configure/constants.py b/python/mozbuild/mozbuild/configure/constants.py
-index 7542dcd..de25be2 100644
---- a/python/mozbuild/mozbuild/configure/constants.py
-+++ b/python/mozbuild/mozbuild/configure/constants.py
-@@ -50,6 +50,8 @@ CPU_bitness = {
-     'mips64': 64,
-     'ppc': 32,
-     'ppc64': 64,
-+    'riscv32': 32,
-+    'riscv64': 64,
-     's390': 32,
-     's390x': 64,
-     'sh4': 32,
-@@ -82,6 +84,8 @@ CPU_preprocessor_checks = OrderedDict((
-     ('s390', '__s390__'),
-     ('ppc64', '__powerpc64__'),
-     ('ppc', '__powerpc__'),
-+    ('riscv32', '__riscv && __SIZEOF_POINTER__ == 4'),
-+    ('riscv64', '__riscv && __SIZEOF_POINTER__ == 8'),
-     ('Alpha', '__alpha__'),
-     ('hppa', '__hppa__'),
-     ('sparc64', '__sparc__ && __arch64__'),
--- 
-2.31.1
-
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0009-riscv-Disable-atomic-operations.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0009-riscv-Disable-atomic-operations.patch
deleted file mode 100644
index ba50e10..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0009-riscv-Disable-atomic-operations.patch
+++ /dev/null
@@ -1,52 +0,0 @@
-From 64ad80e6d95871f17be4cd01da15581f41ac0b2b Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Mon, 27 May 2019 21:10:34 -0700
-Subject: [PATCH] riscv: Disable atomic operations
-
-This was ported from what was used with mozjs-60 which was
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
-
-Upstream-Status: Inappropriate[old-version]
-
-Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
----
- js/src/jit/AtomicOperations.h                          | 3 ++-
- js/src/jit/shared/AtomicOperations-feeling-lucky-gcc.h | 8 ++++++++
- 2 files changed, 10 insertions(+), 1 deletion(-)
-
-diff --git a/js/src/jit/AtomicOperations.h b/js/src/jit/AtomicOperations.h
-index 0486cba..cf6b91c 100644
---- a/js/src/jit/AtomicOperations.h
-+++ b/js/src/jit/AtomicOperations.h
-@@ -391,7 +391,8 @@ inline bool AtomicOperations::isLockfreeJS(int32_t size) {
- #elif defined(__ppc__) || defined(__PPC__) || defined(__sparc__) ||     \
-     defined(__ppc64__) || defined(__PPC64__) || defined(__ppc64le__) || \
-     defined(__PPC64LE__) || defined(__alpha__) || defined(__hppa__) ||  \
--    defined(__sh__) || defined(__s390__) || defined(__s390x__)
-+    defined(__sh__) || defined(__s390__) || defined(__s390x__) || \
-+    defined(__riscv)
- #  include "jit/shared/AtomicOperations-feeling-lucky.h"
- #else
- #  error "No AtomicOperations support provided for this platform"
-diff --git a/js/src/jit/shared/AtomicOperations-feeling-lucky-gcc.h b/js/src/jit/shared/AtomicOperations-feeling-lucky-gcc.h
-index f002cd4..14bb5f9 100644
---- a/js/src/jit/shared/AtomicOperations-feeling-lucky-gcc.h
-+++ b/js/src/jit/shared/AtomicOperations-feeling-lucky-gcc.h
-@@ -77,6 +77,14 @@
- #  endif
- #endif
- 
-+#ifdef __riscv
-+#  ifdef __riscv_xlen == 64
-+#    define HAS_64BIT_ATOMICS
-+#    define HAS_64BIT_LOCKFREE
-+#  endif
-+#endif
-+
-+
- // The default implementation tactic for gcc/clang is to use the newer __atomic
- // intrinsics added for use in C++11 <atomic>.  Where that isn't available, we
- // use GCC's older __sync functions instead.
--- 
-2.31.1
-
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0010-riscv-Set-march-correctly.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0010-riscv-Set-march-correctly.patch
deleted file mode 100644
index befede1..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0010-riscv-Set-march-correctly.patch
+++ /dev/null
@@ -1,60 +0,0 @@
-From c3c2d1c69859c5e567005f0c3fa07a0dbe31e4a3 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com>
-Date: Fri, 29 Oct 2021 21:18:26 +0200
-Subject: [PATCH] riscv: Set march correctly
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Stolen from leftover patch in oe-core [1]
-
-[1] https://github.com/openembedded/openembedded-core/blob/c884878f6c833b18a3a95b193f5de68df5bcea48/meta/recipes-devtools/rust/files/rv64gc.patch
-
-Upstream-Status: Pending
-
-Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
----
- third_party/rust/cc/src/lib.rs           | 14 ++++++++++----
- third_party/rust/cc/.cargo-checksum.json | 2 +-
- 1 file changed, 10 insertions(+), 4 deletions(-)
-
-diff --git a/third_party/rust/cc/src/lib.rs b/third_party/rust/cc/src/lib.rs
-index 621d31d..6f72e13 100644
---- a/third_party/rust/cc/src/lib.rs
-+++ b/third_party/rust/cc/src/lib.rs
-@@ -1587,14 +1587,20 @@ impl Build {
-                     let mut parts = target.split('-');
-                     if let Some(arch) = parts.next() {
-                         let arch = &arch[5..];
--                        cmd.args.push(("-march=rv".to_owned() + arch).into());
--                        // ABI is always soft-float right now, update this when this is no longer the
--                        // case:
--                        if arch.starts_with("64") {
-+                        if target.contains("linux") && arch.starts_with("64") {
-+                            cmd.args.push(("-march=rv64gc").into());
-+                            cmd.args.push("-mabi=lp64d".into());
-+                        } else if target.contains("linux") && arch.starts_with("32") {
-+                            cmd.args.push(("-march=rv32gc").into());
-+                            cmd.args.push("-mabi=ilp32d".into());
-+                        } else if arch.starts_with("64") {
-+                            cmd.args.push(("-march=rv".to_owned() + arch).into());
-                             cmd.args.push("-mabi=lp64".into());
-                         } else {
-+                            cmd.args.push(("-march=rv".to_owned() + arch).into());
-                             cmd.args.push("-mabi=ilp32".into());
-                         }
-+                        cmd.args.push("-mcmodel=medany".into());
-                     }
-                 }
-             }
-diff --git a/third_party/rust/cc/.cargo-checksum.json b/third_party/rust/cc/.cargo-checksum.json
-index 417fde7..70e5d02 100644
---- a/third_party/rust/cc/.cargo-checksum.json
-+++ b/third_party/rust/cc/.cargo-checksum.json
-@@ -1 +1 @@
--{"files":{"Cargo.lock":"3aff5f8b0a7f4d72852b11b0526f0002e6bf55f19f1ebd6470d7f97fbd540e60","Cargo.toml":"6ab10d9b6a9c6f0909074e6698c90c6b6a7223661ec2e83174d2593117cbe7f2","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"7184fbdf375a057e673257348f6d7584c0dd11b66318d98f3647f69eb610b097","src/bin/gcc-shim.rs":"b77907875029494b6288841c3aed2e4939ed40708c7f597fca5c9e2570490ca6","src/com.rs":"bcdaf1c28b71e6ef889c6b08d1ce9d7c0761344a677f523bc4c3cd297957f804","src/lib.rs":"4753929dbb7b676c19d7cfa06d0a47e37003554b80c536cbf2b892d591ef61c2","src/registry.rs":"3cc1b5a50879fa751572878ae1d0afbfc960c11665258492754b2c8bccb0ff5d","src/setup_config.rs":"7014103587d3382eac599cb76f016e2609b8140970861b2237982d1db24af265","src/winapi.rs":"ea8b7edbb9ff87957254f465c2334e714c5d6b3b19a8d757c48ea7ca0881c50c","src/windows_registry.rs":"388e79dcf3e84078ae0b086c6cdee9cf9eb7e3ffafdcbf3e2df26163661f5856","tests/cc_env.rs":"e02b3b0824ad039b47e4462c5ef6dbe6c824c28e7953af94a0f28f7b5158042e","tests/cflags.rs":"57f06eb5ce1557e5b4a032d0c4673e18fbe6f8d26c1deb153126e368b96b41b3","tests/cxxflags.rs":"c2c6c6d8a0d7146616fa1caed26876ee7bc9fcfffd525eb4743593cade5f3371","tests/support/mod.rs":"71620b178583b6e6e5e0d4cac14e2cef6afc62fb6841e0c72ed1784543abf8ac","tests/test.rs":"1605640c9b94a77f48fc92e1dc0485bdf1960da5626e2e00279e4703691656bc"},"package":"aa87058dce70a3ff5621797f1506cb837edd02ac4c0ae642b4542dce802908b8"}
-\ No newline at end of file
-+{"files":{"Cargo.lock":"3aff5f8b0a7f4d72852b11b0526f0002e6bf55f19f1ebd6470d7f97fbd540e60","Cargo.toml":"6ab10d9b6a9c6f0909074e6698c90c6b6a7223661ec2e83174d2593117cbe7f2","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"378f5840b258e2779c39418f3f2d7b2ba96f1c7917dd6be0713f88305dbda397","README.md":"7184fbdf375a057e673257348f6d7584c0dd11b66318d98f3647f69eb610b097","src/bin/gcc-shim.rs":"b77907875029494b6288841c3aed2e4939ed40708c7f597fca5c9e2570490ca6","src/com.rs":"bcdaf1c28b71e6ef889c6b08d1ce9d7c0761344a677f523bc4c3cd297957f804","src/lib.rs":"feab2b4cc51fcfb041f83a1a689960c3c9abfbaa9580ba186244a880586ba29a","src/registry.rs":"3cc1b5a50879fa751572878ae1d0afbfc960c11665258492754b2c8bccb0ff5d","src/setup_config.rs":"7014103587d3382eac599cb76f016e2609b8140970861b2237982d1db24af265","src/winapi.rs":"ea8b7edbb9ff87957254f465c2334e714c5d6b3b19a8d757c48ea7ca0881c50c","src/windows_registry.rs":"388e79dcf3e84078ae0b086c6cdee9cf9eb7e3ffafdcbf3e2df26163661f5856","tests/cc_env.rs":"e02b3b0824ad039b47e4462c5ef6dbe6c824c28e7953af94a0f28f7b5158042e","tests/cflags.rs":"57f06eb5ce1557e5b4a032d0c4673e18fbe6f8d26c1deb153126e368b96b41b3","tests/cxxflags.rs":"c2c6c6d8a0d7146616fa1caed26876ee7bc9fcfffd525eb4743593cade5f3371","tests/support/mod.rs":"71620b178583b6e6e5e0d4cac14e2cef6afc62fb6841e0c72ed1784543abf8ac","tests/test.rs":"1605640c9b94a77f48fc92e1dc0485bdf1960da5626e2e00279e4703691656bc"},"package":"aa87058dce70a3ff5621797f1506cb837edd02ac4c0ae642b4542dce802908b8"}
--- 
-2.31.1
-
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0011-replace-include-by-code-to-fix-arm-build.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0011-replace-include-by-code-to-fix-arm-build.patch
deleted file mode 100644
index adca9c7..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0011-replace-include-by-code-to-fix-arm-build.patch
+++ /dev/null
@@ -1,43 +0,0 @@
-From fd6847c9416f9eebde636e21d794d25d1be8791d Mon Sep 17 00:00:00 2001
-From: Mike Hommey <mh@glandium.org>
-Date: Sat, 1 Jun 2019 09:06:01 +0900
-Subject: [PATCH] Bug 1526653 - Include struct definitions for user_vfp and
- user_vfp_exc.
- 
-* We need this to fix arm builds
-* Stolen from [1]
-
-[1] https://salsa.debian.org/mozilla-team/firefox/commit/fd6847c9416f9eebde636e21d794d25d1be8791d
-
-Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
-
-Upstream-Status: Pending
----
- js/src/wasm/WasmSignalHandlers.cpp | 11 ++++++++++-
- 1 file changed, 10 insertions(+), 1 deletion(-)
-
-diff --git a/js/src/wasm/WasmSignalHandlers.cpp b/js/src/wasm/WasmSignalHandlers.cpp
-index 636537f8478..383c380f04c 100644
---- a/js/src/wasm/WasmSignalHandlers.cpp
-+++ b/js/src/wasm/WasmSignalHandlers.cpp
-@@ -248,7 +248,16 @@ using mozilla::DebugOnly;
- #endif
- 
- #ifdef WASM_EMULATE_ARM_UNALIGNED_FP_ACCESS
--#  include <sys/user.h>
-+struct user_vfp {
-+  unsigned long long fpregs[32];
-+  unsigned long fpscr;
-+};
-+
-+struct user_vfp_exc {
-+  unsigned long fpexc;
-+  unsigned long fpinst;
-+  unsigned long fpinst2;
-+};
- #endif
- 
- #if defined(ANDROID)
--- 
-GitLab
-
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0012-Add-SharedArrayRawBufferRefs-to-public-API.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0012-Add-SharedArrayRawBufferRefs-to-public-API.patch
deleted file mode 100644
index ca37ca7..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0012-Add-SharedArrayRawBufferRefs-to-public-API.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-From ccdd47cee610cb33fa5f67f856a68f5e411c79d5 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Andreas=20M=C3=BCller?= <schnitzeltony@gmail.com>
-Date: Sun, 31 Oct 2021 18:32:39 +0100
-Subject: [PATCH] Add SharedArrayRawBufferRefs to public API
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Stolen from [1]
-
-[1] https://src.fedoraproject.org/rpms/mozjs78/blob/rawhide/f/FixSharedArray.diff
-
-Upstream-Status: Pending
-
-Signed-off-by: Andreas Müller <schnitzeltony@gmail.com>
----
- js/public/StructuredClone.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/js/public/StructuredClone.h b/js/public/StructuredClone.h
-index cb3cd5b..06da4dd 100644
---- a/js/public/StructuredClone.h
-+++ b/js/public/StructuredClone.h
-@@ -381,7 +381,7 @@ enum OwnTransferablePolicy {
- namespace js {
- class SharedArrayRawBuffer;
- 
--class SharedArrayRawBufferRefs {
-+class JS_PUBLIC_API SharedArrayRawBufferRefs {
-  public:
-   SharedArrayRawBufferRefs() = default;
-   SharedArrayRawBufferRefs(SharedArrayRawBufferRefs&& other) = default;
--- 
-2.31.1
-
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0013-util.configure-fix-one-occasionally-reproduced-confi.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0013-util.configure-fix-one-occasionally-reproduced-confi.patch
deleted file mode 100644
index e943cf1..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0013-util.configure-fix-one-occasionally-reproduced-confi.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 430fd956b91c6208f166753578234c2f5db6352f Mon Sep 17 00:00:00 2001
-From: Changqing Li <changqing.li@windriver.com>
-Date: Thu, 11 Nov 2021 21:17:38 +0800
-Subject: [PATCH] util.configure: fix one occasionally reproduced configure 
- failure
-
-error:
-| checking whether the C++ compiler supports -Wno-range-loop-analysis...
-| DEBUG: Creating /tmp/conftest.jr1qrcw3.cpp with content:
-| DEBUG: | int
-| DEBUG: | main(void)
-| DEBUG: | {
-| DEBUG: |
-| DEBUG: | ;
-| DEBUG: | return 0;
-| DEBUG: | }
-| DEBUG: Executing: aarch64-wrs-linux-g++ -mcpu=cortex-a53 -march=armv8-a+crc -fstack-protector-strong -O2 -D_FORTIFY_SOURCE=2 -Wformat -Wformat-security -Werror=format-security --sysroot=/mozjs/91.1.0-r0/recipe-sysroot /tmp/conftest.jr1qrcw3.cpp -Werror -Wrange-loop-analysis -c
-| DEBUG: The command returned non-zero exit status 1.
-| DEBUG: Its error output was:
-...
-| File "/mozjs/91.1.0-r0/firefox-91.1.0/build/moz.configure/util.configure", line 239, in try_invoke_compiler
-| os.remove(path)
-| FileNotFoundError: [Errno 2] No such file or directory: '/tmp/conftest.jr1qrcw3.cpp'
-
-It should be another process that deleted this file by using
-"rm -rf conftest*" inappropriately
-
-Upstream-Status: Submitted [https://bugzilla.mozilla.org/show_bug.cgi?id=1740667]
-
-Signed-off-by: Changqing Li <changqing.li@windriver.com>
----
- build/moz.configure/util.configure | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/build/moz.configure/util.configure b/build/moz.configure/util.configure
-index 7ee1a498ad..511e257ad9 100644
---- a/build/moz.configure/util.configure
-+++ b/build/moz.configure/util.configure
-@@ -217,7 +217,7 @@ def try_invoke_compiler(compiler, language, source, flags=None, onerror=None):
-         'C++': '.cpp',
-     }[language]
- 
--    fd, path = mkstemp(prefix='conftest.', suffix=suffix, text=True)
-+    fd, path = mkstemp(prefix='try_invoke_compiler_conftest.', suffix=suffix, text=True)
-     try:
-         source = source.encode('ascii', 'replace')
- 
--- 
-2.17.1
-
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0014-rewrite-cargo-host-linker-in-python3.patch b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0014-rewrite-cargo-host-linker-in-python3.patch
deleted file mode 100644
index 7b93817..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78/0014-rewrite-cargo-host-linker-in-python3.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 9eceb43dd676afe2f675bd65ab369ba4d14f6537 Mon Sep 17 00:00:00 2001
-From: Changqing Li <changqing.li@windriver.com>
-Date: Thu, 18 Nov 2021 07:16:39 +0000
-Subject: [PATCH] Rewrite cargo-host-linker in python3
-
-Mozjs compile failed with this failure:
-/bin/sh: /lib64/libc.so.6: version `GLIBC_2.33' not found (required by /build/tmp-glibc/work/corei7-64-wrs-linux/mozjs/91.1.0-r0/recipe-sysroot-native/usr/lib/libtinfo.so.5)
-
-Root Cause:
-cargo-host-linker has /bin/sh as it's interpreter, but cargo run the cmd
-with LD_LIBRARY_PATH set to recipe-sysroot-native. The host /bin/sh links
-libtinfo.so.5 under recipe-sysroot-native, which needs higher libc. But
-host libc is older libc. So the incompatible problem occurred.
-
-Solution:
-rewrite cargo-host-linker in python3
-
-Upstream-Status: Inappropriate [oe specific]
-
-Signed-off-by: Changqing Li <changqing.li@windriver.com>
----
- build/cargo-host-linker | 24 +++++++---
- 1 file changed, 21 insertions(+), 3 deletions(-)
-
-diff --git a/build/cargo-host-linker b/build/cargo-host-linker
-index cbd0472bf7..ccd8bffec1 100755
---- a/build/cargo-host-linker
-+++ b/build/cargo-host-linker
-@@ -1,3 +1,21 @@
--#!/bin/sh
--# See comment in cargo-linker.
--eval ${MOZ_CARGO_WRAP_HOST_LD} ${MOZ_CARGO_WRAP_HOST_LDFLAGS} '"$@"'
-+#!/usr/bin/env python3
-+
-+import os,sys
-+
-+if os.environ['MOZ_CARGO_WRAP_HOST_LD'].strip():
-+    binary=os.environ['MOZ_CARGO_WRAP_HOST_LD'].split()[0]
-+else:
-+    sys.exit(0)
-+
-+if os.environ['MOZ_CARGO_WRAP_HOST_LDFLAGS'].strip():
-+    if os.environ['MOZ_CARGO_WRAP_HOST_LD'].split()[1:]:
-+        args=[os.environ['MOZ_CARGO_WRAP_HOST_LD'].split()[0]] + os.environ['MOZ_CARGO_WRAP_HOST_LD'].split()[1:] + [os.environ['MOZ_CARGO_WRAP_HOST_LDFLAGS']] + sys.argv[1:]
-+    else:
-+        args=[os.environ['MOZ_CARGO_WRAP_HOST_LD'].split()[0]] + [os.environ['MOZ_CARGO_WRAP_HOST_LDFLAGS']] + sys.argv[1:]
-+else:
-+    if os.environ['MOZ_CARGO_WRAP_HOST_LD'].split()[1:]:
-+        args=[os.environ['MOZ_CARGO_WRAP_HOST_LD'].split()[0]] + os.environ['MOZ_CARGO_WRAP_HOST_LD'].split()[1:] + sys.argv[1:]
-+    else:
-+        args=[os.environ['MOZ_CARGO_WRAP_HOST_LD'].split()[0]] + sys.argv[1:]
-+
-+os.execvp(binary, args)
--- 
-2.33.1
-
diff --git a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78_78.15.0.bb b/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78_78.15.0.bb
deleted file mode 100644
index 7d4e4a8..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/meta-python/recipes-extended/mozjs/mozjs-78_78.15.0.bb
+++ /dev/null
@@ -1,146 +0,0 @@
-SUMMARY = "SpiderMonkey is Mozilla's JavaScript engine written in C/C++"
-HOMEPAGE = "https://developer.mozilla.org/en-US/docs/Mozilla/Projects/SpiderMonkey"
-LICENSE = "MPL-2.0"
-LIC_FILES_CHKSUM = "file://LICENSE;md5=dc9b6ecd19a14a54a628edaaf23733bf"
-
-SRC_URI = " \
-    https://archive.mozilla.org/pub/firefox/releases/${PV}esr/source/firefox-${PV}esr.source.tar.xz \
-    file://0001-rust.configure-Skip-all-target-manipulations.patch \
-    file://0002-build-do-not-use-autoconf-s-config.sub-to-canonicali.patch \
-    file://0003-Do-not-check-binaries-after-build.patch \
-    file://0004-Cargo.toml-do-not-abort-on-panic.patch \
-    file://0005-Fixup-compatibility-of-mozbuild-with-Python-3.10.patch \
-    file://0006-use-asm-sgidefs.h.patch \
-    file://0007-fix-musl-build.patch \
-    file://0008-riscv.patch \
-    file://0009-riscv-Disable-atomic-operations.patch \
-    file://0010-riscv-Set-march-correctly.patch \
-    file://0011-replace-include-by-code-to-fix-arm-build.patch \
-    file://0012-Add-SharedArrayRawBufferRefs-to-public-API.patch \
-    file://0013-util.configure-fix-one-occasionally-reproduced-confi.patch \
-    file://0014-rewrite-cargo-host-linker-in-python3.patch \
-"
-
-SRC_URI[sha256sum] = "a4438d84d95171a6d4fea9c9f02c2edbf0475a9c614d968ebe2eedc25a672151"
-S = "${WORKDIR}/firefox-${@d.getVar("PV").replace("esr", "")}"
-
-DEPENDS = " \
-    autoconf-2.13-native \
-    icu-native \
-    icu \
-    cargo-native \
-    zlib \
-    python3-six \
-    python3-six-native \
-"
-
-inherit autotools pkgconfig rust python3native siteinfo
-
-JIT ?= ""
-JIT:mipsarch = "--disable-jit"
-
-EXTRA_OECONF = " \
-    --target=${RUST_TARGET_SYS} \
-    --host=${BUILD_SYS} \
-    --prefix=${prefix} \
-    --libdir=${libdir} \
-    --x-includes=${STAGING_INCDIR} \
-    --x-libraries=${STAGING_LIBDIR} \
-    --without-system-icu \
-    --disable-tests --disable-strip --disable-optimize \
-    --disable-jemalloc \
-    --with-system-icu \
-    ${@bb.utils.contains('DISTRO_FEATURES', 'ld-is-gold', "--enable-gold", '--disable-gold', d)} \
-    ${JIT} \
-"
-# Note: Python with mozilla build is a mess: E.g: python-six: to get an error
-# free configure we need:
-# * python3-six-native in DEPENDS
-# * python3-six in DEPENDS
-# * path to python-six shipped by mozilla in PYTHONPATH
-prepare_python_and_rust() {
-    if [ ! -f ${B}/PYTHONPATH ]; then
-        oldpath=`pwd`
-        cd ${S}
-        # Add mozjs python-modules necessary
-        PYTHONPATH="${S}/build:${S}/config"
-        PYTHONPATH="$PYTHONPATH:${S}/third_party/python/distro:${S}/third_party/python/jsmin"
-        PYTHONPATH="$PYTHONPATH:${S}/third_party/python/pytoml:${S}/third_party/python/six"
-        PYTHONPATH="$PYTHONPATH:${S}/third_party/python/pyyaml/lib3:${S}/third_party/python/which"
-        for sub_dir in python testing/mozbase; do
-            for module_dir in `ls $sub_dir -1`;do
-                [ $module_dir = "virtualenv" ] && continue
-                if [ -d "${S}/$sub_dir/$module_dir" ];then
-                    PYTHONPATH="$PYTHONPATH:${S}/$sub_dir/$module_dir"
-                fi
-            done
-        done
-        # looks odd but it's huge and we want to see what's in there
-        echo "$PYTHONPATH" > ${B}/PYTHONPATH
-        cd "$oldpath"
-    fi
-
-    export PYTHONPATH=`cat ${B}/PYTHONPATH`
-
-    export RUST_TARGET_PATH="${RUST_TARGET_PATH}"
-    export RUST_TARGET="${TARGET_SYS}"
-    export RUSTFLAGS="${RUSTFLAGS}"
-}
-
-export HOST_CC = "${BUILD_CC}"
-export HOST_CXX = "${BUILD_CXX}"
-export HOST_CFLAGS = "${BUILD_CFLAGS}"
-export HOST_CPPFLAGS = "${BUILD_CPPFLAGS}"
-export HOST_CXXFLAGS = "${BUILD_CXXFLAGS}"
-# otherwise we are asked for yasm...
-export AS = "${CC}"
-
-CPPFLAGS:append:mips:toolchain-clang = " -fpie"
-CPPFLAGS:append:mipsel:toolchain-clang = " -fpie"
-
-do_configure() {
-    prepare_python_and_rust
-
-    cd ${S}/js/src
-    autoconf213 --macrodir=${STAGING_DATADIR_NATIVE}/autoconf213 old-configure.in > old-configure
-
-    cd ${B}
-    # * use of /tmp can causes problems on heavily loaded hosts
-    # * with mozjs-78 we get without:
-    # | Path specified in LOCAL_INCLUDES (..) resolves to the topsrcdir or topobjdir (<tmpdir>/oe-core-glibc/work/cortexa72-mortsgna-linux/mozjs-78/78.15.0-r0/firefox-78.15.0/js/src), which is not allowed
-    mkdir -p "${B}/lcl_tmp"
-    TMPDIR="${B}/lcl_tmp"  CFLAGS="${CFLAGS}" CXXFLAGS="${CXXFLAGS}" ${S}/js/src/configure ${EXTRA_OECONF}
-
-    # inspired by what fedora [1] does: for big endian rebuild icu dat
-    # this avoids gjs qemu crash on mips at gir creation
-    # [1] https://src.fedoraproject.org/rpms/mozjs78/blob/rawhide/f/mozjs78.spec
-    if [ ${@oe.utils.conditional('SITEINFO_ENDIANNESS', 'le', 'little', 'big', d)} = "big" -a ! -e ${S}/config/external/icu/data/icudt67b.dat ]; then
-        echo "Do big endian icu dat-convert..."
-        icupkg -tb ${S}/config/external/icu/data/icudt67l.dat ${S}/config/external/icu/data/icudt67b.dat
-        rm -f ${S}/config/external/icu/data/icudt*l.dat
-    fi
-}
-
-do_compile:prepend() {
-    prepare_python_and_rust
-}
-
-do_install:prepend() {
-    prepare_python_and_rust
-}
-
-do_install:append() {
-    # tidy up installation
-    chmod -x ${D}${libdir}/pkgconfig/*.pc
-    sed -i 's:\x24{includedir}/mozjs-78/js/RequiredDefines.h:js/RequiredDefines.h:g' ${D}${libdir}/pkgconfig/*.pc
-
-    rm -f ${D}${libdir}/libjs_static.ajs
-}
-
-ARM_INSTRUCTION_SET:armv5 = "arm"
-ARM_INSTRUCTION_SET:armv4 = "arm"
-
-DISABLE_STATIC = ""
-
-PACKAGES =+ "lib${BPN}"
-FILES:lib${BPN} += "${libdir}/lib*"
diff --git a/meta-openembedded/meta-oe/dynamic-layers/perl-layer/recipes-support/rasdaemon/files/0001-Fix-system-header-includes.patch b/meta-openembedded/meta-oe/dynamic-layers/perl-layer/recipes-support/rasdaemon/files/0001-Fix-system-header-includes.patch
deleted file mode 100644
index 0164321..0000000
--- a/meta-openembedded/meta-oe/dynamic-layers/perl-layer/recipes-support/rasdaemon/files/0001-Fix-system-header-includes.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From 18786db1ad03716267927d983c83275469a1478a Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Fri, 8 May 2020 12:27:19 -0700
-Subject: [PATCH] Fix system header includes
-
-Use poll.h instead of sys/poll.h
-Fixes
-warning: #warning redirecting incorrect #include <sys/poll.h> to <poll.h> [-Wcpp]
-
-Include limits.h for PATH_MAX
-
-Fixes
-ras-events.c:359:16: error: 'PATH_MAX' undeclared (first use in this function)
-  359 |  char pipe_raw[PATH_MAX];
-      |                ^~~~~~~~
-
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- ras-events.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/ras-events.c b/ras-events.c
-index 511c93d..400e740 100644
---- a/ras-events.c
-+++ b/ras-events.c
-@@ -18,13 +18,14 @@
- #include <dirent.h>
- #include <errno.h>
- #include <fcntl.h>
-+#include <limits.h>
-+#include <poll.h>
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include <unistd.h>
- #include <sys/stat.h>
- #include <sys/types.h>
--#include <sys/poll.h>
- #include <signal.h>
- #include <sys/signalfd.h>
- #include "libtrace/kbuffer.h"
--- 
-2.26.2
-
diff --git a/meta-openembedded/meta-oe/dynamic-layers/perl-layer/recipes-support/rasdaemon/files/0001-libtrace-Use-XSI-version-of-strerror_r-on-non-glibc-.patch b/meta-openembedded/meta-oe/dynamic-layers/perl-layer/recipes-support/rasdaemon/files/0001-libtrace-Use-XSI-version-of-strerror_r-on-non-glibc-.patch
new file mode 100644
index 0000000..fef71af
--- /dev/null
+++ b/meta-openembedded/meta-oe/dynamic-layers/perl-layer/recipes-support/rasdaemon/files/0001-libtrace-Use-XSI-version-of-strerror_r-on-non-glibc-.patch
@@ -0,0 +1,35 @@
+From e415152b51eacab8705b6b3274cc0d1a274772e0 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 30 Aug 2022 19:54:35 -0700
+Subject: [PATCH] libtrace: Use XSI version of strerror_r on non glibc systems
+
+The version used is glibc specific therefore make it so
+and provide a fallback for non-glibc systems
+
+Upstream-Status: Submitted [https://github.com/mchehab/rasdaemon/pull/70]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ libtrace/event-parse.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/libtrace/event-parse.c b/libtrace/event-parse.c
+index 6c705c3..6b651d5 100644
+--- a/libtrace/event-parse.c
++++ b/libtrace/event-parse.c
+@@ -5071,7 +5071,13 @@ int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum,
+ 	const char *msg;
+ 
+ 	if (errnum >= 0) {
++#if defined(__GLIBC__)
+ 		msg = strerror_r(errnum, buf, buflen);
++#else
++		if (strerror_r(errnum, buf, buflen) != 0)
++			snprintf(buf, buflen, "unknown error %i", errnum);
++		msg = buf;
++#endif
+ 		if (msg != buf) {
+ 			size_t len = strlen(msg);
+ 			memcpy(buf, msg, min(buflen - 1, len));
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/dynamic-layers/perl-layer/recipes-support/rasdaemon/rasdaemon_0.6.7.bb b/meta-openembedded/meta-oe/dynamic-layers/perl-layer/recipes-support/rasdaemon/rasdaemon_0.6.8.bb
similarity index 92%
rename from meta-openembedded/meta-oe/dynamic-layers/perl-layer/recipes-support/rasdaemon/rasdaemon_0.6.7.bb
rename to meta-openembedded/meta-oe/dynamic-layers/perl-layer/recipes-support/rasdaemon/rasdaemon_0.6.8.bb
index f30822b..3a525e8 100644
--- a/meta-openembedded/meta-oe/dynamic-layers/perl-layer/recipes-support/rasdaemon/rasdaemon_0.6.7.bb
+++ b/meta-openembedded/meta-oe/dynamic-layers/perl-layer/recipes-support/rasdaemon/rasdaemon_0.6.8.bb
@@ -4,11 +4,11 @@
 LIC_FILES_CHKSUM = "file://COPYING;md5=d3070efe0afa3dc41608bd82c00bb0dc"
 
 SRC_URI = "git://github.com/mchehab/rasdaemon.git;branch=master;protocol=https \
-           file://0001-Fix-system-header-includes.patch \
+           file://0001-libtrace-Use-XSI-version-of-strerror_r-on-non-glibc-.patch \
            file://rasdaemon.service \
            file://init"
 
-SRCREV = "aa96737648d867a3d73e4151d05b54bbab494605"
+SRCREV = "c2255178a49f62c53009a456bc37dd5e37332f09"
 
 S = "${WORKDIR}/git"
 
@@ -32,10 +32,6 @@
 DEPENDS:append:libc-musl = " argp-standalone"
 LDFLAGS:append:libc-musl = " -largp"
 
-do_configure:prepend () {
-	( cd ${S}; autoreconf -vfi )
-}
-
 do_install:append() {
 	install -d ${D}${sysconfdir}/init.d
 	install -m 755 ${WORKDIR}/init ${D}${sysconfdir}/init.d/rasdaemon
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/fio/fio/0001-io_uring-Replace-pthread_self-with-s-tid.patch b/meta-openembedded/meta-oe/recipes-benchmark/fio/fio/0001-io_uring-Replace-pthread_self-with-s-tid.patch
deleted file mode 100644
index 766b1fe..0000000
--- a/meta-openembedded/meta-oe/recipes-benchmark/fio/fio/0001-io_uring-Replace-pthread_self-with-s-tid.patch
+++ /dev/null
@@ -1,45 +0,0 @@
-From 269164337e0168b93661bb95c6a4e462ae6d8b61 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Wed, 24 Aug 2022 18:08:53 -0700
-Subject: [PATCH] io_uring: Replace pthread_self with s->tid
-
-__init_rand64 takes 64bit value and srand48 takes unsigned 32bit value,
-pthread_t is opaque type and some libcs ( e.g. musl ) do not define them
-in plain old data types and ends up with errors
-
-| t/io_uring.c:809:32: error: incompatible pointer to integer conversion passing 'pthread_t' (aka 'struct __pthread *') to parameter of type 'uint64_t' (aka 'unsigned long') [-Wint-conver
-sion]
-|         __init_rand64(&s->rand_state, pthread_self());
-|                                       ^~~~~~~~~~~~~~
-
-Upstream-Status: Submitted [https://github.com/axboe/fio/pull/1455]
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- t/io_uring.c | 5 ++---
- 1 file changed, 2 insertions(+), 3 deletions(-)
-
-diff --git a/t/io_uring.c b/t/io_uring.c
-index 35bf1956..f34a3554 100644
---- a/t/io_uring.c
-+++ b/t/io_uring.c
-@@ -799,15 +799,14 @@ static int submitter_init(struct submitter *s)
- 	int i, nr_batch, err;
- 	static int init_printed;
- 	char buf[80];
--
- 	s->tid = gettid();
- 	printf("submitter=%d, tid=%d, file=%s, node=%d\n", s->index, s->tid,
- 							s->filename, s->numa_node);
- 
- 	set_affinity(s);
- 
--	__init_rand64(&s->rand_state, pthread_self());
--	srand48(pthread_self());
-+	__init_rand64(&s->rand_state, s->tid);
-+	srand48(s->tid);
- 
- 	for (i = 0; i < MAX_FDS; i++)
- 		s->files[i].fileno = i;
--- 
-2.37.2
-
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/fio/fio_3.31.bb b/meta-openembedded/meta-oe/recipes-benchmark/fio/fio_3.32.bb
similarity index 88%
rename from meta-openembedded/meta-oe/recipes-benchmark/fio/fio_3.31.bb
rename to meta-openembedded/meta-oe/recipes-benchmark/fio/fio_3.32.bb
index f8d6301..90e2834 100644
--- a/meta-openembedded/meta-oe/recipes-benchmark/fio/fio_3.31.bb
+++ b/meta-openembedded/meta-oe/recipes-benchmark/fio/fio_3.32.bb
@@ -22,10 +22,8 @@
 PACKAGECONFIG ??= "${PACKAGECONFIG_NUMA}"
 PACKAGECONFIG[numa] = ",--disable-numa,numactl"
 
-SRCREV = "6cafe8445fd1e04e5f7d67bbc73029a538d1b253"
-SRC_URI = "git://git.kernel.dk/fio.git;branch=master \
-           file://0001-io_uring-Replace-pthread_self-with-s-tid.patch \
-           "
+SRCREV = "db7fc8d864dc4fb607a0379333a0db60431bd649"
+SRC_URI = "git://git.kernel.dk/fio.git;branch=master"
 
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-Check-for-musl-define-guard-before-redefining-sockle.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-Check-for-musl-define-guard-before-redefining-sockle.patch
deleted file mode 100644
index d716949..0000000
--- a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-Check-for-musl-define-guard-before-redefining-sockle.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From ac1657e11cbb0545bdf4d1124def8c013958ed7e Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Wed, 12 Jul 2017 18:08:51 -0700
-Subject: [PATCH] Check for musl define guard before redefining socklen_t
-
-musl uses __DEFINED_socklen_t so check for that as well
-along with HAVE_socklen_t
-
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
-
----
- src/bench.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/bench.h b/src/bench.h
-index 597d068..2ebdf5a 100644
---- a/src/bench.h
-+++ b/src/bench.h
-@@ -77,7 +77,7 @@ typedef long long int64;
- #endif /* HAVE_int64_t */
- #endif /* HAVE_int64 */
- 
--#ifndef HAVE_socklen_t
-+#if !defined(HAVE_socklen_t) && !defined(__DEFINED_socklen_t)
- typedef int socklen_t;
- #endif
- 
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-bench.h-Fix-typo-in-specifying-string.h.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-bench.h-Fix-typo-in-specifying-string.h.patch
new file mode 100644
index 0000000..e9220ec
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-bench.h-Fix-typo-in-specifying-string.h.patch
@@ -0,0 +1,27 @@
+From 4bbedd25e74adb5cb181a9ae589adb2052ade630 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 31 Aug 2022 23:34:48 -0700
+Subject: [PATCH] bench.h: Fix typo in specifying string.h
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/bench.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/bench.h b/src/bench.h
+index 2ebdf5a..750bebf 100644
+--- a/src/bench.h
++++ b/src/bench.h
+@@ -21,7 +21,7 @@ typedef unsigned char bool_t;
+ #include        <signal.h>
+ #include        <errno.h>
+ #ifndef WIN32
+-#include        <strings.h>
++#include        <string.h>
+ #endif
+ #include        <sys/types.h>
+ #ifndef WIN32
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-doc-Fix-typos-in-lat_unix_connect-manual-page.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-doc-Fix-typos-in-lat_unix_connect-manual-page.patch
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-doc-Fix-typos-in-lat_unix_connect-manual-page.patch
rename to meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-doc-Fix-typos-in-lat_unix_connect-manual-page.patch
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-doc-Fix-typos-in-manual-pages.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-doc-Fix-typos-in-manual-pages.patch
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-doc-Fix-typos-in-manual-pages.patch
rename to meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-doc-Fix-typos-in-manual-pages.patch
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-lat_fifo-Fix-cleanup-sequence.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-lat_fifo-Fix-cleanup-sequence.patch
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-lat_fifo-Fix-cleanup-sequence.patch
rename to meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-lat_fifo-Fix-cleanup-sequence.patch
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-lat_http.c-Add-printf-format.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-lat_http.c-Add-printf-format.patch
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-lat_http.c-Add-printf-format.patch
rename to meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-lat_http.c-Add-printf-format.patch
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-lmbench-Point-webpage-lm-to-target-directory.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-lmbench-Point-webpage-lm-to-target-directory.patch
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-lmbench-Point-webpage-lm-to-target-directory.patch
rename to meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-lmbench-Point-webpage-lm-to-target-directory.patch
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-scripts-build-Fix-the-tests-to-build-with-clang15.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-scripts-build-Fix-the-tests-to-build-with-clang15.patch
new file mode 100644
index 0000000..f07cef6
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-scripts-build-Fix-the-tests-to-build-with-clang15.patch
@@ -0,0 +1,239 @@
+From bcc6816b92ecf409357865589069a82883d589f7 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 6 Sep 2022 22:53:51 -0700
+Subject: [PATCH] scripts/build: Fix the tests to build with clang15
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ scripts/build | 50 +++++++++++++++++++++++++-------------------------
+ 1 file changed, 25 insertions(+), 25 deletions(-)
+
+--- a/scripts/build
++++ b/scripts/build
+@@ -21,7 +21,7 @@ trap 'rm -f ${BASE}$$.s ${BASE}$$.c ${BA
+ LDLIBS="$LDLIBS -lm"
+ 
+ # check for HP-UX's ANSI compiler
+-echo "main(int ac, char *av[]) { int i; }" > ${BASE}$$.c
++echo "int main(int ac, char *av[]) { int i; }" > ${BASE}$$.c
+ if ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c 1>${NULL} 2>${NULL}
+ then
+ 	true;
+@@ -39,7 +39,7 @@ arch=`echo $OS | awk -F- '{print $1;}'`
+ if [ "X$CC" = "Xcc" -a "X$arch" = "Xia64" ]
+ then
+ 	echo "#include <stdlib.h>" > ${BASE}$$.c
+-	echo "main(int ac, char *av[])" >> ${BASE}$$.c
++	echo "int main(int ac, char *av[])" >> ${BASE}$$.c
+ 	echo "{ long* p = (long*)malloc(sizeof(long));" >> ${BASE}$$.c
+ 	echo "*p = 0; exit((int)*p); }" >> ${BASE}$$.c
+ 	${CC} ${CFLAGS} +DD64 -o ${BASE}$$ ${BASE}$$.c 1>${NULL} 2>${NULL} \
+@@ -51,14 +51,14 @@ fi
+ 
+ # check for bcopy (optionally set the SYS5 flag)
+ echo "#include <string.h>" > ${BASE}$$.c
+-echo "main() { char a[256], b[256]; bcopy(a, b, 256); }" >> ${BASE}$$.c
++echo "int main() { char a[256], b[256]; bcopy(a, b, 256); }" >> ${BASE}$$.c
+ ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	|| CFLAGS="${CFLAGS} -DSYS5"
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ 
+ # check for valloc
+ echo "#include <stdlib.h>" > ${BASE}$$.c
+-echo "main() { char* buf = valloc(123); }" >> ${BASE}$$.c
++echo "int main() { char* buf = valloc(123); }" >> ${BASE}$$.c
+ ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	|| CFLAGS="${CFLAGS} -Dvalloc=malloc"
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+@@ -67,7 +67,7 @@ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ echo "#include <stdlib.h>" > ${BASE}$$.c
+ echo "#include <sys/types.h>" >> ${BASE}$$.c
+ echo "#include <unistd.h>" >> ${BASE}$$.c
+-echo "main() { off64_t off; }" >> ${BASE}$$.c
++echo "int main() { off64_t off; }" >> ${BASE}$$.c
+ ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	&& CFLAGS="${CFLAGS} -DHAVE_off64_t"
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+@@ -76,7 +76,7 @@ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ echo "#include <stdlib.h>" > ${BASE}$$.c
+ echo "#include <sys/types.h>" >> ${BASE}$$.c
+ echo "#include <unistd.h>" >> ${BASE}$$.c
+-echo "main() { int fd = 0; off64_t off = 0; off = lseek64(fd, off, SEEK_SET); }" >> ${BASE}$$.c
++echo "int main() { int fd = 0; off64_t off = 0; off = lseek64(fd, off, SEEK_SET); }" >> ${BASE}$$.c
+ ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	&& CFLAGS="${CFLAGS} -DHAVE_lseek64"
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+@@ -88,7 +88,7 @@ echo "#include <sys/resource.h>" >> ${BA
+ echo "#ifndef RUSAGE_SELF" >> ${BASE}$$.c
+ echo "#define RUSAGE_SELF 0" >> ${BASE}$$.c
+ echo "#endif /* RUSAGE_SELF */" >> ${BASE}$$.c
+-echo "main() { struct rusage ru; getrusage(RUSAGE_SELF, &ru); }" >> ${BASE}$$.c
++echo "int main() { struct rusage ru; getrusage(RUSAGE_SELF, &ru); }" >> ${BASE}$$.c
+ ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	&& CFLAGS="${CFLAGS} -DRUSAGE"
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+@@ -125,24 +125,24 @@ fi
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ 
+ # check for -lrpc (cygwin/Windows)
+-echo "extern int pmap_set(); main() { pmap_set(); }" >${BASE}$$.c
++echo "extern int pmap_set(void); int main() { pmap_set(); }" >${BASE}$$.c
+ if ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL}; then
+        true;
+-else
+-       ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} -lrpc 1>${NULL} 2>${NULL} \
+-               && LDLIBS="${LDLIBS} -lrpc"
++elif ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} -lrpc 1>${NULL} 2>${NULL}; then
++      LDLIBS="${LDLIBS} -lrpc"
++else ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} -ltirpc 1>${NULL} 2>${NULL} && LDLIBS="${LDLIBS} -ltirpc"
+ fi
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ 
+ # check for OSs that have S_IFFIFO instead of S_IFIFO
+ echo "#include <sys/stat.h>" > ${BASE}$$.c
+-echo "main() { return (S_IFIFO); }" >> ${BASE}$$.c
++echo "int main() { return (S_IFIFO); }" >> ${BASE}$$.c
+ if ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL}; then
+ 	true;
+ else
+ 	rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ 	echo "#include <sys/stat.h>" > ${BASE}$$.c
+-	echo "main() { return (S_IFFIFO); }" >> ${BASE}$$.c
++	echo "int main() { return (S_IFFIFO); }" >> ${BASE}$$.c
+ 	${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 		|| CFLAGS="${CFLAGS} -DS_IFIFO=S_IFFIFO"
+ fi
+@@ -151,7 +151,7 @@ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ # check that we have uint
+ echo "#include <stdlib.h>" > ${BASE}$$.c
+ echo "#include <sys/types.h>" >> ${BASE}$$.c
+-echo "main() { uint i = 0; return (i); }" >> ${BASE}$$.c
++echo "int main() { uint i = 0; return (i); }" >> ${BASE}$$.c
+ ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	&& CFLAGS="${CFLAGS} -DHAVE_uint=1";
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+@@ -161,7 +161,7 @@ HAVE_uint64=0
+ echo "#include <stdlib.h>" > ${BASE}$$.c
+ echo "#include <sys/types.h>" >> ${BASE}$$.c
+ echo "#include <rpc/types.h>" >> ${BASE}$$.c
+-echo "main() { uint64 i = 0; return (int)(i); }" >> ${BASE}$$.c
++echo "int main() { uint64 i = 0; return (int)(i); }" >> ${BASE}$$.c
+ ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	&& CFLAGS="${CFLAGS} -DHAVE_uint64=1" && HAVE_uint64=1;
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+@@ -170,7 +170,7 @@ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ if [ ${HAVE_uint64} = 0 ]; then
+     echo "#include <stdlib.h>" > ${BASE}$$.c
+     echo "#include <sys/types.h>" >> ${BASE}$$.c
+-    echo "main() { uint64_t i = 0; return (int)(i); }" >> ${BASE}$$.c
++    echo "int main() { uint64_t i = 0; return (int)(i); }" >> ${BASE}$$.c
+     ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	&& CFLAGS="${CFLAGS} -DHAVE_uint64_t=1";
+     rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+@@ -181,7 +181,7 @@ HAVE_int64=0
+ echo "#include <stdlib.h>" > ${BASE}$$.c
+ echo "#include <sys/types.h>" >> ${BASE}$$.c
+ echo "#include <rpc/types.h>" >> ${BASE}$$.c
+-echo "main() { int64 i = 0; return (int)(i); }" >> ${BASE}$$.c
++echo "int main() { int64 i = 0; return (int)(i); }" >> ${BASE}$$.c
+ ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	&& CFLAGS="${CFLAGS} -DHAVE_int64=1" && HAVE_int64=1;
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+@@ -190,7 +190,7 @@ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ if [ ${HAVE_int64} = 0 ]; then
+     echo "#include <stdlib.h>" > ${BASE}$$.c
+     echo "#include <sys/types.h>" >> ${BASE}$$.c
+-    echo "main() { int64_t i = 0; return (int)(i); }" >> ${BASE}$$.c
++    echo "int main() { int64_t i = 0; return (int)(i); }" >> ${BASE}$$.c
+     ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	&& CFLAGS="${CFLAGS} -DHAVE_int64_t=1";
+     rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+@@ -209,7 +209,7 @@ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ echo "#include <stdlib.h>" > ${BASE}$$.c
+ echo "#include <sys/types.h>" >> ${BASE}$$.c
+ echo "#include <sys/socket.h>" >> ${BASE}$$.c
+-echo "main() { int s; struct sockaddr dest; socklen_t len; getsockname(s, &dest, &len); }" >> ${BASE}$$.c
++echo "int main() { int s; struct sockaddr dest; socklen_t len; getsockname(s, &dest, &len); }" >> ${BASE}$$.c
+ ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	&& CFLAGS="${CFLAGS} -DHAVE_socklen_t"
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+@@ -217,7 +217,7 @@ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ # check that we have drand48 and srand48
+ HAVE_RANDOM=0
+ echo "#include <stdlib.h>" > ${BASE}$$.c
+-echo "main() { srand48(973); return (int)(1.0E9 * drand48()); }" >> ${BASE}$$.c
++echo "int main() { srand48(973); return (int)(1.0E9 * drand48()); }" >> ${BASE}$$.c
+ if ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL}; then
+ 	CFLAGS="${CFLAGS} -DHAVE_DRAND48"
+ 	HAVE_RANDOM=1
+@@ -226,7 +226,7 @@ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ 
+ if [ ${HAVE_RANDOM} -eq 0 ]; then
+     echo "#include <stdlib.h>" > ${BASE}$$.c
+-    echo "main() { srand(973); return (10 * rand()) / RAND_MAX; }" >> ${BASE}$$.c
++    echo "int main() { srand(973); return (10 * rand()) / RAND_MAX; }" >> ${BASE}$$.c
+     if ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL}; then
+ 	CFLAGS="${CFLAGS} -DHAVE_RAND"
+ 	HAVE_RANDOM=1
+@@ -236,7 +236,7 @@ fi
+ 
+ if [ ${HAVE_RANDOM} -eq 0 ]; then
+     echo "#include <stdlib.h>" > ${BASE}$$.c
+-    echo "main() { srandom(973); return (10 * random()) / RAND_MAX; }" >> ${BASE}$$.c
++    echo "int main() { srandom(973); return (10 * random()) / RAND_MAX; }" >> ${BASE}$$.c
+     if ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL}; then
+ 	CFLAGS="${CFLAGS} -DHAVE_RANDOM"
+ 	HAVE_RANDOM=1
+@@ -247,7 +247,7 @@ fi
+ # check that we have sysmp
+ echo "#include <sys/types.h>" > ${BASE}$$.c
+ echo "#include <sys/sysmp.h>" >> ${BASE}$$.c
+-echo "main() { return (int)sysmp(MP_NPROCS); }" >> ${BASE}$$.c
++echo "int main() { return (int)sysmp(MP_NPROCS); }" >> ${BASE}$$.c
+ ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	&& CFLAGS="${CFLAGS} -DHAVE_SYSMP=1";
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+@@ -257,7 +257,7 @@ echo "#include <stdlib.h>" > ${BASE}$$.c
+ echo "#include <unistd.h>" >> ${BASE}$$.c
+ echo "#include <sys/types.h>" >> ${BASE}$$.c
+ echo "#include <sys/processor.h>" >> ${BASE}$$.c
+-echo "main() { return bindprocessor(BINDPROCESS, getpid(), 0); }" >> ${BASE}$$.c
++echo "int main() { return bindprocessor(BINDPROCESS, getpid(), 0); }" >> ${BASE}$$.c
+ ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	&& CFLAGS="${CFLAGS} -DHAVE_BINDPROCESSOR=1";
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+@@ -267,16 +267,17 @@ echo "#include <stdlib.h>" > ${BASE}$$.c
+ echo "#include <sys/types.h>" >> ${BASE}$$.c
+ echo "#include <sys/processor.h>" >> ${BASE}$$.c
+ echo "#include <sys/procset.h>" >> ${BASE}$$.c
+-echo "main() { return processor(P_PID, P_MYPID, 0, NULL); }" >> ${BASE}$$.c
++echo "int main() { return processor(P_PID, P_MYPID, 0, NULL); }" >> ${BASE}$$.c
+ ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	&& CFLAGS="${CFLAGS} -DHAVE_BINDPROCESSOR=1";
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ 
+ # check that we have sched_setaffinity
+-echo "#include <stdlib.h>" > ${BASE}$$.c
++echo "#define _GNU_SOURCE" > ${BASE}$$.c
++echo "#include <stdlib.h>" >> ${BASE}$$.c
+ echo "#include <unistd.h>" >> ${BASE}$$.c
+ echo "#include <sched.h>" >> ${BASE}$$.c
+-echo "main() { unsigned long mask = 1; return sched_setaffinity(0, sizeof(unsigned long), &mask); }" >> ${BASE}$$.c
++echo "int main() { unsigned long mask = 1; return sched_setaffinity(0, sizeof(unsigned long), &mask); }" >> ${BASE}$$.c
+ ${CC} ${CFLAGS} -o ${BASE}$$ ${BASE}$$.c ${LDLIBS} 1>${NULL} 2>${NULL} \
+ 	&& CFLAGS="${CFLAGS} -DHAVE_SCHED_SETAFFINITY=1";
+ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+@@ -285,4 +286,4 @@ rm -f ${BASE}$$ ${BASE}$$.o ${BASE}$$.c
+ if [ ! -d ${BINDIR} ]; then mkdir -p ${BINDIR}; fi
+ 
+ # now go ahead and build everything!
+-${MAKE} OS="${OS}" CC="${CC}" CFLAGS="${CFLAGS}" LDLIBS="${LDLIBS}" O="${BINDIR}" $*
++${MAKE} OS="${OS}" CC="${CC}" CFLAGS="${CFLAGS}" LDLIBS="${LDLIBS}" O="${BINDIR}" ${EXTRA_OEMAKE} $*
+--- a/src/lib_sched.c
++++ b/src/lib_sched.c
+@@ -1,3 +1,4 @@
++#define _GNU_SOURCE
+ #include "bench.h"
+ 
+ /* #define _DEBUG */
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-src-Makefile-use-libdir-instead-of-hardcoded-lib.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-src-Makefile-use-libdir-instead-of-hardcoded-lib.patch
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0001-src-Makefile-use-libdir-instead-of-hardcoded-lib.patch
rename to meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0001-src-Makefile-use-libdir-instead-of-hardcoded-lib.patch
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0002-build-Adjust-CFLAGS-LDFLAGS-to-append-values-passed-.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0002-build-Adjust-CFLAGS-LDFLAGS-to-append-values-passed-.patch
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/0002-build-Adjust-CFLAGS-LDFLAGS-to-append-values-passed-.patch
rename to meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/0002-build-Adjust-CFLAGS-LDFLAGS-to-append-values-passed-.patch
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/fix-lmbench-memory-check-failure.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/fix-lmbench-memory-check-failure.patch
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/fix-lmbench-memory-check-failure.patch
rename to meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/fix-lmbench-memory-check-failure.patch
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/lmbench_result_html_report.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/lmbench_result_html_report.patch
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/lmbench_result_html_report.patch
rename to meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/lmbench_result_html_report.patch
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/obey-ranlib.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/obey-ranlib.patch
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/obey-ranlib.patch
rename to meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/obey-ranlib.patch
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/update-config-script.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/update-config-script.patch
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/update-config-script.patch
rename to meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/update-config-script.patch
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/update-results-script.patch b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/update-results-script.patch
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench-3.0-a9/update-results-script.patch
rename to meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench/update-results-script.patch
diff --git a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench_3.0-a9.bb b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench_3.0-a9.bb
index 2046b13..d74c28f 100644
--- a/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench_3.0-a9.bb
+++ b/meta-openembedded/meta-oe/recipes-benchmark/lmbench/lmbench_3.0-a9.bb
@@ -9,7 +9,6 @@
 
 DEPENDS += "libtirpc"
 CFLAGS += "-I${STAGING_INCDIR}/tirpc"
-LDLIBS += " -ltirpc "
 
 PR = "r2"
 
@@ -23,13 +22,14 @@
            file://fix-lmbench-memory-check-failure.patch \
            file://0001-avoid-gcc-optimize-away-the-loops.patch \
            file://0001-lat_http.c-Add-printf-format.patch \
-           file://0001-Check-for-musl-define-guard-before-redefining-sockle.patch \
            file://0002-build-Adjust-CFLAGS-LDFLAGS-to-append-values-passed-.patch \
            file://0001-src-Makefile-use-libdir-instead-of-hardcoded-lib.patch \
            file://0001-lmbench-Point-webpage-lm-to-target-directory.patch \
            file://0001-doc-Fix-typos-in-manual-pages.patch \
            file://0001-lat_fifo-Fix-cleanup-sequence.patch \
            file://0001-doc-Fix-typos-in-lat_unix_connect-manual-page.patch \
+           file://0001-bench.h-Fix-typo-in-specifying-string.h.patch \
+           file://0001-scripts-build-Fix-the-tests-to-build-with-clang15.patch \
            "
 SRC_URI[md5sum] = "b3351a3294db66a72e2864a199d37cbf"
 SRC_URI[sha256sum] = "cbd5777d15f44eab7666dcac418054c3c09df99826961a397d9acf43d8a2a551"
@@ -37,8 +37,11 @@
 UPSTREAM_CHECK_URI = "https://sourceforge.net/projects/lmbench/files/development/"
 UPSTREAM_CHECK_REGEX = "lmbench-(?P<pver>\d+(\.\d+)+-[a-z]+\d+)"
 
+export OS = "${TARGET_SYS}"
+export TARGET = "${TARGET_OS}"
+
 EXTRA_OEMAKE = 'CC="${CC}" AR="${AR}" RANLIB="${RANLIB}" CFLAGS="${CFLAGS}" \
-                LDFLAGS="${LDFLAGS}" LDLIBS="${LDLIBS}" LD="${LD}" OS="${TARGET_SYS}" \
+                LDFLAGS="${LDFLAGS}" LD="${LD}" OS="${TARGET_SYS}" \
                 TARGET="${TARGET_OS}" BASE="${prefix}" MANDIR="${mandir}"'
 
 do_configure() {
@@ -53,7 +56,7 @@
         CFLAGS="${CFLAGS} -DHAVE_uint"
     fi
     install -d ${S}/bin/${TARGET_SYS}
-    oe_runmake -C src
+    ${S}/scripts/build
 }
 
 do_install () {
diff --git a/meta-openembedded/meta-oe/recipes-bsp/con2fbmap/con2fbmap/0001-con2fbmap-Add-missing-include-on-string.h.patch b/meta-openembedded/meta-oe/recipes-bsp/con2fbmap/con2fbmap/0001-con2fbmap-Add-missing-include-on-string.h.patch
new file mode 100644
index 0000000..d1b75f1
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-bsp/con2fbmap/con2fbmap/0001-con2fbmap-Add-missing-include-on-string.h.patch
@@ -0,0 +1,28 @@
+From 0a948d8d91b789e9c9b60136fada75b1d464af63 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 18:31:27 -0700
+Subject: [PATCH] con2fbmap: Add missing include on string.h
+
+strerror() is from string.h therefore include it to fix -Wimplicit-function-declaration
+warnings
+
+Upstream-Status: Submitted [https://gitlab.com/pibox/con2fbmap/-/merge_requests/1]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/con2fbmap.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/src/con2fbmap.c b/src/con2fbmap.c
+index 7831931..7a2e870 100644
+--- a/src/con2fbmap.c
++++ b/src/con2fbmap.c
+@@ -1,5 +1,6 @@
+ #include <stdio.h>
+ #include <stdlib.h>
++#include <string.h> /* for strerror */
+ #include <unistd.h>
+ #include <fcntl.h>
+ #include <sys/ioctl.h>
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-bsp/con2fbmap/con2fbmap_git.bb b/meta-openembedded/meta-oe/recipes-bsp/con2fbmap/con2fbmap_git.bb
index 5d1c5ba..302a61e 100644
--- a/meta-openembedded/meta-oe/recipes-bsp/con2fbmap/con2fbmap_git.bb
+++ b/meta-openembedded/meta-oe/recipes-bsp/con2fbmap/con2fbmap_git.bb
@@ -9,7 +9,9 @@
 DEPENDS = ""
 
 SRCREV = "61ed2f28b294b1ebeb767df8cb5fcd391709c8e2"
-SRC_URI = "git://gitlab.com/pibox/con2fbmap.git;protocol=https;branch=master"
+SRC_URI = "git://gitlab.com/pibox/con2fbmap.git;protocol=https;branch=master \
+           file://0001-con2fbmap-Add-missing-include-on-string.h.patch \
+           "
 
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-oe/recipes-bsp/fwupd/fwupd/c54ae9c524998e449b822feb465a0c90317cd735.patch b/meta-openembedded/meta-oe/recipes-bsp/fwupd/fwupd/c54ae9c524998e449b822feb465a0c90317cd735.patch
new file mode 100644
index 0000000..947c764
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-bsp/fwupd/fwupd/c54ae9c524998e449b822feb465a0c90317cd735.patch
@@ -0,0 +1,26 @@
+From c54ae9c524998e449b822feb465a0c90317cd735 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 20 Sep 2022 15:10:26 +0100
+Subject: [PATCH] trivial: Fix compile when using python 3.7 or older
+
+Upstream-Status: Backport [https://github.com/fwupd/fwupd/pull/5051]
+Signed-off-by: Richard Hughes <richard@hughsie.com>
+---
+ contrib/generate-gresource-xml.py | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/contrib/generate-gresource-xml.py b/contrib/generate-gresource-xml.py
+index 235a770020..569c804937 100755
+--- a/contrib/generate-gresource-xml.py
++++ b/contrib/generate-gresource-xml.py
+@@ -23,6 +23,9 @@
+         n_file.set("preprocess", "xml-stripblanks")
+     n_file.set("alias", os.path.basename(fn))
+ with open(sys.argv[1], "wb") as f:
+-    f.write(ET.tostring(root, "utf-8", xml_declaration=True))
++    try:
++        f.write(ET.tostring(root, "utf-8", xml_declaration=True))
++    except TypeError:
++        f.write(ET.tostring(root, "utf-8"))
+ 
+ sys.exit(0)
diff --git a/meta-openembedded/meta-oe/recipes-bsp/fwupd/fwupd_1.8.1.bb b/meta-openembedded/meta-oe/recipes-bsp/fwupd/fwupd_1.8.4.bb
similarity index 94%
rename from meta-openembedded/meta-oe/recipes-bsp/fwupd/fwupd_1.8.1.bb
rename to meta-openembedded/meta-oe/recipes-bsp/fwupd/fwupd_1.8.4.bb
index 7576b84..72f37ae 100644
--- a/meta-openembedded/meta-oe/recipes-bsp/fwupd/fwupd_1.8.1.bb
+++ b/meta-openembedded/meta-oe/recipes-bsp/fwupd/fwupd_1.8.4.bb
@@ -2,11 +2,12 @@
 LICENSE = "LGPL-2.1-or-later"
 LIC_FILES_CHKSUM = "file://COPYING;md5=4fbd65380cdd255951079008b364516c"
 
-DEPENDS = "glib-2.0 libxmlb json-glib sqlite3 libjcat gcab vala-native"
+DEPENDS = "glib-2.0 libxmlb json-glib libjcat gcab vala-native"
 
 SRC_URI = "https://github.com/${BPN}/${BPN}/releases/download/${PV}/${BP}.tar.xz \
+           file://c54ae9c524998e449b822feb465a0c90317cd735.patch \
            file://run-ptest"
-SRC_URI[sha256sum] = "5756466442eb9134fb40cdc8a46f9e01003247c8eac82fa5d8b16f6420375b79"
+SRC_URI[sha256sum] = "adfa07434cdc29ec41c40fef460e8d970963fe0c7e849dec7f3932adb161f886"
 
 UPSTREAM_CHECK_URI = "https://github.com/${BPN}/${BPN}/releases"
 
@@ -47,7 +48,6 @@
                    plugin_synaptics_mst \
                    plugin_synaptics_rmi \
                    plugin_scsi \
-                   plugin_thunderbolt \
                    plugin_uf2 \
                    plugin_upower \
                    sqlite"
@@ -99,7 +99,6 @@
 PACKAGECONFIG[plugin_scsi] = "-Dplugin_scsi=true,-Dplugin_scsi=false"
 PACKAGECONFIG[plugin_synaptics_mst] = "-Dplugin_synaptics_mst=true,-Dplugin_synaptics_mst=false"
 PACKAGECONFIG[plugin_synaptics_rmi] = "-Dplugin_synaptics_rmi=true,-Dplugin_synaptics_rmi=false"
-PACKAGECONFIG[plugin_thunderbolt] = "-Dplugin_thunderbolt=true,-Dplugin_thunderbolt=false"
 PACKAGECONFIG[plugin_tpm] = "-Dplugin_tpm=true,-Dplugin_tpm=false,tpm2-tss"
 # Turn off the capsule splash as it needs G-I at buildtime, which isn't currently supported
 PACKAGECONFIG[plugin_uefi_capsule] = "-Dplugin_uefi_capsule=true -Dplugin_uefi_capsule_splash=false,-Dplugin_uefi_capsule=false,efivar fwupd-efi"
@@ -108,7 +107,7 @@
 PACKAGECONFIG[plugin_upower] = "-Dplugin_upower=true,-Dplugin_upower=false"
 
 # Always disable these plugins on non-x86 platforms as they don't compile or are useless
-DISABLE_NON_X86 = "plugin_amt plugin_intel_spi plugin_msr plugin_thunderbolt"
+DISABLE_NON_X86 = "plugin_amt plugin_intel_spi plugin_msr"
 DISABLE_NON_X86:x86 = ""
 DISABLE_NON_X86:x86-64 = ""
 PACKAGECONFIG:remove = "${DISABLE_NON_X86}"
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/gensio/files/0001-configure-Check-for-request_init-API-when-probing-fo.patch b/meta-openembedded/meta-oe/recipes-connectivity/gensio/files/0001-configure-Check-for-request_init-API-when-probing-fo.patch
new file mode 100644
index 0000000..3b4caa2
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-connectivity/gensio/files/0001-configure-Check-for-request_init-API-when-probing-fo.patch
@@ -0,0 +1,31 @@
+From f56e59712d8708c74a4cf66d24b5703fc16b22ae Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 11:31:48 -0700
+Subject: [PATCH] configure: Check for request_init API when probing for
+ libwrap
+
+Checking for main() is not the right check, since this function is not part
+of libwrap but of the app. Newer autoconf and toolchains may fail this check.
+
+Upstream-Status: Submitted [https://github.com/cminyard/gensio/pull/48]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.ac | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/configure.ac b/configure.ac
+index 90cfcb6b..1c06d918 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -109,7 +109,7 @@ AC_ARG_WITH(tcp-wrappers,
+ if test "$tcp_wrappers" != "no"
+ then
+   AC_CHECK_HEADERS([tcpd.h])
+-  AC_CHECK_LIB(wrap,main)
++  AC_CHECK_LIB(wrap,request_init)
+ fi
+ 
+ AC_ARG_ENABLE([doc],
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/gensio/gensio_2.5.2.bb b/meta-openembedded/meta-oe/recipes-connectivity/gensio/gensio_2.5.2.bb
index 9de2120..d6b5f19 100644
--- a/meta-openembedded/meta-oe/recipes-connectivity/gensio/gensio_2.5.2.bb
+++ b/meta-openembedded/meta-oe/recipes-connectivity/gensio/gensio_2.5.2.bb
@@ -9,6 +9,7 @@
 
 SRC_URI = "git://github.com/cminyard/gensio;protocol=https;branch=master \
            file://0001-tools-gensiot-Fix-build-with-musl.patch \
+           file://0001-configure-Check-for-request_init-API-when-probing-fo.patch \
 "
 
 S = "${WORKDIR}/git"
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/iwd/iwd_1.29.bb b/meta-openembedded/meta-oe/recipes-connectivity/iwd/iwd_1.30.bb
similarity index 94%
rename from meta-openembedded/meta-oe/recipes-connectivity/iwd/iwd_1.29.bb
rename to meta-openembedded/meta-oe/recipes-connectivity/iwd/iwd_1.30.bb
index eada93b..1f0bcd4 100644
--- a/meta-openembedded/meta-oe/recipes-connectivity/iwd/iwd_1.29.bb
+++ b/meta-openembedded/meta-oe/recipes-connectivity/iwd/iwd_1.30.bb
@@ -8,7 +8,7 @@
 SRC_URI = "https://www.kernel.org/pub/linux/network/wireless/${BP}.tar.xz \
            file://0001-build-Use-abs_top_srcdir-instead-of-abs_srcdir-for-e.patch \
            "
-SRC_URI[sha256sum] = "71533fe3b8f6381f24832198ba11d00f04a361454770c173b3b66bc3cdf272bd"
+SRC_URI[sha256sum] = "9fd13512dc27d83efb8d341f7df98f5488f70131686021fcd0d93fc97af013b8"
 
 inherit autotools manpages pkgconfig python3native systemd
 
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/libimobiledevice/libimobiledevice_git.bb b/meta-openembedded/meta-oe/recipes-connectivity/libimobiledevice/libimobiledevice_git.bb
index 3dfd4e9..5217687 100644
--- a/meta-openembedded/meta-oe/recipes-connectivity/libimobiledevice/libimobiledevice_git.bb
+++ b/meta-openembedded/meta-oe/recipes-connectivity/libimobiledevice/libimobiledevice_git.bb
@@ -17,3 +17,5 @@
 inherit autotools pkgconfig
 
 EXTRA_OECONF = " --without-cython "
+
+CFLAGS += "-D_GNU_SOURCE"
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/linuxptp/linuxptp/0001-include-string.h-for-strncpy.patch b/meta-openembedded/meta-oe/recipes-connectivity/linuxptp/linuxptp/0001-include-string.h-for-strncpy.patch
new file mode 100644
index 0000000..60f2079
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-connectivity/linuxptp/linuxptp/0001-include-string.h-for-strncpy.patch
@@ -0,0 +1,27 @@
+From d49b4fb8063ecd89617587e5ea566cc9da9393ef Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 31 Aug 2022 19:09:18 -0700
+Subject: [PATCH] include string.h for strncpy()
+
+Upstream-Status: Pending
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ interface.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/interface.c b/interface.c
+index 65bdff0..5228db9 100644
+--- a/interface.c
++++ b/interface.c
+@@ -5,6 +5,7 @@
+  * @note SPDX-License-Identifier: GPL-2.0+
+  */
+ #include <stdlib.h>
++#include <string.h>
+ #include "interface.h"
+ 
+ struct interface {
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/linuxptp/linuxptp_3.1.1.bb b/meta-openembedded/meta-oe/recipes-connectivity/linuxptp/linuxptp_3.1.1.bb
index 9c0f56e..bb4871e 100644
--- a/meta-openembedded/meta-oe/recipes-connectivity/linuxptp/linuxptp_3.1.1.bb
+++ b/meta-openembedded/meta-oe/recipes-connectivity/linuxptp/linuxptp_3.1.1.bb
@@ -6,6 +6,7 @@
 SRC_URI = "http://sourceforge.net/projects/linuxptp/files/v3.1/linuxptp-${PV}.tgz \
            file://build-Allow-CC-and-prefix-to-be-overriden.patch \
            file://Use-cross-cpp-in-incdefs.patch \
+           file://0001-include-string.h-for-strncpy.patch \
            "
 
 UPSTREAM_CHECK_URI = "https://sourceforge.net/projects/linuxptp/files/"
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/modemmanager/modemmanager_1.18.10.bb b/meta-openembedded/meta-oe/recipes-connectivity/modemmanager/modemmanager_1.18.12.bb
similarity index 97%
rename from meta-openembedded/meta-oe/recipes-connectivity/modemmanager/modemmanager_1.18.10.bb
rename to meta-openembedded/meta-oe/recipes-connectivity/modemmanager/modemmanager_1.18.12.bb
index 9316cbb..b06e3eb 100644
--- a/meta-openembedded/meta-oe/recipes-connectivity/modemmanager/modemmanager_1.18.10.bb
+++ b/meta-openembedded/meta-oe/recipes-connectivity/modemmanager/modemmanager_1.18.12.bb
@@ -12,7 +12,7 @@
 
 DEPENDS = "glib-2.0 libgudev libxslt-native dbus"
 
-SRCREV ?= "1338749739e0c01bdbc6ddf92388a781806ec3ac"
+SRCREV ?= "c234bd55c9d9618c1478b5e80aaf4b8f965be181"
 
 # Patch 0001 will be in ModemManager > 1.19
 SRC_URI = " \
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/ser2net/ser2net_4.3.7.bb b/meta-openembedded/meta-oe/recipes-connectivity/ser2net/ser2net_4.3.8.bb
similarity index 83%
rename from meta-openembedded/meta-oe/recipes-connectivity/ser2net/ser2net_4.3.7.bb
rename to meta-openembedded/meta-oe/recipes-connectivity/ser2net/ser2net_4.3.8.bb
index 13ce13a..c8108d6 100644
--- a/meta-openembedded/meta-oe/recipes-connectivity/ser2net/ser2net_4.3.7.bb
+++ b/meta-openembedded/meta-oe/recipes-connectivity/ser2net/ser2net_4.3.8.bb
@@ -9,7 +9,7 @@
 
 SRC_URI = "${SOURCEFORGE_MIRROR}/project/ser2net/ser2net/ser2net-${PV}.tar.gz"
 
-SRC_URI[sha256sum] = "542915e240ae8b5c7dcec8d1589e6067818532900d45cfef226cea9f0e671d13"
+SRC_URI[sha256sum] = "e5620975523059a38709bb53c0567600adbbcb8011066a2d2fe1b4db9efe0ba3"
 
 UPSTREAM_CHECK_URI = "http://sourceforge.net/projects/ser2net/files/ser2net"
 
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/thrift/thrift_0.16.0.bb b/meta-openembedded/meta-oe/recipes-connectivity/thrift/thrift_0.17.0.bb
similarity index 95%
rename from meta-openembedded/meta-oe/recipes-connectivity/thrift/thrift_0.16.0.bb
rename to meta-openembedded/meta-oe/recipes-connectivity/thrift/thrift_0.17.0.bb
index 8141abe..24d07b8 100644
--- a/meta-openembedded/meta-oe/recipes-connectivity/thrift/thrift_0.16.0.bb
+++ b/meta-openembedded/meta-oe/recipes-connectivity/thrift/thrift_0.17.0.bb
@@ -11,7 +11,7 @@
 SRC_URI = "https://www-eu.apache.org/dist/thrift//${PV}/${BPN}-${PV}.tar.gz \
            file://0001-DefineInstallationPaths.cmake-Define-libdir-in-terms.patch \
           "
-SRC_URI[sha256sum] = "f460b5c1ca30d8918ff95ea3eb6291b3951cf518553566088f3f2be8981f6209"
+SRC_URI[sha256sum] = "b272c1788bb165d99521a2599b31b97fa69e5931d099015d91ae107a0b0cc58f"
 
 BBCLASSEXTEND = "native nativesdk"
 
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/wifi-test-suite/files/0001-wfa_cmdproc-Store-return-value-into-location.patch b/meta-openembedded/meta-oe/recipes-connectivity/wifi-test-suite/files/0001-wfa_cmdproc-Store-return-value-into-location.patch
new file mode 100644
index 0000000..074f0fe
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-connectivity/wifi-test-suite/files/0001-wfa_cmdproc-Store-return-value-into-location.patch
@@ -0,0 +1,32 @@
+From 2b7d6cf62296ff4e25e5ad909aa39a257bf9ff78 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 31 Aug 2022 19:37:35 -0700
+Subject: [PATCH] wfa_cmdproc: Store return value into location
+
+Fixes
+| wfa_cmdproc.c:100:20: error: incompatible integer to pointer conversion assigning to 'int *' from 'int' [-Wint-conversion]
+|         paramValue = atoi(str);
+|                    ^ ~~~~~~~~~
+
+Upstream-Status: Submitted [https://github.com/Wi-FiTestSuite/Wi-FiTestSuite-Linux-DUT/pull/57]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ lib/wfa_cmdproc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/wfa_cmdproc.c b/lib/wfa_cmdproc.c
+index 9f5fd16..49a3f16 100644
+--- a/lib/wfa_cmdproc.c
++++ b/lib/wfa_cmdproc.c
+@@ -97,7 +97,7 @@ int getParamValueInt(char *pcmdStr, char *pParam, int *paramValue)
+     if(strcasecmp(pcmdStr, pParam) == 0)
+     {
+         str = strtok_r(NULL, ",", &pcmdStr);
+-        paramValue = atoi(str);
++        *paramValue = atoi(str);
+         return 0;
+     }
+     return -1;
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/wifi-test-suite/wifi-test-suite_git.bb b/meta-openembedded/meta-oe/recipes-connectivity/wifi-test-suite/wifi-test-suite_10.10.1.bb
similarity index 92%
rename from meta-openembedded/meta-oe/recipes-connectivity/wifi-test-suite/wifi-test-suite_git.bb
rename to meta-openembedded/meta-oe/recipes-connectivity/wifi-test-suite/wifi-test-suite_10.10.1.bb
index 490f832..eda01ca 100644
--- a/meta-openembedded/meta-oe/recipes-connectivity/wifi-test-suite/wifi-test-suite_git.bb
+++ b/meta-openembedded/meta-oe/recipes-connectivity/wifi-test-suite/wifi-test-suite_10.10.1.bb
@@ -8,13 +8,14 @@
 SECTION = "test"
 
 S = "${WORKDIR}/git"
-SRCREV = "2da947374c8324f88a0e2155aeba4cf75464b0d8"
+SRCREV = "12e85fbeca8ca21a632d18e55089a8a7606d64aa"
 SRC_URI = "git://github.com/Wi-FiTestSuite/Wi-FiTestSuite-Linux-DUT.git;branch=master;protocol=https \
 	file://0001-Use-toolchain-from-environment-variables.patch \
 	file://0002-Add-missing-include-removes-unnedded-stuff-and-add-n.patch \
 	file://0003-fix-path-to-usr-sbin-for-script-and-make-script-for-.patch \
 	file://0004-run-ranlib-per-library-and-use-AR.patch \
         file://fno-common.patch \
+        file://0001-wfa_cmdproc-Store-return-value-into-location.patch \
 "
 
 # to avoid host path QA error
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/wvdial/wvstreams_4.6.1.bb b/meta-openembedded/meta-oe/recipes-connectivity/wvdial/wvstreams_4.6.1.bb
index d73c646..3453f3e 100644
--- a/meta-openembedded/meta-oe/recipes-connectivity/wvdial/wvstreams_4.6.1.bb
+++ b/meta-openembedded/meta-oe/recipes-connectivity/wvdial/wvstreams_4.6.1.bb
@@ -4,7 +4,7 @@
 LICENSE = "LGPL-2.0-only"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=55ca817ccb7d5b5b66355690e9abc605"
 
-DEPENDS = "zlib openssl (>= 0.9.8) dbus readline"
+DEPENDS = "zlib openssl (>= 0.9.8) dbus readline boost"
 DEPENDS:append:libc-musl = " argp-standalone libexecinfo"
 
 SRC_URI = "https://storage.googleapis.com/google-code-archive-downloads/v2/code.google.com/${BPN}/${BP}.tar.gz \
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/zabbix/zabbix/0001-initialize-msghdr-portably.patch b/meta-openembedded/meta-oe/recipes-connectivity/zabbix/zabbix/0001-initialize-msghdr-portably.patch
new file mode 100644
index 0000000..a7a1dc1
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-connectivity/zabbix/zabbix/0001-initialize-msghdr-portably.patch
@@ -0,0 +1,52 @@
+From 37b662cf44e14b33e93759d4b129d23095b41372 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 31 Aug 2022 23:55:02 -0700
+Subject: [PATCH] initialize msghdr portably
+
+On Linux, musl is an option for the system C library, and the current
+initialization assumes the structure layout is the same as glibc's. Therefore
+initialize the structure's members individually, so it works with both
+glibc and musl.
+
+Fixes
+net.c:115:79: error: incompatible pointer to integer conversion initializing 'int' with an expression of type 'void *' [-Wint-conversion]
+        struct msghdr           s_msg = { (void *)&s_sa, sizeof(struct sockaddr_nl), s_io, 1, NULL, 0, 0};
+                                                                                              ^~~~
+
+Upstream-Status: Submitted [https://github.com/zabbix/zabbix/pull/77]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/libs/zbxsysinfo/linux/net.c | 12 ++++++++++--
+ 1 file changed, 10 insertions(+), 2 deletions(-)
+
+diff --git a/src/libs/zbxsysinfo/linux/net.c b/src/libs/zbxsysinfo/linux/net.c
+index 1425a3604d..98f03de0dd 100644
+--- a/src/libs/zbxsysinfo/linux/net.c
++++ b/src/libs/zbxsysinfo/linux/net.c
+@@ -114,13 +114,21 @@ static int	find_tcp_port_by_state_nl(unsigned short port, int state, int *found)
+ 
+ 	struct sockaddr_nl	s_sa = { AF_NETLINK, 0, 0, 0 };
+ 	struct iovec		s_io[1] = { { &request, sizeof(request) } };
+-	struct msghdr		s_msg = { (void *)&s_sa, sizeof(struct sockaddr_nl), s_io, 1, NULL, 0, 0};
++	struct msghdr		s_msg = { 0 };
++	s_msg.msg_name 		= (void *)&s_sa;
++	s_msg.msg_namelen 	= sizeof(struct sockaddr_nl);
++	s_msg.msg_iov 		= s_io;
++	s_msg.msg_iovlen 	= 1;
+ 
+ 	char			buffer[BUFSIZ] = { 0 };
+ 
+ 	struct sockaddr_nl	r_sa = { AF_NETLINK, 0, 0, 0 };
+ 	struct iovec		r_io[1] = { { buffer, BUFSIZ } };
+-	struct msghdr		r_msg = { (void *)&r_sa, sizeof(struct sockaddr_nl), r_io, 1, NULL, 0, 0};
++	struct msghdr		r_msg = { 0 };
++	r_msg.msg_name 		= (void *)&r_sa;
++	r_msg.msg_namelen 	= sizeof(struct sockaddr_nl);
++	r_msg.msg_iov 		= r_io;
++	r_msg.msg_iovlen 	= 1;
+ 
+ 	struct nlmsghdr		*r_hdr;
+ 
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-connectivity/zabbix/zabbix_6.2.1.bb b/meta-openembedded/meta-oe/recipes-connectivity/zabbix/zabbix_6.2.3.bb
similarity index 94%
rename from meta-openembedded/meta-oe/recipes-connectivity/zabbix/zabbix_6.2.1.bb
rename to meta-openembedded/meta-oe/recipes-connectivity/zabbix/zabbix_6.2.3.bb
index 9949bd8..58d1f9e 100644
--- a/meta-openembedded/meta-oe/recipes-connectivity/zabbix/zabbix_6.2.1.bb
+++ b/meta-openembedded/meta-oe/recipes-connectivity/zabbix/zabbix_6.2.3.bb
@@ -25,10 +25,10 @@
 
 SRC_URI = "https://cdn.zabbix.com/zabbix/sources/stable/6.2/${BPN}-${PV}.tar.gz \
     file://0001-Fix-configure.ac.patch \
+    file://0001-initialize-msghdr-portably.patch \
     file://zabbix-agent.service \
 "
-
-SRC_URI[sha256sum] = "f3d6b7cf4e67d820ce7d28cd54ac67724f7453f261f668877e6410cd21ab9ea1"
+SRC_URI[sha256sum] = "2be7e57fb33a55fee71480598e317ffa6a8ee5a39639a7e1b42b2ea6872107b5"
 
 inherit autotools-brokensep linux-kernel-base pkgconfig systemd useradd
 
diff --git a/meta-openembedded/meta-oe/recipes-core/dbus/dbus-broker_31.bb b/meta-openembedded/meta-oe/recipes-core/dbus/dbus-broker_32.bb
similarity index 92%
rename from meta-openembedded/meta-oe/recipes-core/dbus/dbus-broker_31.bb
rename to meta-openembedded/meta-oe/recipes-core/dbus/dbus-broker_32.bb
index 6eb45ed..900b5e3 100644
--- a/meta-openembedded/meta-oe/recipes-core/dbus/dbus-broker_31.bb
+++ b/meta-openembedded/meta-oe/recipes-core/dbus/dbus-broker_32.bb
@@ -7,7 +7,7 @@
 LIC_FILES_CHKSUM = "file://LICENSE;md5=7b486c2338d225a1405d979ed2c15ce8"
 
 SRC_URI = "https://github.com/bus1/dbus-broker/releases/download/v${PV}/dbus-broker-${PV}.tar.xz"
-SRC_URI[sha256sum] = "f229d7f1de8a3e344d54a4a30fbcb545387549e968009c06b87f7f1196c97496"
+SRC_URI[sha256sum] = "bea7f653e7251063c5f427e9e3f93562d38a0d8667ae6d49fb56f113605985de"
 
 UPSTREAM_CHECK_URI = "https://github.com/bus1/${BPN}/releases"
 
diff --git a/meta-openembedded/meta-oe/recipes-core/opencl/ocl-icd_2.3.1.bb b/meta-openembedded/meta-oe/recipes-core/opencl/ocl-icd_2.3.1.bb
index ae9c5bc..250cb68 100644
--- a/meta-openembedded/meta-oe/recipes-core/opencl/ocl-icd_2.3.1.bb
+++ b/meta-openembedded/meta-oe/recipes-core/opencl/ocl-icd_2.3.1.bb
@@ -15,4 +15,4 @@
 
 DEPENDS = "ruby-native"
 
-BBCLASSEXTEND = "native"
+BBCLASSEXTEND = "native nativesdk"
diff --git a/meta-openembedded/meta-oe/recipes-core/opencl/opencl-headers_git.bb b/meta-openembedded/meta-oe/recipes-core/opencl/opencl-headers_git.bb
index 4ecb8fc..8ad7d9d 100644
--- a/meta-openembedded/meta-oe/recipes-core/opencl/opencl-headers_git.bb
+++ b/meta-openembedded/meta-oe/recipes-core/opencl/opencl-headers_git.bb
@@ -5,8 +5,8 @@
 SECTION = "base"
 
 S = "${WORKDIR}/git"
-PV = "2022.05.18"
-SRCREV = "def8be9d35fda35492b72f54a94515f7df8d1e9f"
+PV = "2022.09.23"
+SRCREV = "5e2f8c66476bcf60eb8c2921807e34efb30294ee"
 SRC_URI = "git://github.com/KhronosGroup/OpenCL-Headers.git;branch=main;protocol=https"
 
 do_configure[noexec] = "1"
diff --git a/meta-openembedded/meta-oe/recipes-core/packagegroups/packagegroup-meta-oe.bb b/meta-openembedded/meta-oe/recipes-core/packagegroups/packagegroup-meta-oe.bb
index 756ba46..cd99fb4 100644
--- a/meta-openembedded/meta-oe/recipes-core/packagegroups/packagegroup-meta-oe.bb
+++ b/meta-openembedded/meta-oe/recipes-core/packagegroups/packagegroup-meta-oe.bb
@@ -222,7 +222,6 @@
     psqlodbc \
     rocksdb \
     soci \
-    sqlite \
 "
 RDEPENDS:packagegroup-meta-oe-dbs:remove:libc-musl:powerpc = "rocksdb"
 
diff --git a/meta-openembedded/meta-oe/recipes-core/pim435/pim435_git.bb b/meta-openembedded/meta-oe/recipes-core/pim435/pim435_git.bb
index f73a0fd..80e3cc6 100644
--- a/meta-openembedded/meta-oe/recipes-core/pim435/pim435_git.bb
+++ b/meta-openembedded/meta-oe/recipes-core/pim435/pim435_git.bb
@@ -9,8 +9,8 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSES/MIT.txt;md5=7dda4e90ded66ab88b86f76169f28663"
 
-SRC_URI = "git://booting.oniroproject.org/distro/components/pim435;protocol=https;branch=main"
-SRCREV = "ee07a83de4d0ecdf4b5de20a7e374d36a9a6f5d5"
+SRC_URI = "git://gitlab.eclipse.org/eclipse/oniro-blueprints/core/pim435;protocol=https;branch=main"
+SRCREV = "445ed623ec8d3ecbb1d566900b4ef3fb3031d689"
 S = "${WORKDIR}/git"
 
 DEPENDS = "i2c-tools"
diff --git a/meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++-1.1.0/7f437a6e06d2ec3abd3e2fd1101ab6aab386bc44.patch b/meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++-1.1.0/7f437a6e06d2ec3abd3e2fd1101ab6aab386bc44.patch
deleted file mode 100644
index 641935f..0000000
--- a/meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++-1.1.0/7f437a6e06d2ec3abd3e2fd1101ab6aab386bc44.patch
+++ /dev/null
@@ -1,54 +0,0 @@
-commit 7f437a6e06d2ec3abd3e2fd1101ab6aab386bc44
-Author: Stanislav Angelovic <stanislav.angelovic@siemens.com>
-Date:   Thu Jan 27 13:38:19 2022 +0100
-
-    fix(tests): printer for std::chrono in googletest v1.11.0
-
-diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
-index fbf198c..ec42e55 100644
---- a/tests/CMakeLists.txt
-+++ b/tests/CMakeLists.txt
-@@ -19,7 +19,7 @@ if (NOT TARGET GTest::gmock)
-     if (NOT TARGET GTest::gmock)
-         include(FetchContent)
- 
--        message("Fetching googletest...")
-+        message("Fetching googletest v${GOOGLETEST_VERSION}...")
-         FetchContent_Declare(googletest
-                             GIT_REPOSITORY ${GOOGLETEST_GIT_REPO}
-                             GIT_TAG        release-${GOOGLETEST_VERSION}
-diff --git a/tests/integrationtests/Defs.h b/tests/integrationtests/Defs.h
-index 2f129a4..2bfc7c9 100644
---- a/tests/integrationtests/Defs.h
-+++ b/tests/integrationtests/Defs.h
-@@ -56,20 +56,18 @@ const bool DEFAULT_BLOCKING_VALUE{true};
- 
- constexpr const double DOUBLE_VALUE{3.24L};
- 
--/** Duration stream operator for human readable gtest value output.
-- *
-- * Note that the conversion to double is lossy if the input type has 64 or more bits.
-- * This is ok for our integration tests because they don't have very
-- * accurate timing requirements.
-- *
-- * @return human readable duration in seconds
-- */
-+}}
-+
-+namespace testing::internal {
-+
-+// Printer for std::chrono::duration types.
-+// This is a workaround, since it's not a good thing to add this to std namespace.
- template< class Rep, class Period >
--static std::ostream& operator<<(std::ostream& os, const std::chrono::duration<Rep, Period>& d)
--{
-+void PrintTo(const ::std::chrono::duration<Rep, Period>& d, ::std::ostream* os) {
-     auto seconds = std::chrono::duration_cast<std::chrono::duration<double>>(d);
--    return os << seconds.count() << " s";
-+    *os << seconds.count() << "s";
-+}
-+
- }
--}}
- 
- #endif /* SDBUS_CPP_INTEGRATIONTESTS_DEFS_H_ */
diff --git a/meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++-1.1.0/run-ptest b/meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++-1.2.0/run-ptest
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++-1.1.0/run-ptest
rename to meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++-1.2.0/run-ptest
diff --git a/meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++-tools_1.1.0.bb b/meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++-tools_1.2.0.bb
similarity index 88%
rename from meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++-tools_1.1.0.bb
rename to meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++-tools_1.2.0.bb
index 01f66c3..27b965a 100644
--- a/meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++-tools_1.1.0.bb
+++ b/meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++-tools_1.2.0.bb
@@ -8,7 +8,7 @@
 
 DEPENDS += "expat"
 
-SRCREV = "bca8e81037766a0454740c40307eea32831c101f"
+SRCREV = "751c1addc4fd2f949a466f488c1b7de2ca3b76dc"
 SRC_URI = "git://github.com/Kistler-Group/sdbus-cpp.git;protocol=https;branch=master;subpath=tools"
 
 S = "${WORKDIR}/tools"
diff --git a/meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++_1.1.0.bb b/meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++_1.2.0.bb
similarity index 91%
rename from meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++_1.1.0.bb
rename to meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++_1.2.0.bb
index 107dbc6..75cd815 100644
--- a/meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++_1.1.0.bb
+++ b/meta-openembedded/meta-oe/recipes-core/sdbus-c++/sdbus-c++_1.2.0.bb
@@ -16,11 +16,10 @@
 
 DEPENDS += "expat"
 
-SRCREV = "bca8e81037766a0454740c40307eea32831c101f"
+SRCREV = "751c1addc4fd2f949a466f488c1b7de2ca3b76dc"
 
 SRC_URI = "git://github.com/Kistler-Group/sdbus-cpp.git;protocol=https;branch=master"
-SRC_URI += "file://run-ptest \
-            file://7f437a6e06d2ec3abd3e2fd1101ab6aab386bc44.patch "
+SRC_URI += "file://run-ptest"
 
 EXTRA_OECMAKE = "-DBUILD_CODE_GEN=OFF \
                  -DBUILD_DOC=ON \
diff --git a/meta-openembedded/meta-oe/recipes-core/toybox/toybox/0001-portability-Avoid-glibc-and-linux-mount.h-conflict.patch b/meta-openembedded/meta-oe/recipes-core/toybox/toybox/0001-portability-Avoid-glibc-and-linux-mount.h-conflict.patch
deleted file mode 100644
index 689ee2a..0000000
--- a/meta-openembedded/meta-oe/recipes-core/toybox/toybox/0001-portability-Avoid-glibc-and-linux-mount.h-conflict.patch
+++ /dev/null
@@ -1,161 +0,0 @@
-From 89000d9cb226cd864fa247f2428c9eaf7f414882 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Sun, 14 Aug 2022 10:02:15 -0700
-Subject: [PATCH] portability: Avoid glibc and linux mount.h conflict
-
-With glibc 2.36+ linux/mount.h> and <sys/mount.h> headers are
-no longer directly compatible
-
-Upstream-Status: Submitted [https://github.com/landley/toybox/pull/364]
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- lib/portability.h | 6 +++++-
- 1 file changed, 5 insertions(+), 1 deletion(-)
-
---- a/lib/portability.h
-+++ b/lib/portability.h
-@@ -180,11 +180,29 @@ void *memmem(const void *haystack, size_
- #endif
- 
- // Linux headers not listed by POSIX or LSB
--#include <sys/mount.h>
- #ifdef __linux__
- #include <sys/statfs.h>
- #include <sys/swap.h>
- #include <sys/sysinfo.h>
-+
-+#ifndef BLKDISCARD
-+#define BLKDISCARD _IO(0x12,119)
-+#endif
-+#ifndef BLKSECDISCARD
-+#define BLKSECDISCARD _IO(0x12,125)
-+#endif
-+#ifndef BLKZEROOUT
-+#define BLKZEROOUT _IO(0x12,127)
-+#endif
-+#ifndef FIFREEZE
-+#define FIFREEZE        _IOWR('X', 119, int)    /* Freeze */
-+#endif
-+#ifndef FITHAW
-+#define FITHAW          _IOWR('X', 120, int)    /* Thaw */
-+#endif
-+
-+#else
-+#include <sys/mount.h>
- #endif
- 
- #ifdef __APPLE__
---- a/toys/other/switch_root.c
-+++ b/toys/other/switch_root.c
-@@ -19,6 +19,7 @@ config SWITCH_ROOT
- 
- #define FOR_switch_root
- #include "toys.h"
-+#include <sys/mount.h>
- #include <sys/vfs.h>
- 
- GLOBALS(
---- a/toys/other/blkdiscard.c
-+++ b/toys/other/blkdiscard.c
-@@ -31,8 +31,7 @@ config BLKDISCARD
- 
- #define FOR_blkdiscard
- #include "toys.h"
--
--#include <linux/fs.h>
-+#include <sys/mount.h>
- 
- GLOBALS(
-   long o, l;
---- a/toys/other/blockdev.c
-+++ b/toys/other/blockdev.c
-@@ -31,7 +31,7 @@ config BLOCKDEV
- 
- #define FOR_blockdev
- #include "toys.h"
--#include <linux/fs.h>
-+#include <sys/mount.h>
- 
- GLOBALS(
-   long setbsz, setra;
---- a/toys/other/fsfreeze.c
-+++ b/toys/other/fsfreeze.c
-@@ -18,7 +18,6 @@ config FSFREEZE
- 
- #define FOR_fsfreeze
- #include "toys.h"
--#include <linux/fs.h>
- 
- void fsfreeze_main(void)
- {
---- a/lib/portability.c
-+++ b/lib/portability.c
-@@ -5,7 +5,7 @@
-  */
- 
- #include "toys.h"
--
-+#include <sys/mount.h>
- // We can't fork() on nommu systems, and vfork() requires an exec() or exit()
- // before resuming the parent (because they share a heap until then). And no,
- // we can't implement our own clone() call that does the equivalent of fork()
---- a/toys/lsb/mount.c
-+++ b/toys/lsb/mount.c
-@@ -58,6 +58,7 @@ config MOUNT
- 
- #define FOR_mount
- #include "toys.h"
-+#include <sys/mount.h>
- 
- GLOBALS(
-   struct arg_list *o;
---- a/toys/lsb/umount.c
-+++ b/toys/lsb/umount.c
-@@ -30,6 +30,7 @@ config UMOUNT
- 
- #define FOR_umount
- #include "toys.h"
-+#include <sys/mount.h>
- 
- GLOBALS(
-   struct arg_list *t;
---- a/toys/other/eject.c
-+++ b/toys/other/eject.c
-@@ -22,6 +22,7 @@ config EJECT
- 
- #define FOR_eject
- #include "toys.h"
-+#include <sys/mount.h>
- #include <scsi/sg.h>
- #include <scsi/scsi.h>
- #include <linux/cdrom.h>
---- a/toys/other/freeramdisk.c
-+++ b/toys/other/freeramdisk.c
-@@ -16,6 +16,7 @@ config FREERAMDISK
- */
- 
- #include "toys.h"
-+#include <sys/mount.h>
- 
- void freeramdisk_main(void)
- {
---- a/toys/other/nbd_client.c
-+++ b/toys/other/nbd_client.c
-@@ -36,6 +36,7 @@ config NBD_CLIENT
- #define FOR_nbd_client
- #include "toys.h"
- #include <linux/nbd.h>
-+#include <linux/fs.h>
- 
- void nbd_client_main(void)
- {
---- a/toys/other/partprobe.c
-+++ b/toys/other/partprobe.c
-@@ -18,6 +18,7 @@ config PARTPROBE
- */
- 
- #include "toys.h"
-+#include <sys/mount.h>
- 
- static void do_partprobe(int fd, char *name)
- {
diff --git a/meta-openembedded/meta-oe/recipes-core/toybox/toybox/0001-portability-Fix-timer_settime_wrap-for-32bit-systems.patch b/meta-openembedded/meta-oe/recipes-core/toybox/toybox/0001-portability-Fix-timer_settime_wrap-for-32bit-systems.patch
new file mode 100644
index 0000000..8976b37
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-core/toybox/toybox/0001-portability-Fix-timer_settime_wrap-for-32bit-systems.patch
@@ -0,0 +1,37 @@
+From 7284c7ae0df9aa5a9c8aa0a81a018e17289fe2c4 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 8 Sep 2022 07:22:26 -0700
+Subject: [PATCH] portability: Fix timer_settime_wrap for 32bit systems with
+ 64bit time_t
+
+glibc does not define SYS_timer_settime if the 32-bit syscall is not
+available. New architectures like riscv32 have defaulted to 64-bit time_t
+from the get-go and avoided wiring up the 32-bit syscall; therefore alias it
+to the 64-bit version here.
+
+Upstream-Status: Submitted [https://github.com/landley/toybox/pull/373]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ lib/portability.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+diff --git a/lib/portability.c b/lib/portability.c
+index d955d081..4baa9367 100644
+--- a/lib/portability.c
++++ b/lib/portability.c
+@@ -711,6 +711,12 @@ int timer_create_wrap(clockid_t c, struct sigevent *se, timer_t *t)
+   return 0;
+ }
+ 
++#if !defined(SYS_timer_settime) && defined(SYS_timer_settime64)
++// glibc does not define SYS_timer_settime on 32-bit systems
++// with 64-bit time_t defaults e.g. riscv32
++#define SYS_timer_settime SYS_timer_settime64
++#endif
++
+ int timer_settime_wrap(timer_t t, int flags, struct itimerspec *val,
+   struct itimerspec *old)
+ {
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-core/toybox/toybox_0.8.8.bb b/meta-openembedded/meta-oe/recipes-core/toybox/toybox_0.8.8.bb
index e27f9ed..6836b14 100644
--- a/meta-openembedded/meta-oe/recipes-core/toybox/toybox_0.8.8.bb
+++ b/meta-openembedded/meta-oe/recipes-core/toybox/toybox_0.8.8.bb
@@ -8,7 +8,7 @@
 inherit cml1 update-alternatives
 
 SRC_URI = "http://www.landley.net/toybox/downloads/${BPN}-${PV}.tar.gz \
-           file://0001-portability-Avoid-glibc-and-linux-mount.h-conflict.patch \
+           file://0001-portability-Fix-timer_settime_wrap-for-32bit-systems.patch \
            "
 SRC_URI[sha256sum] = "dafd41978d40f02a61cf1be99a2b4a25812bbfb9c3157e679ee7611202d6ac58"
 
diff --git a/meta-openembedded/meta-oe/recipes-crypto/libmcrypt/libmcrypt_2.5.8.bb b/meta-openembedded/meta-oe/recipes-crypto/libmcrypt/libmcrypt_2.5.8.bb
index 50cdf22..a3d09c7 100644
--- a/meta-openembedded/meta-oe/recipes-crypto/libmcrypt/libmcrypt_2.5.8.bb
+++ b/meta-openembedded/meta-oe/recipes-crypto/libmcrypt/libmcrypt_2.5.8.bb
@@ -14,6 +14,8 @@
 
 inherit autotools-brokensep gettext binconfig
 
+CFLAGS += "-Wno-error=implicit-int"
+
 do_configure() {
         install -m 0755 ${STAGING_DATADIR_NATIVE}/gnu-config/config.guess ${S}
         install -m 0755 ${STAGING_DATADIR_NATIVE}/gnu-config/config.sub ${S}
diff --git a/meta-openembedded/meta-oe/recipes-dbs/mysql/mariadb.inc b/meta-openembedded/meta-oe/recipes-dbs/mysql/mariadb.inc
index c63d511..7bd4ac0 100644
--- a/meta-openembedded/meta-oe/recipes-dbs/mysql/mariadb.inc
+++ b/meta-openembedded/meta-oe/recipes-dbs/mysql/mariadb.inc
@@ -224,6 +224,17 @@
 # Allow old code to link to the backward compatible library
 RDEPENDS:libmysqlclient-dev = "libmysqlclient-r-dev"
 
+PACKAGE_PREPROCESS_FUNCS += "src_package_preprocess"
+src_package_preprocess () {
+        # Trim build paths from comments in generated sources to ensure reproducibility
+        sed -i -e "s,${S}/sql/,,g" \
+               -e "s,${B}/sql/,,g" \
+            ${B}/sql/yy_oracle.hh \
+            ${B}/sql/yy_mariadb.cc \
+            ${B}/sql/yy_mariadb.hh \
+            ${B}/sql/yy_oracle.cc
+}
+
 FILES:libmysqlclient = "\
     ${libdir}/libmysqlclient.so.* \
     ${libdir}/libmariadb.so.* \
diff --git a/meta-openembedded/meta-oe/recipes-dbs/postgresql/files/0001-config_info.c-not-expose-build-info.patch b/meta-openembedded/meta-oe/recipes-dbs/postgresql/files/0001-config_info.c-not-expose-build-info.patch
index 101a748..52ca276 100644
--- a/meta-openembedded/meta-oe/recipes-dbs/postgresql/files/0001-config_info.c-not-expose-build-info.patch
+++ b/meta-openembedded/meta-oe/recipes-dbs/postgresql/files/0001-config_info.c-not-expose-build-info.patch
@@ -30,7 +30,16 @@
 index e72e729..b482c20 100644
 --- a/src/common/config_info.c
 +++ b/src/common/config_info.c
-@@ -123,74 +123,6 @@ get_configdata(const char *my_exec_path, size_t *configdata_len)
+@@ -38,7 +38,7 @@
+ 	int			i = 0;
+ 
+ 	/* Adjust this to match the number of items filled below */
+-	*configdata_len = 23;
++	*configdata_len = 14;
+ 	configdata = (ConfigData *) palloc(*configdata_len * sizeof(ConfigData));
+ 
+ 	configdata[i].name = pstrdup("BINDIR");
+@@ -123,74 +123,6 @@
  	configdata[i].setting = pstrdup(path);
  	i++;
  
diff --git a/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/files/psqlodbc-donot-use-the-hardcode-libdir.patch b/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/files/psqlodbc-donot-use-the-hardcode-libdir.patch
index 7eda038..acfaa3f 100644
--- a/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/files/psqlodbc-donot-use-the-hardcode-libdir.patch
+++ b/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/files/psqlodbc-donot-use-the-hardcode-libdir.patch
@@ -1,26 +1,27 @@
-[PATCH] do not use the hardcode libdir
+From 8f3ed8df4721991958a5becf75a69493d67e7514 Mon Sep 17 00:00:00 2001
+From: "Roy.Li" <rongqing.li@windriver.com>
+Date: Tue, 5 Sep 2017 10:24:10 +0800
+Subject: [PATCH] do not use the hardcode libdir
 
 Upstream-status: Pending
 
 Signed-off-by: Roy.Li <rongqing.li@windriver.com>
 Signed-off-by: Jackie Huang <jackie.huang@windriver.com>
+
 ---
  configure.ac | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/configure.ac b/configure.ac
-index 9b88d4c..df5ad7a 100644
+index 00b359e..ba50e6f 100644
 --- a/configure.ac
 +++ b/configure.ac
-@@ -140,7 +140,7 @@ if test "$with_libpq" != yes; then
+@@ -114,7 +114,7 @@ if test "$with_libpq" != yes; then
  	if test -d "$with_libpq"; then
- 		PATH="$PATH:$with_libpq/bin"
- 		CPPFLAGS="$CPPFLAGS -I$with_libpq/include"
+ 		PATH="$with_libpq/bin:$PATH"
+ 		CPPFLAGS="$CPPFLAGS -I$with_libpq/include -I$with_libpq/include/postgresql/internal"
 -		LDFLAGS="$LDFLAGS -L$with_libpq/lib"
 +		LDFLAGS="$LDFLAGS -L$with_libpq/${base_libdir}"
  	else
  		if test -x "$with_libpq"; then
  			PG_CONFIG=$with_libpq
--- 
-2.8.1
-
diff --git a/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/files/psqlodbc-fix-for-ptest-support.patch b/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/files/psqlodbc-fix-for-ptest-support.patch
index a8f14e7..30e67b7 100644
--- a/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/files/psqlodbc-fix-for-ptest-support.patch
+++ b/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/files/psqlodbc-fix-for-ptest-support.patch
@@ -1,3 +1,6 @@
+From 96896b88776d0080609ec830cf9538d2babe665a Mon Sep 17 00:00:00 2001
+From: Jackie Huang <jackie.huang@windriver.com>
+Date: Tue, 5 Sep 2017 10:24:10 +0800
 Subject: [PATCH] psqlodbc: fixes for ptest support
 
 * Fix the LIBODBC since we don't use ODBC_CONFIG.
@@ -9,27 +12,28 @@
 Upstream-Status: Inappropriate [OE ptest specific]
 
 Signed-off-by: Jackie Huang <jackie.huang@windriver.com>
+
 ---
  test/Makefile.in    |  2 +-
  test/odbcini-gen.sh |  8 ++++----
- test/runsuite.c     | 26 +++++++++++++-------------
- 3 files changed, 18 insertions(+), 18 deletions(-)
+ test/runsuite.c     | 20 ++++++++++----------
+ 3 files changed, 15 insertions(+), 15 deletions(-)
 
 diff --git a/test/Makefile.in b/test/Makefile.in
-index 8710616..fcb470e 100644
+index 3f9a9af..09406ae 100644
 --- a/test/Makefile.in
 +++ b/test/Makefile.in
-@@ -18,7 +18,7 @@ CFLAGS = @CFLAGS@
+@@ -19,7 +19,7 @@ CPPFLAGS = @CPPFLAGS@ -I..	# config.h
  ODBC_CONFIG = @ODBC_CONFIG@
  PROVE = @PROVE@
  
--LIBODBC := $(shell $(ODBC_CONFIG) --libs)
+-LIBODBC = @LIBODBC@
 +LIBODBC = -lodbc
  
  all: $(TESTBINS) runsuite reset-db
  
 diff --git a/test/odbcini-gen.sh b/test/odbcini-gen.sh
-index d2c2c87..6068d9d 100755
+index 2eaba35..6555cdb 100755
 --- a/test/odbcini-gen.sh
 +++ b/test/odbcini-gen.sh
 @@ -6,7 +6,7 @@
@@ -41,7 +45,7 @@
  driver=${drvr}.so
  if test ! -e $driver ; then
  	driver=${drvr}.dll
-@@ -33,10 +33,10 @@ Driver          = psqlodbc test driver
+@@ -52,10 +52,10 @@ Driver          = PostgreSQL Unicode
  Trace           = No
  TraceFile               =
  Database                = contrib_regression
@@ -56,10 +60,10 @@
  RowVersioning           = No
  ShowSystemTables                = No
 diff --git a/test/runsuite.c b/test/runsuite.c
-index 583cf35..fd2a90e 100644
+index 3be5732..cd842dc 100644
 --- a/test/runsuite.c
 +++ b/test/runsuite.c
-@@ -51,7 +51,7 @@ bailout(const char *fmt, ...)
+@@ -55,7 +55,7 @@ bailout(const char *fmt, ...)
  
  /* Given a test program's name, get the test name */
  void
@@ -68,7 +72,7 @@
  {
  	const char *basename;
  #ifdef WIN32
-@@ -65,7 +65,7 @@ parse_argument(const char *in, char *testname, char *binname)
+@@ -69,7 +69,7 @@ parse_argument(const char *in, char *testname, char *binname)
  	if (strchr(in, DIR_SEP) == NULL)
  	{
  		strcpy(testname, in);
@@ -77,7 +81,7 @@
  		return;
  	}
  
-@@ -127,7 +127,7 @@ int main(int argc, char **argv)
+@@ -131,7 +131,7 @@ int main(int argc, char **argv)
  	failures = 0;
  	for (i = 1, j = 1; i <= numtests; i++, j++)
  	{
@@ -86,7 +90,7 @@
  		if (runtest(binname, testname, i, inputdir) != 0)
  			failures++;
  	}
-@@ -157,29 +157,29 @@ runtest(const char *binname, const char *testname, int testno, const char *input
+@@ -161,29 +161,29 @@ runtest(const char *binname, const char *testname, int testno, const char *input
  #ifndef WIN32
  	snprintf(cmdline, sizeof(cmdline),
  			 "ODBCSYSINI=. ODBCINSTINI=./odbcinst.ini ODBCINI=./odbc.ini "
@@ -123,26 +127,3 @@
  		ret = 0;
  	}
  	fflush(stdout);
-@@ -196,7 +196,7 @@ rundiff(const char *testname, const char *inputdir)
- 	char	   *result;
- 	size_t		result_len;
- 
--	snprintf(filename, sizeof(filename), "results/%s.out", testname);
-+	snprintf(filename, sizeof(filename), "%s/results/%s.out", inputdir, testname);
- 	result = slurpfile(filename, &result_len);
- 
- 	outputno = 0;
-@@ -244,8 +244,8 @@ rundiff(const char *testname, const char *inputdir)
- 	 * files and print the smallest diff?
- 	 */
- 	snprintf(cmdline, sizeof(cmdline),
--			 "diff -c %s/expected/%s.out results/%s.out >> regression.diffs",
--			 inputdir, testname, testname);
-+			 "diff -c %s/expected/%s.out %s/results/%s.out >> regression.diffs",
-+			 inputdir, testname, inputdir, testname);
- 	if (system(cmdline) == -1)
- 		printf("# diff failed\n");
- 
--- 
-2.8.2
-
diff --git a/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/files/psqlodbc-remove-some-checks-for-cross-compiling.patch b/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/files/psqlodbc-remove-some-checks-for-cross-compiling.patch
index 1d98818..9131d70 100644
--- a/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/files/psqlodbc-remove-some-checks-for-cross-compiling.patch
+++ b/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/files/psqlodbc-remove-some-checks-for-cross-compiling.patch
@@ -1,3 +1,6 @@
+From 8ca6b0c72b6b933642ec7c4ebb83734244fec46f Mon Sep 17 00:00:00 2001
+From: "Song.Li" <Song.Li@windriver.com>
+Date: Tue, 5 Sep 2017 10:24:10 +0800
 Subject: [PATCH] remove some checks for cross-compiling
 
 some lib check is not suitable for
@@ -8,15 +11,16 @@
 Signed-off-by: Song.Li <Song.Li@windriver.com>
 Signed-off-by: Kai Kang <kai.kang@windriver.com>
 Signed-off-by: Jackie Huang <jackie.huang@windriver.com>
+
 ---
- configure.ac | 66 +++++++-----------------------------------------------------
- 1 file changed, 7 insertions(+), 59 deletions(-)
+ configure.ac | 80 +++++-----------------------------------------------
+ 1 file changed, 7 insertions(+), 73 deletions(-)
 
 diff --git a/configure.ac b/configure.ac
-index df5ad7a..b72bd4c 100644
+index 7f79563..00b359e 100644
 --- a/configure.ac
 +++ b/configure.ac
-@@ -46,57 +46,19 @@ AC_ARG_WITH(iodbc, [  --with-iodbc[[=DIR]]	  [[default=no]] DIR is the iODBC bas
+@@ -57,71 +57,20 @@ AC_ARG_WITH(iodbc, [  --with-iodbc[[=DIR]]	  [[default=no]] DIR is the iODBC bas
  if test "$with_iodbc" != no; then
  	with_unixodbc=no
  	AC_DEFINE(WITH_IODBC, 1, [Define to 1 to build with iODBC support])
@@ -34,6 +38,7 @@
 -	fi
  fi
  
+ wo_odbc_config=__without_odbc_config
  if test "$with_unixodbc" != no; then
  	AC_DEFINE(WITH_UNIXODBC, 1,
              [Define to 1 to build with unixODBC support])
@@ -42,7 +47,9 @@
 -	else
 -		ODBC_CONFIG=$with_unixodbc
 -	fi
--	if test ! -x "${ODBC_CONFIG}/bin/odbc_config"; then
+-	if test "${ODBC_CONFIG}" = "${wo_odbc_config}"; then
+-		:
+-	elif test ! -x "${ODBC_CONFIG}/bin/odbc_config"; then
 -		if test ! -x "${ODBC_CONFIG}"; then
 -			AC_MSG_ERROR([odbc_config not found (required for unixODBC build)])
 -		fi
@@ -62,22 +69,33 @@
 -		# Linking libiodoc is rather problematic
 -		[ODBC_LIBDIR=`${ODBC_CONFIG} --libs | sed -e "s/^\(-L\|.*[ \t]-L\)\([^ \n\r\f\t]*\).*$/-L\2/"`]
 -		LDFLAGS="$LDFLAGS ${ODBC_LIBDIR}"
+-		LIBODBC=`${ODBC_CONFIG} --libs`	# for regression test
+-	elif test "${ODBC_CONFIG}" = "${wo_odbc_config}"; then
+-		ODBC_INCLUDE=/usr/include
+-		CPPFLAGS="$CPPFLAGS -I${ODBC_INCLUDE}"
+-		ODBC_LIBDiR=""
+-		LIBODBC="-lodbc"	# for regression test
 -	else
 -		ODBC_INCLUDE=`${ODBC_CONFIG} --include-prefix`
 -		CPPFLAGS="$CPPFLAGS -I${ODBC_INCLUDE}"
--		# Linking libodoc is rather problematic
+-		# Linking libodbc is rather problematic
 -		ODBC_LIBDIR=`${ODBC_CONFIG} --lib-prefix`
--		LDFLAGS="$LDFLAGS -L${ODBC_LIBDIR}"
+-		if test "${ODBC_LIBDIR}" != ""; then
+-			LDFLAGS="$LDFLAGS -L${ODBC_LIBDIR}"
+-		fi
+-		LIBODBC=`${ODBC_CONFIG} --libs`	# for regression test
 -	fi
 -	AC_MSG_NOTICE([using $ODBC_INCLUDE $ODBC_LIBDIR])
+-	AC_MSG_NOTICE([using $LIBODBC for regression test])
 -fi
+-AC_SUBST(LIBODBC)
 +ODBC_LIBS="-lodbcinst"
 +LIBS="$LIBS ${ODBC_LIBS}"
 +AC_MSG_NOTICE([using $ODBC_INCLUDE $ODBC_LIBS])
  
  #
  # SQLCOLATTRIBUTE_SQLLEN check
-@@ -176,18 +138,10 @@ PGAC_ARG_BOOL(enable, pthreads, yes,
+@@ -190,19 +139,10 @@ PGAC_ARG_BOOL(enable, pthreads, yes,
  # Find libpq headers and libraries
  #
  
@@ -87,8 +105,9 @@
 -
 -if test -n "$PG_CONFIG"; then
 -  pg_includedir=`"$PG_CONFIG" --includedir`
+-  pg_pkg_includedir=`"$PG_CONFIG" --pkgincludedir`
 -  pg_libdir=`"$PG_CONFIG" --libdir`
--  CPPFLAGS="$CPPFLAGS -I$pg_includedir"
+-  CPPFLAGS="$CPPFLAGS -I$pg_includedir -I$pg_pkg_includedir/internal"
 -  LDFLAGS="$LDFLAGS -L$pg_libdir"
 -fi
 -
@@ -100,7 +119,7 @@
  
  # 1. Programs
  
-@@ -211,12 +165,6 @@ if test "$with_iodbc" != no; then
+@@ -226,12 +166,6 @@ if test "$with_iodbc" != no; then
                   [AC_MSG_ERROR([iODBC library "iodbcinst" not found])])
  fi
  
@@ -113,6 +132,3 @@
  AC_CHECK_LIB(pq, PQsetSingleRowMode, [],
  	      [AC_MSG_ERROR([libpq library version >= 9.2 is required])])
  
--- 
-2.8.1
-
diff --git a/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/psqlodbc_09.05.0300.bb b/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/psqlodbc_09.05.0300.bb
deleted file mode 100644
index b0fc35e..0000000
--- a/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/psqlodbc_09.05.0300.bb
+++ /dev/null
@@ -1,14 +0,0 @@
-require ${PN}.inc
-
-LICENSE = "LGPL-2.0-only"
-LIC_FILES_CHKSUM = "file://license.txt;md5=6db3822fc7512e83087ba798da013692"
-
-SRC_URI = "http://ftp.postgresql.org/pub/odbc/versions/src/${BPN}-${PV}.tar.gz \
-    file://psqlodbc-remove-some-checks-for-cross-compiling.patch \
-    file://psqlodbc-donot-use-the-hardcode-libdir.patch \
-    file://psqlodbc-fix-for-ptest-support.patch \
-    file://run-ptest \
-"
-
-SRC_URI[md5sum] = "4c6e0b22187d7bb1c998ffac89e50f6b"
-SRC_URI[sha256sum] = "9521f328bf28aaaf5c8488dc89792b614f9d6271742c0baf9bb41c97537764a8"
diff --git a/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/psqlodbc.inc b/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/psqlodbc_13.02.0000.bb
similarity index 77%
rename from meta-openembedded/meta-oe/recipes-dbs/psqlodbc/psqlodbc.inc
rename to meta-openembedded/meta-oe/recipes-dbs/psqlodbc/psqlodbc_13.02.0000.bb
index ae20a72..299abee 100644
--- a/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/psqlodbc.inc
+++ b/meta-openembedded/meta-oe/recipes-dbs/psqlodbc/psqlodbc_13.02.0000.bb
@@ -16,6 +16,18 @@
 SECTION = "libs"
 HOMEPAGE = "http://psqlodbc.projects.postgresql.org/"
 
+LICENSE = "LGPL-2.0-only"
+LIC_FILES_CHKSUM = "file://license.txt;md5=6db3822fc7512e83087ba798da013692"
+
+SRC_URI = "http://ftp.postgresql.org/pub/odbc/versions/src/${BPN}-${PV}.tar.gz \
+    file://psqlodbc-remove-some-checks-for-cross-compiling.patch \
+    file://psqlodbc-donot-use-the-hardcode-libdir.patch \
+    file://psqlodbc-fix-for-ptest-support.patch \
+    file://run-ptest \
+"
+
+SRC_URI[sha256sum] = "b39b7e5c41fd6475c551112fa724bf57c4a446175ec4188a90e2844cc1612585"
+
 DEPENDS += "postgresql unixodbc"
 
 EXTRA_OECONF = "\
@@ -24,8 +36,7 @@
     --with-unixodbc=yes \
     --with-libpq=${STAGING_LIBDIR}/.. \
     --enable-pthreads \
-    --disable-unicode \
-    LIBS="-lpthread" \
+    LIBS='-lpthread' \
 "
 
 inherit autotools pkgconfig ptest
@@ -48,3 +59,4 @@
 
 # The tests need a local PostgreSQL server running
 RDEPENDS:${PN}-ptest = "postgresql"
+
diff --git a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-cmake-Add-check-for-atomic-support.patch b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-cmake-Add-check-for-atomic-support.patch
index 8bdd27f..2906100 100644
--- a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-cmake-Add-check-for-atomic-support.patch
+++ b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-cmake-Add-check-for-atomic-support.patch
@@ -27,7 +27,7 @@
 
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
-@@ -935,7 +935,12 @@ endif()
+@@ -1000,7 +1000,12 @@ option(ROCKSDB_BUILD_SHARED "Build share
  if(WIN32)
    set(SYSTEM_LIBS ${SYSTEM_LIBS} shlwapi.lib rpcrt4.lib)
  else()
@@ -39,7 +39,7 @@
 +  endif()
  endif()
  
- add_library(${ROCKSDB_STATIC_LIB} STATIC ${SOURCES} ${BUILD_VERSION_CC})
+ set(ROCKSDB_PLUGIN_EXTERNS "")
 --- /dev/null
 +++ b/cmake/modules/CheckAtomic.cmake
 @@ -0,0 +1,69 @@
diff --git a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-cmake-Use-exported-target-for-bz2.patch b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-cmake-Use-exported-target-for-bz2.patch
index d305475..139312a 100644
--- a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-cmake-Use-exported-target-for-bz2.patch
+++ b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-cmake-Use-exported-target-for-bz2.patch
@@ -13,16 +13,7 @@
 
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
-@@ -32,7 +32,7 @@
- # 3. cmake ..
- # 4. make -j
- 
--cmake_minimum_required(VERSION 3.5.1)
-+cmake_minimum_required(VERSION 3.7.2)
- 
- list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_LIST_DIR}/cmake/modules/")
- include(ReadVersion)
-@@ -152,12 +152,7 @@ else()
+@@ -153,12 +153,7 @@ else()
    if(WITH_BZ2)
      find_package(BZip2 REQUIRED)
      add_definitions(-DBZIP2)
diff --git a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-folly-Use-SYS_futex-for-syscall.patch b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-folly-Use-SYS_futex-for-syscall.patch
deleted file mode 100644
index 9c70d4f..0000000
--- a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-folly-Use-SYS_futex-for-syscall.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From ddcc8a9f7e0f0bfee96f2f0a0c10f21f9fa9b05d Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Sun, 15 Nov 2020 15:02:28 -0800
-Subject: [PATCH] folly: Use SYS_futex for syscall
-
-glibc defines SYS_futex and on newer 32bit CPUs like RISCV-32, arc there
-is no 32bit time_t therefore define SYS_futex in terms of SYS_futex_time64
-
-Upstream-Status: Submitted [https://github.com/facebook/rocksdb/pull/7676]
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- third-party/folly/folly/detail/Futex.cpp | 10 ++++++++--
- 1 file changed, 8 insertions(+), 2 deletions(-)
-
---- a/third-party/folly/folly/detail/Futex.cpp
-+++ b/third-party/folly/folly/detail/Futex.cpp
-@@ -48,9 +48,15 @@ namespace {
- #define FUTEX_CLOCK_REALTIME 256
- #endif
- 
-+/// Newer 32bit CPUs eg. RISCV-32 are defaulting to 64bit time_t from get go and
-+/// therefore do not define __NR_futex
-+#if !defined(SYS_futex) && defined(SYS_futex_time64)
-+# define SYS_futex SYS_futex_time64
-+#endif
-+
- int nativeFutexWake(const void* addr, int count, uint32_t wakeMask) {
-   long rv = syscall(
--      __NR_futex,
-+      SYS_futex,
-       addr, /* addr1 */
-       FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG, /* op */
-       count, /* val */
-@@ -112,7 +118,7 @@ FutexResult nativeFutexWaitImpl(
-   // Unlike FUTEX_WAIT, FUTEX_WAIT_BITSET requires an absolute timeout
-   // value - http://locklessinc.com/articles/futex_cheat_sheet/
-   long rv = syscall(
--      __NR_futex,
-+      SYS_futex,
-       addr, /* addr1 */
-       op, /* op */
-       expected, /* val */
diff --git a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-jemalloc_helper-Limit-the-mm_malloc.h-hack-to-glibc-.patch b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-jemalloc_helper-Limit-the-mm_malloc.h-hack-to-glibc-.patch
deleted file mode 100644
index dbb0dda..0000000
--- a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-jemalloc_helper-Limit-the-mm_malloc.h-hack-to-glibc-.patch
+++ /dev/null
@@ -1,23 +0,0 @@
-From 1a69d4cc3f97e348dba9714c7ec60da1a8650664 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Tue, 15 Jun 2021 22:05:36 -0700
-Subject: [PATCH] jemalloc_helper: Limit the mm_malloc.h hack to glibc on linux
-
-Musl does not need this hack
-
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- port/jemalloc_helper.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
---- a/port/jemalloc_helper.h
-+++ b/port/jemalloc_helper.h
-@@ -5,7 +5,7 @@
- 
- #pragma once
- 
--#if defined(__clang__)
-+#if defined(__clang__) && defined(__GLIBC__)
- // glibc's `posix_memalign()` declaration specifies `throw()` while clang's
- // declaration does not. There is a hack in clang to make its re-declaration
- // compatible with glibc's if they are declared consecutively. That hack breaks
diff --git a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-range_tree-Implement-toku_time_now-for-rv32-rv64-in-.patch b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-range_tree-Implement-toku_time_now-for-rv32-rv64-in-.patch
deleted file mode 100644
index 86c1bff..0000000
--- a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/0001-range_tree-Implement-toku_time_now-for-rv32-rv64-in-.patch
+++ /dev/null
@@ -1,44 +0,0 @@
-From 89c032a9b4011385c0b504ea61e5df0db71f0ff5 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Wed, 16 Jun 2021 19:06:02 -0700
-Subject: [PATCH] range_tree: Implement toku_time_now for rv32/rv64 in asm
-
-Upstream-Status: Pending
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- .../range_tree/lib/portability/toku_time.h    | 19 +++++++++++++++++++
- 1 file changed, 19 insertions(+)
-
-diff --git a/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h b/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
-index 4425a4a2e..4ac964f85 100644
---- a/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
-+++ b/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
-@@ -133,6 +133,25 @@ static inline tokutime_t toku_time_now(void) {
-   return result;
- #elif defined(__powerpc__)
-   return __ppc_get_timebase();
-+#elif defined(__riscv) // RISC-V
-+#if __riscv_xlen == 32
-+  uint32_t lo, hi0, hi1;
-+  __asm __volatile__(
-+      "rdcycleh %0\n"
-+      "rdcycle %1\n"
-+      "rdcycleh %2\n"
-+      "sub %0, %0, %2\n"
-+      "seqz %0, %0\n"
-+      "sub %0, zero, %0\n"
-+      "and %1, %1, %0\n"
-+      : "=r"(hi0), "=r"(lo), "=r"(hi1));
-+  return ((uint64_t)hi1 << 32) | lo;
-+#else
-+  uint64_t result;
-+  __asm __volatile__("rdcycle %0" : "=r"(result));
-+  return result;
-+#endif
-+
- #else
- #error No timer implementation for this platform
- #endif
--- 
-2.32.0
-
diff --git a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/arm.patch b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/arm.patch
index d428a66..5737677 100644
--- a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/arm.patch
+++ b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/arm.patch
@@ -3,7 +3,7 @@
 Signed-off-by: Khem Raj <raj.khem@gmail.com>
 --- a/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
 +++ b/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
-@@ -161,6 +161,20 @@ static inline tokutime_t toku_time_now(v
+@@ -164,6 +164,20 @@ static inline tokutime_t toku_time_now(v
    struct timeval tv;
    gettimeofday(&tv, nullptr);
    return (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec;
diff --git a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/mips.patch b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/mips.patch
index db2305d..0104d54 100644
--- a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/mips.patch
+++ b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/mips.patch
@@ -3,11 +3,10 @@
 Signed-off-by: Khem Raj <raj.khem@gmail.com>
 --- a/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
 +++ b/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
-@@ -155,7 +155,12 @@ static inline tokutime_t toku_time_now(v
-   __asm __volatile__("rdcycle %0" : "=r"(result));
-   return result;
- #endif
--
+@@ -158,6 +158,12 @@ static inline tokutime_t toku_time_now(v
+   uint64_t cycles;
+   asm volatile("rdcycle %0" : "=r"(cycles));
+   return cycles;
 +#elif defined(__mips__)
 +  // mips apparently only allows rdtsc for superusers, so we fall
 +  // back to gettimeofday.  It's possible clock_gettime would be better.
diff --git a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/ppc64.patch b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/ppc64.patch
index bc40f1b..45deef3 100644
--- a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/ppc64.patch
+++ b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/files/ppc64.patch
@@ -23,6 +23,6 @@
 +  uint64_t result;
 +  asm volatile("mfspr %0, 268" : "=r"(result));
 +  return result;
- #elif defined(__riscv) // RISC-V
- #if __riscv_xlen == 32
-   uint32_t lo, hi0, hi1;
+ #elif defined(__s390x__)
+   uint64_t result;
+   asm volatile("stckf %0" : "=Q"(result) : : "cc");
diff --git a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/rocksdb_6.20.3.bb b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/rocksdb_7.5.3.bb
similarity index 87%
rename from meta-openembedded/meta-oe/recipes-dbs/rocksdb/rocksdb_6.20.3.bb
rename to meta-openembedded/meta-oe/recipes-dbs/rocksdb/rocksdb_7.5.3.bb
index 2ad2d38..d4f0346 100644
--- a/meta-openembedded/meta-oe/recipes-dbs/rocksdb/rocksdb_6.20.3.bb
+++ b/meta-openembedded/meta-oe/recipes-dbs/rocksdb/rocksdb_7.5.3.bb
@@ -6,15 +6,12 @@
                     file://COPYING;md5=b234ee4d69f5fce4486a80fdaf4a4263 \
                     file://LICENSE.leveldb;md5=fb04ff57a14f308f2eed4a9b87d45837"
 
-SRCREV = "8608d75d85f8e1b3b64b73a4fb6d19baec61ba5c"
-SRCBRANCH = "6.20.fb"
+SRCREV = "540d5aae516265170564ec27b3e67a54a11b7045"
+SRCBRANCH = "7.5.fb"
 
 SRC_URI = "git://github.com/facebook/${BPN}.git;branch=${SRCBRANCH};protocol=https \
            file://0001-cmake-Add-check-for-atomic-support.patch \
            file://0001-cmake-Use-exported-target-for-bz2.patch \
-           file://0001-folly-Use-SYS_futex-for-syscall.patch \
-           file://0001-jemalloc_helper-Limit-the-mm_malloc.h-hack-to-glibc-.patch \
-           file://0001-range_tree-Implement-toku_time_now-for-rv32-rv64-in-.patch \
            file://ppc64.patch \
            file://mips.patch \
            file://arm.patch \
diff --git a/meta-openembedded/meta-oe/recipes-dbs/soci/soci/0001-Do-not-use-std-shuffle-with-clang-15.patch b/meta-openembedded/meta-oe/recipes-dbs/soci/soci/0001-Do-not-use-std-shuffle-with-clang-15.patch
new file mode 100644
index 0000000..eb29627
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-dbs/soci/soci/0001-Do-not-use-std-shuffle-with-clang-15.patch
@@ -0,0 +1,32 @@
+From e5f72c656829402c6f70e7416039bc18f0c26485 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 30 Aug 2022 22:17:14 -0700
+Subject: [PATCH] Do not use std::shuffle with clang 15
+
+This fails to compile, although it is the preferred approach for C++11
+and newer
+
+See
+https://github.com/SOCI/soci/issues/984
+
+Upstream-Status: Inappropriate [Workaround]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ cmake/SociConfig.cmake | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/cmake/SociConfig.cmake b/cmake/SociConfig.cmake
+index 492e1837..f24fd9a6 100644
+--- a/cmake/SociConfig.cmake
++++ b/cmake/SociConfig.cmake
+@@ -94,6 +94,7 @@ else()
+     set(SOCI_CXX11 ON)
+     set(SOCI_CXX_VERSION_FLAGS "-std=c++11")
+     add_definitions(-DCATCH_CONFIG_CPP11_NO_IS_ENUM)
++    add_definitions(-DCATCH_CONFIG_CPP11_NO_SHUFFLE)
+ 
+     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SOCI_GCC_CLANG_COMMON_FLAGS} ${SOCI_CXX_VERSION_FLAGS}")
+ 
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-dbs/soci/soci_4.0.3.bb b/meta-openembedded/meta-oe/recipes-dbs/soci/soci_4.0.3.bb
index ff47e50..71ab80c 100644
--- a/meta-openembedded/meta-oe/recipes-dbs/soci/soci_4.0.3.bb
+++ b/meta-openembedded/meta-oe/recipes-dbs/soci/soci_4.0.3.bb
@@ -5,7 +5,9 @@
 SECTION = "libs"
 DEPENDS = "boost"
 
-SRC_URI = "${SOURCEFORGE_MIRROR}/project/${BPN}/${BPN}/${BP}/${BP}.tar.gz"
+SRC_URI = "${SOURCEFORGE_MIRROR}/project/${BPN}/${BPN}/${BP}/${BP}.tar.gz \
+           file://0001-Do-not-use-std-shuffle-with-clang-15.patch \
+           "
 SRC_URI[sha256sum] = "615e5f7e4b52007f3a3b4050a99aadf6346b56b5098eb08b3a650836083c6a33"
 
 TESTCONFIG = '-DSOCI_TEST_EMPTY_CONNSTR="dummy" -DSOCI_TEST_SQLITE3_CONNSTR="test.db" \
diff --git a/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite-2.8.17/0001-shell.c-Fix-format-not-a-string-literal-warning.patch b/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite-2.8.17/0001-shell.c-Fix-format-not-a-string-literal-warning.patch
deleted file mode 100644
index c5d7c0c..0000000
--- a/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite-2.8.17/0001-shell.c-Fix-format-not-a-string-literal-warning.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-From 443980ddc82fb40e2e1f9544f2be169bd23dd246 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Sat, 17 Jun 2017 16:49:24 -0700
-Subject: [PATCH] shell.c:  Fix format not a string literal warning
-
-src/shell.c:695:20: error: format not a string literal and no format arguments [-Werror=format-security]
-|      fprintf(stderr,zHelp);
-|                     ^~~~~
-
-Upstream-Status: Pending
-
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- src/shell.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/shell.c b/src/shell.c
-index bb46c49..3c6fe0f 100644
---- a/src/shell.c
-+++ b/src/shell.c
-@@ -692,7 +692,7 @@ static int do_meta_command(char *zLine, struct callback_data *p){
-   }else
- 
-   if( c=='h' && strncmp(azArg[0], "help", n)==0 ){
--    fprintf(stderr,zHelp);
-+    fprintf(stderr, "%s", zHelp);
-   }else
- 
-   if( c=='i' && strncmp(azArg[0], "indices", n)==0 && nArg>1 ){
--- 
-2.13.1
-
diff --git a/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite-2.8.17/mainmk_build_dynamic.patch b/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite-2.8.17/mainmk_build_dynamic.patch
deleted file mode 100644
index 914422f..0000000
--- a/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite-2.8.17/mainmk_build_dynamic.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-diff -urN sqlite-2.8.17.orig/main.mk sqlite-2.8.17/main.mk
---- sqlite-2.8.17.orig/main.mk	2005-04-23 22:43:23.000000000 +0000
-+++ sqlite-2.8.17/main.mk	2009-04-02 20:10:36.000000000 +0000
-@@ -139,7 +139,7 @@
- # This is the default Makefile target.  The objects listed here
- # are what get build when you type just "make" with no arguments.
- #
--all:	sqlite.h config.h libsqlite.a sqlite$(EXE)
-+all:   sqlite.h config.h libsqlite.so sqlite$(EXE)
- 
- # Generate the file "last_change" which contains the date of change
- # of the most recently modified source code file
-@@ -148,13 +148,12 @@
- 	cat $(SRC) | grep '$$Id: ' | sort +4 | tail -1 \
-           | awk '{print $$5,$$6}' >last_change
- 
--libsqlite.a:	$(LIBOBJ)
--	$(AR) libsqlite.a $(LIBOBJ)
--	$(RANLIB) libsqlite.a
--
--sqlite$(EXE):	$(TOP)/src/shell.c libsqlite.a sqlite.h
--	$(TCCX) $(READLINE_FLAGS) -o sqlite$(EXE) $(TOP)/src/shell.c \
--		libsqlite.a $(LIBREADLINE) $(THREADLIB)
-+libsqlite.so:	$(LIBOBJ)
-+	$(CC) $(LDFLAGS) -shared -o libsqlite.so -Wl,-soname,libsqlite.so.0 $(LIBOBJ)
-+
-+sqlite$(EXE):	$(TOP)/src/shell.c libsqlite.so sqlite.h
-+	$(TCCX) $(LDFLAGS) $(READLINE_FLAGS) -o sqlite$(EXE) $(TOP)/src/shell.c \
-+		-lsqlite $(LIBREADLINE) $(THREADLIB)
- 
- sqlite_analyzer$(EXE):	$(TOP)/src/tclsqlite.c libsqlite.a $(TESTSRC) \
- 			$(TOP)/tool/spaceanal.tcl
diff --git a/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite-2.8.17/mainmk_no_tcl.patch b/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite-2.8.17/mainmk_no_tcl.patch
deleted file mode 100644
index b84b648..0000000
--- a/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite-2.8.17/mainmk_no_tcl.patch
+++ /dev/null
@@ -1,20 +0,0 @@
-diff -bur sqlite-2.8.17~orig/main.mk sqlite-2.8.17/main.mk
---- sqlite-2.8.17~orig/main.mk	2006-02-13 04:09:33.000000000 -0600
-+++ sqlite-2.8.17/main.mk	2006-02-13 04:15:42.000000000 -0600
-@@ -58,7 +58,7 @@
-          expr.o func.o hash.o insert.o \
-          main.o opcodes.o os.o pager.o parse.o pragma.o printf.o random.o \
-          select.o table.o tokenize.o trigger.o update.o util.o \
--         vacuum.o vdbe.o vdbeaux.o where.o tclsqlite.o
-+         vacuum.o vdbe.o vdbeaux.o where.o
- 
- # All of the source code files.
- #
-@@ -91,7 +91,6 @@
-   $(TOP)/src/sqlite.h.in \
-   $(TOP)/src/sqliteInt.h \
-   $(TOP)/src/table.c \
--  $(TOP)/src/tclsqlite.c \
-   $(TOP)/src/tokenize.c \
-   $(TOP)/src/trigger.c \
-   $(TOP)/src/update.c \
diff --git a/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite-2.8.17/sqlite.pc b/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite-2.8.17/sqlite.pc
deleted file mode 100644
index 6bc742e..0000000
--- a/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite-2.8.17/sqlite.pc
+++ /dev/null
@@ -1,12 +0,0 @@
-# Package Information for pkg-config
-
-prefix=/usr
-exec_prefix=${prefix}
-libdir=${exec_prefix}/lib
-includedir=${prefix}/include
-
-Name: SQLite
-Description: SQL database engine
-Version: 2.8.17
-Libs: -L${libdir} -lsqlite
-Cflags: -I${includedir}
diff --git a/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite_2.8.17.bb b/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite_2.8.17.bb
deleted file mode 100644
index bb62afa..0000000
--- a/meta-openembedded/meta-oe/recipes-dbs/sqlite/sqlite_2.8.17.bb
+++ /dev/null
@@ -1,61 +0,0 @@
-SUMMARY = "An Embeddable SQL Database Engine"
-HOMEPAGE = "http://www.sqlite.org/"
-SECTION = "libs"
-DEPENDS = "readline ncurses"
-LICENSE = "PD"
-LIC_FILES_CHKSUM = "file://src/main.c;beginline=1;endline=10;md5=e98469a8efa024a38ad5b2e4b92f4a96"
-
-PR = "r7"
-
-SRC_URI = "http://www.hwaci.com/sw/sqlite/sqlite-${PV}.tar.gz \
-           file://mainmk_build_dynamic.patch \
-           file://mainmk_no_tcl.patch \
-           file://sqlite.pc \
-           file://0001-shell.c-Fix-format-not-a-string-literal-warning.patch \
-           "
-
-SOURCES = "attach.o auth.o btree.o btree_rb.o build.o copy.o date.o delete.o \
-           expr.o func.o hash.o insert.o main.o opcodes.o os.o pager.o \
-           parse.o pragma.o printf.o random.o select.o table.o tokenize.o \
-           trigger.o update.o util.o vacuum.o vdbe.o vdbeaux.o where.o"
-
-inherit autotools pkgconfig
-
-do_configure() {
-    echo "main.mk is patched, no need to configure"
-    # make pkgconfig.bbclass pick this up
-    cp ${WORKDIR}/sqlite.pc ${S}
-}
-
-do_compile() {
-    oe_runmake -f ${S}/Makefile.linux-gcc \
-             TOP="${S}" \
-             BCC="${BUILD_CC}" \
-             TCC="${CC}" \
-             OPTS="-fPIC -D'INTPTR_TYPE=int'" \
-             TCL_FLAGS= LIBTCL= \
-             READLINE_FLAGS="-DHAVE_READLINE=1 -I${STAGING_INCDIR}" \
-             LIBREADLINE="-L. -L${STAGING_LIBDIR} -lreadline -lncurses"
-}
-
-do_install() {
-    install -d ${D}${libdir} ${D}${bindir}
-    install sqlite ${D}${bindir}
-    install -m 0755 libsqlite.so ${D}${libdir}/libsqlite.so.0.8.6
-    ln -sf libsqlite.so.0.8.6 ${D}${libdir}/libsqlite.so
-    ln -sf libsqlite.so.0.8.6 ${D}${libdir}/libsqlite.so.0
-    ln -sf libsqlite.so.0.8.6 ${D}${libdir}/libsqlite.so.0.8
-    install -d ${D}${includedir}
-    install -m 0644 sqlite.h ${D}${includedir}/sqlite.h
-    install -d ${D}${libdir}/pkgconfig
-    install -m 0644 ${S}/sqlite.pc ${D}${libdir}/pkgconfig/sqlite.pc
-}
-
-PACKAGES += "${PN}-bin"
-FILES:${PN}-bin = "${bindir}/*"
-FILES:${PN} = "${libdir}/*.so.*"
-
-SRC_URI[md5sum] = "838dbac20b56d2c4292e98848505a05b"
-SRC_URI[sha256sum] = "3f35ebfb67867fb5b583a03e480f900206af637efe7179b32294a6a0cf806f37"
-
-BBCLASSEXTEND = "native"
diff --git a/meta-openembedded/meta-oe/recipes-devtools/abseil-cpp/abseil-cpp_git.bb b/meta-openembedded/meta-oe/recipes-devtools/abseil-cpp/abseil-cpp_git.bb
index b3dc06c..af28da5 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/abseil-cpp/abseil-cpp_git.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/abseil-cpp/abseil-cpp_git.bb
@@ -7,8 +7,8 @@
 LICENSE = "Apache-2.0"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=df52c6edb7adc22e533b2bacc3bd3915"
 
-PV = "20220623.0"
-SRCREV = "273292d1cfc0a94a65082ee350509af1d113344d"
+PV = "20220623.1"
+SRCREV = "8c0b94e793a66495e0b1f34a5eb26bd7dc672db0"
 BRANCH = "lts_2022_06_23"
 SRC_URI = "git://github.com/abseil/abseil-cpp;branch=${BRANCH};protocol=https \
            file://0001-absl-always-use-asm-sgidefs.h.patch             \
diff --git a/meta-openembedded/meta-oe/recipes-devtools/android-tools/android-tools/core/0001-memory.h-Always-define-strlcpy-for-glibc-based-syste.patch b/meta-openembedded/meta-oe/recipes-devtools/android-tools/android-tools/core/0001-memory.h-Always-define-strlcpy-for-glibc-based-syste.patch
new file mode 100644
index 0000000..82e9715
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/android-tools/android-tools/core/0001-memory.h-Always-define-strlcpy-for-glibc-based-syste.patch
@@ -0,0 +1,35 @@
+From db3a3714be07c8ab51b9ae7b035e4afe9f39c645 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 7 Sep 2022 13:20:22 -0700
+Subject: [PATCH] memory.h: Always define strlcpy for glibc based systems
+
+android-config.h, which is included on the compiler command line, sets
+HAVE_STRLCPY unconditionally. Since bionic supports it, this is no big deal
+on Android, and it is also no problem with musl since an implementation
+exists there too, but glibc does not provide one. So either we use libbsd or
+the implementation provided by android-tools here. We currently use the
+in-tree implementation on systems which do not provide it.
+
+Upstream-Status: Pending
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ include/cutils/memory.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/include/cutils/memory.h b/include/cutils/memory.h
+index e725cdd032..9e99353c58 100644
+--- a/include/cutils/memory.h
++++ b/include/cutils/memory.h
+@@ -30,7 +30,7 @@ void android_memset16(uint16_t* dst, uint16_t value, size_t size);
+ /* size is given in bytes and must be multiple of 4 */
+ void android_memset32(uint32_t* dst, uint32_t value, size_t size);
+ 
+-#if !HAVE_STRLCPY
++#if !HAVE_STRLCPY || defined(__GLIBC__)
+ /* Declaration of strlcpy() for platforms that don't already have it. */
+ size_t strlcpy(char *dst, const char *src, size_t size);
+ #endif
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-devtools/android-tools/android-tools/core/b64_pton_function_decl.patch b/meta-openembedded/meta-oe/recipes-devtools/android-tools/android-tools/core/b64_pton_function_decl.patch
new file mode 100644
index 0000000..80410f1
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/android-tools/android-tools/core/b64_pton_function_decl.patch
@@ -0,0 +1,14 @@
+Add prototype declaration for b64_pton
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+--- a/adb/adb_auth_client.c
++++ b/adb/adb_auth_client.c
+@@ -29,6 +29,7 @@
+ 
+ #define TRACE_TAG TRACE_AUTH
+ 
++extern int b64_pton(const char* src, uint8_t* target, size_t targsize);
+ 
+ struct adb_public_key {
+     struct listnode node;
diff --git a/meta-openembedded/meta-oe/recipes-devtools/android-tools/android-tools_5.1.1.r37.bb b/meta-openembedded/meta-oe/recipes-devtools/android-tools/android-tools_5.1.1.r37.bb
index 2639360..abd140c 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/android-tools/android-tools_5.1.1.r37.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/android-tools/android-tools_5.1.1.r37.bb
@@ -39,11 +39,13 @@
     file://core/0011-Remove-bionic-specific-calls.patch;patchdir=system/core \
     file://core/0012-Fix-implicit-declaration-of-stlcat-strlcopy-function.patch;patchdir=system/core \
     file://core/adb_libssl_11.diff;patchdir=system/core \
+    file://core/b64_pton_function_decl.patch;patchdir=system/core \
     file://core/0013-adb-Support-riscv64.patch;patchdir=system/core \
     file://core/0014-add-u3-ss-descriptor-support-for-adb.patch;patchdir=system/core \
     file://core/0015-libsparse-Split-off-most-of-sparse_file_read_normal-.patch;patchdir=system/core \
     file://core/0016-libsparse-Add-hole-mode-to-sparse_file_read.patch;patchdir=system/core \
     file://core/0017-img2simg-Add-support-for-converting-holes-to-don-t-c.patch;patchdir=system/core \
+    file://core/0001-memory.h-Always-define-strlcpy-for-glibc-based-syste.patch;patchdir=system/core \
     file://extras/0001-ext4_utils-remove-selinux-extensions.patch;patchdir=system/extras \
     file://extras/0002-ext4_utils-add-o-argument-to-preserve-ownership.patch;patchdir=system/extras \
     file://libselinux/0001-Remove-bionic-specific-calls.patch;patchdir=external/libselinux \
diff --git a/meta-openembedded/meta-oe/recipes-devtools/ctags/ctags_5.9.20220821.0.bb b/meta-openembedded/meta-oe/recipes-devtools/ctags/ctags_5.9.20220925.0.bb
similarity index 95%
rename from meta-openembedded/meta-oe/recipes-devtools/ctags/ctags_5.9.20220821.0.bb
rename to meta-openembedded/meta-oe/recipes-devtools/ctags/ctags_5.9.20220925.0.bb
index 31f4935..f5bacfd 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/ctags/ctags_5.9.20220821.0.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/ctags/ctags_5.9.20220925.0.bb
@@ -14,7 +14,7 @@
 
 inherit autotools-brokensep pkgconfig manpages
 
-SRCREV = "40551e21c507c2426a323373f3ff200799150429"
+SRCREV = "299fe525048358ecdfecb9ca91505333c0fb14f4"
 SRC_URI = "git://github.com/universal-ctags/ctags;branch=master;protocol=https"
 
 S = "${WORKDIR}/git"
diff --git a/meta-openembedded/meta-oe/recipes-devtools/flatbuffers/flatbuffers_2.0.6.bb b/meta-openembedded/meta-oe/recipes-devtools/flatbuffers/flatbuffers.bb
similarity index 84%
rename from meta-openembedded/meta-oe/recipes-devtools/flatbuffers/flatbuffers_2.0.6.bb
rename to meta-openembedded/meta-oe/recipes-devtools/flatbuffers/flatbuffers.bb
index 3ca4772..f4a8b02 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/flatbuffers/flatbuffers_2.0.6.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/flatbuffers/flatbuffers.bb
@@ -2,6 +2,9 @@
 HOMEPAGE = "https://github.com/google/flatbuffers"
 SECTION = "console/tools"
 LICENSE = "Apache-2.0"
+LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=3b83ef96387f14655fc854ddc3c6bd57"
+
+require flatbuffers.inc
 
 PACKAGE_BEFORE_PN = "${PN}-compiler"
 
@@ -10,10 +13,6 @@
 RDEPENDS:${PN}-compiler = "${PN}"
 RDEPENDS:${PN}-dev += "${PN}-compiler"
 
-LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=3b83ef96387f14655fc854ddc3c6bd57"
-
-SRCREV = "615616cb5549a34bdf288c04bc1b94bd7a65c396"
-SRC_URI = "git://github.com/google/flatbuffers.git;branch=master;protocol=https"
 S = "${WORKDIR}/git"
 
 CVE_CHECK_IGNORE += "CVE-2020-35864"
diff --git a/meta-openembedded/meta-oe/recipes-devtools/flatbuffers/flatbuffers.inc b/meta-openembedded/meta-oe/recipes-devtools/flatbuffers/flatbuffers.inc
new file mode 100644
index 0000000..ebdd944
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/flatbuffers/flatbuffers.inc
@@ -0,0 +1,3 @@
+PV = "2.0.8"
+SRCREV = "06c5c7ed0bd987a918cf88caafb094f22cdd1721"
+SRC_URI = "git://github.com/google/flatbuffers.git;branch=master;protocol=https"
diff --git a/meta-openembedded/meta-oe/recipes-devtools/flatbuffers/python3-flatbuffers_2.0.6.bb b/meta-openembedded/meta-oe/recipes-devtools/flatbuffers/python3-flatbuffers.bb
similarity index 72%
rename from meta-openembedded/meta-oe/recipes-devtools/flatbuffers/python3-flatbuffers_2.0.6.bb
rename to meta-openembedded/meta-oe/recipes-devtools/flatbuffers/python3-flatbuffers.bb
index 7c05e76..fb5b86d 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/flatbuffers/python3-flatbuffers_2.0.6.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/flatbuffers/python3-flatbuffers.bb
@@ -2,11 +2,10 @@
 HOMEPAGE = "https://github.com/google/flatbuffers"
 SECTION = "console/tools"
 LICENSE = "Apache-2.0"
-
 LIC_FILES_CHKSUM = "file://../LICENSE.txt;md5=3b83ef96387f14655fc854ddc3c6bd57"
 
-SRCREV = "615616cb5549a34bdf288c04bc1b94bd7a65c396"
-SRC_URI = "git://github.com/google/flatbuffers.git;branch=master;protocol=https"
+require flatbuffers.inc
+
 S = "${WORKDIR}/git/python"
 
 RDEPENDS:${PN} = "flatbuffers"
diff --git a/meta-openembedded/meta-oe/recipes-devtools/geany/geany-plugins/0001-scope-Use-0-instead-of-NULL-for-gboolean.patch b/meta-openembedded/meta-oe/recipes-devtools/geany/geany-plugins/0001-scope-Use-0-instead-of-NULL-for-gboolean.patch
new file mode 100644
index 0000000..bb03fa6
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/geany/geany-plugins/0001-scope-Use-0-instead-of-NULL-for-gboolean.patch
@@ -0,0 +1,34 @@
+From 9ee9388bc66e6cf68db96b2014dca2115f745dc9 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 30 Aug 2022 22:33:46 -0700
+Subject: [PATCH] scope: Use 0 instead of NULL for gboolean
+
+Fixes warnings with clang 15+
+
+scope/src/stack.c:168:11: error: incompatible pointer to integer conversion initializing 'gboolean' (aka 'int') with an expression of type 'void *' [-Wint-conversion]
+        gboolean entry = NULL;
+                 ^       ~~~~
+1 error generated.
+
+Upstream-Status: Submitted [https://github.com/geany/geany-plugins/pull/1191]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ scope/src/stack.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/scope/src/stack.c b/scope/src/stack.c
+index b03909fe..041dd415 100644
+--- a/scope/src/stack.c
++++ b/scope/src/stack.c
+@@ -165,7 +165,7 @@ void on_stack_follow(GArray *nodes)
+ gboolean stack_entry(void)
+ {
+ 	GtkTreeIter iter;
+-	gboolean entry = NULL;
++	gboolean entry = 0;
+ 
+ 	if (gtk_tree_selection_get_selected(selection, NULL, &iter))
+ 	{
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-devtools/geany/geany-plugins_1.38.bb b/meta-openembedded/meta-oe/recipes-devtools/geany/geany-plugins_1.38.bb
index fa4cccc..1ed2993 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/geany/geany-plugins_1.38.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/geany/geany-plugins_1.38.bb
@@ -33,6 +33,7 @@
     file://0001-Use-pkg-config-to-find-gpgme.patch \
     file://0001-git-changebar-Adjust-structs-for-libgit2-1.4.x.patch \
     file://0001-geany.m4-Do-not-tinker-with-pkg-config-paths.patch \
+    file://0001-scope-Use-0-instead-of-NULL-for-gboolean.patch \
 "
 SRC_URI[sha256sum] = "1c578a7ebb390aa8882f195acd3d8da3ceb73925d291b28dec90cd3e5fd20586"
 
diff --git a/meta-openembedded/meta-oe/recipes-devtools/jq/jq/0001-configure-Pass-_XOPEN_SOURCE-when-checking-for-strpt.patch b/meta-openembedded/meta-oe/recipes-devtools/jq/jq/0001-configure-Pass-_XOPEN_SOURCE-when-checking-for-strpt.patch
new file mode 100644
index 0000000..e4d6ebb
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/jq/jq/0001-configure-Pass-_XOPEN_SOURCE-when-checking-for-strpt.patch
@@ -0,0 +1,40 @@
+From 40bbd419ad8d1bd9cbe8b17063c323f8a40ab327 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 6 Sep 2022 09:59:31 -0700
+Subject: [PATCH 1/2] configure: Pass _XOPEN_SOURCE when checking for strptime
+
+Include sys/time.h for gettimeofday since that's where it is declared in glibc
+
+Upstream-Status: Submitted [https://github.com/stedolan/jq/pull/2480]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.ac | 5 ++++-
+ 1 file changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/configure.ac b/configure.ac
+index a2cd99e..95afe06 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -139,7 +139,10 @@ AC_FUNC_ALLOCA
+ 
+ AC_FIND_FUNC([isatty], [c], [#include <unistd.h>], [0])
+ AC_FIND_FUNC([_isatty], [c], [#include <io.h>], [0])
++OLD_CFLAGS=$CFLAGS
++CFLAGS="$CFLAGS -D_XOPEN_SOURCE"
+ AC_FIND_FUNC([strptime], [c], [#include <time.h>], [0, 0, 0])
++CFLAGS=$OLD_CFLAGS
+ AC_FIND_FUNC([strftime], [c], [#include <time.h>], [0, 0, 0, 0])
+ AC_FIND_FUNC([setenv], [c], [#include <stdlib.h>], [0, 0, 0])
+ AC_FIND_FUNC([timegm], [c], [#include <time.h>], [0])
+@@ -147,7 +150,7 @@ AC_FIND_FUNC([gmtime_r], [c], [#include <time.h>], [0, 0])
+ AC_FIND_FUNC([gmtime], [c], [#include <time.h>], [0])
+ AC_FIND_FUNC([localtime_r], [c], [#include <time.h>], [0, 0])
+ AC_FIND_FUNC([localtime], [c], [#include <time.h>], [0])
+-AC_FIND_FUNC([gettimeofday], [c], [#include <time.h>], [0, 0])
++AC_FIND_FUNC([gettimeofday], [c], [#include <sys/time.h>], [0, 0])
+ AC_CHECK_MEMBER([struct tm.tm_gmtoff], [AC_DEFINE([HAVE_TM_TM_GMT_OFF],1,[Define to 1 if the system has the tm_gmt_off field in struct tm])],
+                 [], [[#include <time.h>]])
+ AC_CHECK_MEMBER([struct tm.__tm_gmtoff], [AC_DEFINE([HAVE_TM___TM_GMT_OFF],1,[Define to 1 if the system has the __tm_gmt_off field in struct tm])],
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-devtools/jq/jq/0002-builtin-Replace-_BSD_SOURCE-with-_DEFAULT_SOURCE.patch b/meta-openembedded/meta-oe/recipes-devtools/jq/jq/0002-builtin-Replace-_BSD_SOURCE-with-_DEFAULT_SOURCE.patch
new file mode 100644
index 0000000..d2f999a
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/jq/jq/0002-builtin-Replace-_BSD_SOURCE-with-_DEFAULT_SOURCE.patch
@@ -0,0 +1,30 @@
+From cda1734bed3b048c01452c798877d05b8c2f4c15 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 6 Sep 2022 10:00:59 -0700
+Subject: [PATCH 2/2] builtin: Replace _BSD_SOURCE with _DEFAULT_SOURCE
+
+Newer glibc has removed _BSD_SOURCE and wants it to be replaced with _DEFAULT_SOURCE
+
+Fixes
+/usr/include/features.h:194:3: warning: "_BSD_SOURCE and _SVID_SOURCE are deprecated, use _DEFAULT_SOURCE" [-W#warnings]
+warning "_BSD_SOURCE and _SVID_SOURCE are deprecated, use _DEFAULT_SOURCE"
+
+Upstream-Status: Submitted [https://github.com/stedolan/jq/pull/2480]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/builtin.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/builtin.c b/src/builtin.c
+index 1c6b08c..2a31496 100644
+--- a/src/builtin.c
++++ b/src/builtin.c
+@@ -1,4 +1,4 @@
+-#define _BSD_SOURCE
++#define _DEFAULT_SOURCE
+ #define _GNU_SOURCE
+ #ifndef __sun__
+ # define _XOPEN_SOURCE
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-devtools/jq/jq/run-ptest b/meta-openembedded/meta-oe/recipes-devtools/jq/jq/run-ptest
new file mode 100644
index 0000000..0e4c707
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/jq/jq/run-ptest
@@ -0,0 +1,11 @@
+#!/bin/sh
+
+for test in optionaltest mantest jqtest onigtest shtest utf8test base64test; do
+	./tests/${test}
+	if [ $? -eq 0 ]; then
+		echo "PASS: ${test}"
+	else
+		echo "FAIL: ${test}"
+	fi
+done
+
diff --git a/meta-openembedded/meta-oe/recipes-devtools/jq/jq_git.bb b/meta-openembedded/meta-oe/recipes-devtools/jq/jq_git.bb
index 8b0218c..c6634cd 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/jq/jq_git.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/jq/jq_git.bb
@@ -9,11 +9,15 @@
 LIC_FILES_CHKSUM = "file://COPYING;md5=2814b59e00e7918c864fa3b6bbe049b4"
 
 PV = "1.6+git${SRCPV}"
-SRC_URI = "git://github.com/stedolan/jq;protocol=https;branch=master"
-SRCREV = "a9f97e9e61a910a374a5d768244e8ad63f407d3e"
+SRC_URI = "git://github.com/stedolan/jq;protocol=https;branch=master \
+    file://0001-configure-Pass-_XOPEN_SOURCE-when-checking-for-strpt.patch \
+    file://0002-builtin-Replace-_BSD_SOURCE-with-_DEFAULT_SOURCE.patch \
+    file://run-ptest \
+    "
+SRCREV = "cff5336ec71b6fee396a95bb0e4bea365e0cd1e8"
 S = "${WORKDIR}/git"
 
-inherit autotools-brokensep
+inherit autotools-brokensep ptest
 
 PACKAGECONFIG ?= "oniguruma"
 
@@ -25,4 +29,12 @@
     --disable-valgrind \
 "
 
+do_install_ptest() {
+    cp -rf ${B}/tests ${D}${PTEST_PATH}
+    cp -rf ${B}/.libs ${D}${PTEST_PATH}
+    # libjq.so.* is packaged in the main jq component, so remove it from ptest
+    rm -f ${D}${PTEST_PATH}/.libs/libjq.so.*
+    ln -sf ${bindir}/jq ${D}${PTEST_PATH}
+}
+
 BBCLASSEXTEND = "native"
diff --git a/meta-openembedded/meta-oe/recipes-devtools/libgee/libgee_0.20.5.bb b/meta-openembedded/meta-oe/recipes-devtools/libgee/libgee_0.20.6.bb
similarity index 84%
rename from meta-openembedded/meta-oe/recipes-devtools/libgee/libgee_0.20.5.bb
rename to meta-openembedded/meta-oe/recipes-devtools/libgee/libgee_0.20.6.bb
index adf73d4..f7b54b9 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/libgee/libgee_0.20.5.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/libgee/libgee_0.20.6.bb
@@ -18,4 +18,4 @@
     done
 }
 
-SRC_URI[archive.sha256sum] = "31863a8957d5a727f9067495cabf0a0889fa5d3d44626e54094331188d5c1518"
+SRC_URI[archive.sha256sum] = "1bf834f5e10d60cc6124d74ed3c1dd38da646787fbf7872220b8b4068e476d4d"
diff --git a/meta-openembedded/meta-oe/recipes-devtools/luajit/luajit/0001-Use-builtin-for-clear_cache.patch b/meta-openembedded/meta-oe/recipes-devtools/luajit/luajit/0001-Use-builtin-for-clear_cache.patch
new file mode 100644
index 0000000..2c41180
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/luajit/luajit/0001-Use-builtin-for-clear_cache.patch
@@ -0,0 +1,29 @@
+From ca8f7d968a212f2da64492faac4f80384a5ba395 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Sun, 11 Sep 2022 13:23:19 -0700
+Subject: [PATCH] Use builtin for clear_cache
+
+This makes it compile on mips/clang and also portable across platforms
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/lj_mcode.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/lj_mcode.c b/src/lj_mcode.c
+index 163aada4..471897da 100644
+--- a/src/lj_mcode.c
++++ b/src/lj_mcode.c
+@@ -46,7 +46,7 @@ void lj_mcode_sync(void *start, void *end)
+ #elif LJ_TARGET_PPC
+   lj_vm_cachesync(start, end);
+ #elif defined(__GNUC__) || defined(__clang__)
+-  __clear_cache(start, end);
++  __builtin___clear_cache(start, end);
+ #else
+ #error "Missing builtin to flush instruction cache"
+ #endif
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-devtools/luajit/luajit_git.bb b/meta-openembedded/meta-oe/recipes-devtools/luajit/luajit_git.bb
index 3f3939e..dd1cd51 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/luajit/luajit_git.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/luajit/luajit_git.bb
@@ -6,6 +6,7 @@
 SRC_URI = "git://luajit.org/git/luajit-2.0.git;protocol=http;branch=v2.1 \
            file://0001-Do-not-strip-automatically-this-leaves-the-stripping.patch \
            file://clang.patch \
+           file://0001-Use-builtin-for-clear_cache.patch \
            "
 
 # Set PV to a version tag and date (YYMMDD) associated with SRCREV if it is later.
diff --git a/meta-openembedded/meta-oe/recipes-devtools/makeself/makeself_2.4.5.bb b/meta-openembedded/meta-oe/recipes-devtools/makeself/makeself_2.4.5.bb
index e0dfc3d..4cfe2ec 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/makeself/makeself_2.4.5.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/makeself/makeself_2.4.5.bb
@@ -9,7 +9,7 @@
 LIC_FILES_CHKSUM = "file://COPYING;md5=b234ee4d69f5fce4486a80fdaf4a4263"
 
 SRC_URI = "\
-    git://git@github.com/megastep/makeself.git;protocol=https;branch=master \
+    git://github.com/megastep/${BPN}.git;protocol=https;branch=master \
 "
 
 SRCREV = "5742be6410bfad2c619fb1e98bf795e8fa0913c7"
@@ -21,7 +21,6 @@
 
 do_install() {
     install -d ${D}${bindir}
-    install -m 0755 ${S}/makeself.1 ${D}${bindir}/
     install -m 0755 ${S}/makeself.sh ${D}${bindir}/
     install -m 0755 ${S}/makeself-header.sh ${D}${bindir}/
 }
diff --git a/meta-openembedded/meta-oe/recipes-devtools/mcpp/files/0001-configure-Fix-checks-for-system-headers.patch b/meta-openembedded/meta-oe/recipes-devtools/mcpp/files/0001-configure-Fix-checks-for-system-headers.patch
new file mode 100644
index 0000000..d9bdc01
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/mcpp/files/0001-configure-Fix-checks-for-system-headers.patch
@@ -0,0 +1,44 @@
+From c1e9f2f3d086e0df3c10a2468fd7b37fd0c5038c Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 7 Sep 2022 00:02:08 -0700
+Subject: [PATCH] configure: Fix checks for system headers
+
+Define _DEFAULT_SOURCE in system.c so unistd.h can expose readlink API
+
+Upstream-Status: Pending
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.ac | 2 +-
+ src/system.c | 2 ++
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/configure.ac b/configure.ac
+index cdf1eba..6fc81e6 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -152,7 +152,7 @@ fi
+ 
+ dnl Checks for header files.
+ 
+-AC_CHECK_HEADERS( [unistd.h, stdint.h, inttypes.h])
++AC_CHECK_HEADERS( [unistd.h stdint.h inttypes.h])
+ 
+ dnl Checks for typedefs, and compiler characteristics.
+ 
+diff --git a/src/system.c b/src/system.c
+index 4e008fa..98631a6 100644
+--- a/src/system.c
++++ b/src/system.c
+@@ -36,6 +36,8 @@
+  *      1. specify the constants in "configed.H" or "noconfig.H",
+  *      2. append the system-dependent routines in this file.
+  */
++
++#define _DEFAULT_SOURCE
+ #if PREPROCESSED
+ #include    "mcpp.H"
+ #else
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-devtools/mcpp/mcpp_2.7.2.bb b/meta-openembedded/meta-oe/recipes-devtools/mcpp/mcpp_2.7.2.bb
index f8125f7..9445856 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/mcpp/mcpp_2.7.2.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/mcpp/mcpp_2.7.2.bb
@@ -5,6 +5,7 @@
 
 SRC_URI = "${SOURCEFORGE_MIRROR}/${BPN}/${BPN}-${PV}.tar.gz \
            file://ice-mcpp.patch \
+           file://0001-configure-Fix-checks-for-system-headers.patch \
            file://CVE-2019-14274.patch"
 SRC_URI[md5sum] = "512de48c87ab023a69250edc7a0c7b05"
 SRC_URI[sha256sum] = "3b9b4421888519876c4fc68ade324a3bbd81ceeb7092ecdbbc2055099fcb8864"
diff --git a/meta-openembedded/meta-oe/recipes-devtools/nlohmann-fifo/nlohmann-fifo_git.bb b/meta-openembedded/meta-oe/recipes-devtools/nlohmann-fifo/nlohmann-fifo_git.bb
index ace3215..b5d05d4 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/nlohmann-fifo/nlohmann-fifo_git.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/nlohmann-fifo/nlohmann-fifo_git.bb
@@ -8,7 +8,7 @@
 
 PV = "1.0.0+git${SRCPV}"
 
-SRCREV = "0dfbf5dacbb15a32c43f912a7e66a54aae39d0f9"
+SRCREV = "d732aaf9a315415ae8fd7eb11e3a4c1f80e42a48"
 
 UPSTREAM_CHECK_COMMITS = "1"
 
@@ -20,6 +20,9 @@
 
 BBCLASSEXTEND = "native nativesdk"
 
+# See https://github.com/SOCI/soci/issues/984
+CXXFLAGS:append:toolchain-clang:runtime-llvm = " -DCATCH_CONFIG_CPP11_NO_SHUFFLE"
+
 do_install() {
     install -d ${D}${includedir}
     install -m 0644 ${S}/src/fifo_map.hpp ${D}${includedir} 
diff --git a/meta-openembedded/meta-oe/recipes-devtools/octave/octave_7.1.0.bb b/meta-openembedded/meta-oe/recipes-devtools/octave/octave_7.2.0.bb
similarity index 93%
rename from meta-openembedded/meta-oe/recipes-devtools/octave/octave_7.1.0.bb
rename to meta-openembedded/meta-oe/recipes-devtools/octave/octave_7.2.0.bb
index c9982a7..eb96a62 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/octave/octave_7.1.0.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/octave/octave_7.2.0.bb
@@ -32,7 +32,7 @@
     ${GNU_MIRROR}/octave/${BPN}-${PV}.tar.gz \
     file://fix-blas-library-integer-size.patch \
 "
-SRC_URI[sha256sum] = "d4a9d81f3f67b4a6e07cb7a80dcb10ad5e9176fcc30762c70a81580a64b8b0b6"
+SRC_URI[sha256sum] = "b12cb652587d31c5c382b39ed73463c22a5259ecb2fa6b323a27da409222dacc"
 
 # Note: Qt5Help is required for gui -> qttools(-native) must be build with
 # clang in PACKAGECONFIG
diff --git a/meta-openembedded/meta-oe/recipes-devtools/perl/libdev-checklib-perl/0001-CheckLib.pm-don-t-execute-the-binary.patch b/meta-openembedded/meta-oe/recipes-devtools/perl/libdev-checklib-perl/0001-CheckLib.pm-don-t-execute-the-binary.patch
index 78a7e63..8d4920b 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/perl/libdev-checklib-perl/0001-CheckLib.pm-don-t-execute-the-binary.patch
+++ b/meta-openembedded/meta-oe/recipes-devtools/perl/libdev-checklib-perl/0001-CheckLib.pm-don-t-execute-the-binary.patch
@@ -29,18 +29,18 @@
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/lib/Devel/CheckLib.pm b/lib/Devel/CheckLib.pm
-index e45cfb4..d228bb5 100644
+index 2e5a252..5c2f0b0 100644
 --- a/lib/Devel/CheckLib.pm
 +++ b/lib/Devel/CheckLib.pm
-@@ -424,7 +424,7 @@ sub assert_lib {
-             my $absexefile = File::Spec->rel2abs($exefile);
-             $absexefile = '"'.$absexefile.'"' if $absexefile =~ m/\s/;
-             if (!$not_execute && system($absexefile) != 0) {
--                push @wrongresult, $lib;
-+                print "Checking the lib $lib\n";
+@@ -403,7 +403,7 @@ sub assert_lib {
+             if ($execute) {
+                 my $retval = system($absexefile);
+                 warn "# return value: $retval\n" if $args{debug};
+-                push @wrongresult, $lib if $retval != 0;
++                print "Checking the lib $lib\n" if $retval != 0;
              }
-             else {
-                 if ($analyze_binary) {
+             push @wronganalysis, $lib
+                 if $analyze_binary and !$analyze_binary->($lib, $exefile);
 -- 
-2.17.1
+2.25.1
 
diff --git a/meta-openembedded/meta-oe/recipes-devtools/perl/libdev-checklib-perl_1.14.bb b/meta-openembedded/meta-oe/recipes-devtools/perl/libdev-checklib-perl_1.16.bb
similarity index 87%
rename from meta-openembedded/meta-oe/recipes-devtools/perl/libdev-checklib-perl_1.14.bb
rename to meta-openembedded/meta-oe/recipes-devtools/perl/libdev-checklib-perl_1.16.bb
index 0863bb8..74a09e7 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/perl/libdev-checklib-perl_1.14.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/perl/libdev-checklib-perl_1.16.bb
@@ -11,8 +11,7 @@
 SRC_URI = "https://cpan.metacpan.org/modules/by-module/Devel/Devel-CheckLib-${PV}.tar.gz \
            file://0001-CheckLib.pm-don-t-execute-the-binary.patch \
 "
-SRC_URI[md5sum] = "3519cbf9fe5ec3404449d5330ee5537f"
-SRC_URI[sha256sum] = "f21c5e299ad3ce0fdc0cb0f41378dca85a70e8d6c9a7599f0e56a957200ec294"
+SRC_URI[sha256sum] = "869d38c258e646dcef676609f0dd7ca90f085f56cf6fd7001b019a5d5b831fca"
 
 S = "${WORKDIR}/Devel-CheckLib-${PV}"
 
diff --git a/meta-openembedded/meta-oe/recipes-devtools/perl/libio-pty-perl/0001-Tty.xs-Do-not-mark-strlcpy-as-static.patch b/meta-openembedded/meta-oe/recipes-devtools/perl/libio-pty-perl/0001-Tty.xs-Do-not-mark-strlcpy-as-static.patch
new file mode 100644
index 0000000..07c7690
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/perl/libio-pty-perl/0001-Tty.xs-Do-not-mark-strlcpy-as-static.patch
@@ -0,0 +1,45 @@
+From fae771aefc593a0ef798bc0c1e21b0524eb85e2d Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 31 Aug 2022 20:32:35 -0700
+Subject: [PATCH] Tty.xs: Do not mark strlcpy as static
+
+Some libcs, e.g. musl, do not provide an implementation of strlcpy but they
+do provide the signature in string.h. If we mark it static here then it
+conflicts with the libc declaration and the compiler may warn/error
+
+Fixes
+Tty.xs:190:1: error: static declaration of 'strlcpy' follows non-static declaration
+strlcpy(                                                                                                                 ^
+/mnt/b/yoe/master/build/tmp/work/core2-64-yoe-linux-musl/libio-pty-perl/1.16-r0/recipe-sysroot/usr/include/string.h:86:8: note: previous declaration is here
+size_t strlcpy (char *, const char *, size_t);                                                                                  ^
+
+Upstream-Status: Submitted [https://github.com/toddr/IO-Tty/pull/33]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ Tty.xs | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/Tty.xs b/Tty.xs
+index aa638f4..4bab39d 100644
+--- a/Tty.xs
++++ b/Tty.xs
+@@ -186,11 +186,11 @@ mysignal(int sig, mysig_t act)
+  * will be copied.  Always NUL terminates (unless siz == 0).
+  * Returns strlen(src); if retval >= siz, truncation occurred.
+  */
+-static size_t
+-strlcpy(dst, src, siz)
+-        char *dst;
+-        const char *src;
+-        size_t siz;
++size_t
++strlcpy(
++        char *dst,
++        const char *src,
++        size_t siz)
+ {
+         register char *d = dst;
+         register const char *s = src;
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-devtools/perl/libio-pty-perl_1.16.bb b/meta-openembedded/meta-oe/recipes-devtools/perl/libio-pty-perl_1.16.bb
index e714851..0f1c71b 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/perl/libio-pty-perl_1.16.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/perl/libio-pty-perl_1.16.bb
@@ -3,7 +3,9 @@
 LICENSE = "Artistic-1.0 | GPL-1.0-or-later"
 LIC_FILES_CHKSUM = "file://META.yml;beginline=11;endline=12;md5=b2562f94907eeb42e8ce9d45f628e587"
 
-SRC_URI = "http://www.cpan.org/modules/by-module/IO/IO-Tty-${PV}.tar.gz"
+SRC_URI = "http://www.cpan.org/modules/by-module/IO/IO-Tty-${PV}.tar.gz \
+           file://0001-Tty.xs-Do-not-mark-strlcpy-as-static.patch \
+           "
 
 SRC_URI[md5sum] = "5ee30bf7c76f00cc69f92388ad776e2a"
 SRC_URI[sha256sum] = "8f1a09c070738adc695df903f2e7f74308dd8d991b914c0bc390a0e6021294dd"
diff --git a/meta-openembedded/meta-oe/recipes-devtools/php/php_8.1.9.bb b/meta-openembedded/meta-oe/recipes-devtools/php/php_8.1.10.bb
similarity index 98%
rename from meta-openembedded/meta-oe/recipes-devtools/php/php_8.1.9.bb
rename to meta-openembedded/meta-oe/recipes-devtools/php/php_8.1.10.bb
index 03756e0..9fc39e8 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/php/php_8.1.9.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/php/php_8.1.10.bb
@@ -33,7 +33,7 @@
           "
 
 S = "${WORKDIR}/php-${PV}"
-SRC_URI[sha256sum] = "9ebb0e2e571db6fd5930428dcb2d19ed3e050338ec1f1347c282cae92fc086ff"
+SRC_URI[sha256sum] = "2de8e0402285f7c56887defe651922308aded58ba60befcf3b77720209e31f10"
 
 CVE_CHECK_IGNORE += "\
     CVE-2007-2728 \
diff --git a/meta-openembedded/meta-oe/recipes-devtools/pmtools/pmtools/pmtools-switch-to-dynamic-buffer-for-huge-ACPI-table.patch b/meta-openembedded/meta-oe/recipes-devtools/pmtools/pmtools/pmtools-switch-to-dynamic-buffer-for-huge-ACPI-table.patch
index 7ccdab0..3be1898 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/pmtools/pmtools/pmtools-switch-to-dynamic-buffer-for-huge-ACPI-table.patch
+++ b/meta-openembedded/meta-oe/recipes-devtools/pmtools/pmtools/pmtools-switch-to-dynamic-buffer-for-huge-ACPI-table.patch
@@ -18,22 +18,41 @@
  madt/madt.c | 18 +++++++++++++++++-
  1 file changed, 17 insertions(+), 1 deletion(-)
 
-diff --git a/madt/madt.c b/madt/madt.c
-index aed965c..8770cd5 100644
 --- a/madt/madt.c
 +++ b/madt/madt.c
-@@ -51,7 +51,9 @@ get_next_entry(acpi_table_entry_header * entry_header)
+@@ -34,14 +34,16 @@ typedef unsigned long long u64;
+ //#include <sys/mman.h>
+ #include <stdio.h> // fread
+ #include <stdlib.h> // malloc
++#include <string.h> // memset/memcpy
+ 
+ #include "./tables.c"
+ 
+ int verbose = 0;
+ /*
+-/* read standard input
++ * read standard input
+  * write decoded madt to standard output
+  */
++size_t
+ get_next_entry(acpi_table_entry_header * entry_header)
+ {
+ 	size_t retval;
+@@ -51,9 +53,11 @@ get_next_entry(acpi_table_entry_header *
  	return retval;
  }
  
 -u8	buffer[1024];
-+
+ 
+-main()
 +u8	buf[1024];
 +u8	*buffer = buf;
- 
- main()
++int
++main(int argc, char *argv[])
  {
-@@ -75,6 +77,17 @@ main()
+ 	size_t retval;
+ 	struct acpi_table_madt *madt_header;
+@@ -75,6 +79,17 @@ main()
  
  	if (verbose) printf("header.length %d\n", madt_header->header.length);
  
@@ -51,7 +70,7 @@
  	acpi_table_print((void*)&(buffer[bytes_read]), 0);
  
  	bytes_read = sizeof(struct acpi_table_madt);
-@@ -118,6 +131,9 @@ done:
+@@ -118,6 +133,9 @@ done:
  		printf("Checksum 0x%x != 0; 0x%x in header ERROR\n", csum,
  			 madt_header->header.checksum);
  
@@ -61,6 +80,3 @@
  	return 0;
  }
  
--- 
-1.8.1.2
-
diff --git a/meta-openembedded/meta-oe/recipes-devtools/unifex/unifex_git.bb b/meta-openembedded/meta-oe/recipes-devtools/unifex/unifex_git.bb
deleted file mode 100644
index 85fe39b..0000000
--- a/meta-openembedded/meta-oe/recipes-devtools/unifex/unifex_git.bb
+++ /dev/null
@@ -1,24 +0,0 @@
-DESCRIPTION = "C++ Unified Executors library"
-HOMEPAGE = "https://github.com/facebookexperimental/libunifex"
-SECTION = "libs"
-LICENSE = "Apache-2.0-with-LLVM-exception"
-LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=5b86506074cb3cdc493b4f96b73b2909"
-
-SRC_URI = "git://github.com/facebookexperimental/libunifex.git;branch=main;protocol=https"
-SRCREV = "9df21c58d34ce8a1cd3b15c3a7347495e29417a0"
-
-S = "${WORKDIR}/git"
-
-inherit cmake pkgconfig
-
-PACKAGECONFIG ??= "liburing"
-PACKAGECONFIG[liburing] = ",,liburing"
-
-EXTRA_OECMAKE += " \
-    -DBUILD_SHARED_LIBS=ON \
-    -DBUILD_TESTING=OFF \
-    -DCMAKE_CXX_STANDARD=20 \
-    -DUNIFEX_BUILD_EXAMPLES=OFF \
-    "
-
-BBCLASSEXTEND = "native nativesdk"
diff --git a/meta-openembedded/meta-oe/recipes-devtools/uw-imap/uw-imap/0001-Define-prototype-for-safe_flock.patch b/meta-openembedded/meta-oe/recipes-devtools/uw-imap/uw-imap/0001-Define-prototype-for-safe_flock.patch
new file mode 100644
index 0000000..11f1c18
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/uw-imap/uw-imap/0001-Define-prototype-for-safe_flock.patch
@@ -0,0 +1,105 @@
+From c512c877a7ca933bee980dcc1268a7319f233d59 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 00:07:47 -0700
+Subject: [PATCH] Define prototype for safe_flock
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/osdep/unix/env_unix.c | 3 +++
+ src/osdep/unix/mbx.c      | 2 ++
+ src/osdep/unix/os_lnx.h   | 2 ++
+ src/osdep/unix/os_slx.h   | 3 +++
+ src/osdep/unix/unix.c     | 4 ++++
+ 5 files changed, 14 insertions(+)
+
+diff --git a/src/osdep/unix/env_unix.c b/src/osdep/unix/env_unix.c
+index 6b2c447..cefefca 100644
+--- a/src/osdep/unix/env_unix.c
++++ b/src/osdep/unix/env_unix.c
+@@ -59,6 +59,9 @@
+ #define S_IXOTH (S_IEXEC >> 6)
+ #endif
+ 
++
++extern int safe_flock (int fd,int op);
++
+ /* c-client environment parameters */
+ 
+ static char *myUserName = NIL;	/* user name */
+diff --git a/src/osdep/unix/mbx.c b/src/osdep/unix/mbx.c
+index 1ece5d8..c8a45a5 100644
+--- a/src/osdep/unix/mbx.c
++++ b/src/osdep/unix/mbx.c
+@@ -41,12 +41,14 @@ extern int errno;		/* just in case */
+ #include "mail.h"
+ #include "osdep.h"
+ #include <pwd.h>
++#include <utime.h>
+ #include <sys/stat.h>
+ #include <sys/time.h>
+ #include "misc.h"
+ #include "dummy.h"
+ #include "fdstring.h"
+ 
++extern int safe_flock (int fd,int op);
+ 
+ /* Build parameters */
+ 
+diff --git a/src/osdep/unix/os_lnx.h b/src/osdep/unix/os_lnx.h
+index b5f39ff..22c216b 100644
+--- a/src/osdep/unix/os_lnx.h
++++ b/src/osdep/unix/os_lnx.h
+@@ -57,6 +57,8 @@
+ 
+ #define direct dirent
+ 
++extern int safe_flock (int fd,int op);
++
+ #define flock safe_flock
+ 
+ 
+diff --git a/src/osdep/unix/os_slx.h b/src/osdep/unix/os_slx.h
+index b5f39ff..c9adbcd 100644
+--- a/src/osdep/unix/os_slx.h
++++ b/src/osdep/unix/os_slx.h
+@@ -46,11 +46,14 @@
+ #include <sys/types.h>
+ #include <dirent.h>
+ #include <time.h>		/* for struct tm */
++#include <utime.h>		/* for utime() and struct utimbuf */
+ #include <fcntl.h>
+ #include <syslog.h>
+ #include <sys/file.h>
+ 
+ 
++extern int safe_flock (int fd,int op);
++
+ /* Linux gets this wrong */
+ 
+ #define setpgrp setpgid
+diff --git a/src/osdep/unix/unix.c b/src/osdep/unix/unix.c
+index be3c437..86be3f9 100644
+--- a/src/osdep/unix/unix.c
++++ b/src/osdep/unix/unix.c
+@@ -45,6 +45,7 @@ extern int errno;		/* just in case */
+ #include "mail.h"
+ #include "osdep.h"
+ #include <time.h>
++#include <utime.h>
+ #include <sys/stat.h>
+ #include "unix.h"
+ #include "pseudo.h"
+@@ -52,6 +53,9 @@ extern int errno;		/* just in case */
+ #include "misc.h"
+ #include "dummy.h"
+ 
++
++extern int safe_flock (int fd,int op);
++
+ /* UNIX I/O stream local data */
+ 
+ typedef struct unix_local {
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-devtools/uw-imap/uw-imap/0001-Do-not-build-mtest.patch b/meta-openembedded/meta-oe/recipes-devtools/uw-imap/uw-imap/0001-Do-not-build-mtest.patch
new file mode 100644
index 0000000..fd2f30c
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/uw-imap/uw-imap/0001-Do-not-build-mtest.patch
@@ -0,0 +1,38 @@
+From f92becaf97be16a28013693cd99bac92c54074f2 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 7 Sep 2022 13:54:58 -0700
+Subject: [PATCH 1/2] Do not build mtest
+
+It's a test utility which is not generally used. We need to disable it
+because it uses the gets() function, which is not available in glibc. If we
+want to use it, then port it to use something like fgets().
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ Makefile | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/Makefile b/Makefile
+index cf6d405..1e2d0fb 100644
+--- a/Makefile
++++ b/Makefile
+@@ -669,7 +669,6 @@ an ua:
+ 	$(TOOLS)/$@ "$(LN)" src/ansilib c-client
+ 	$(TOOLS)/$@ "$(LN)" src/charset c-client
+ 	$(TOOLS)/$@ "$(LN)" src/osdep/$(SYSTEM) c-client
+-	$(TOOLS)/$@ "$(LN)" src/mtest mtest
+ 	$(TOOLS)/$@ "$(LN)" src/ipopd ipopd
+ 	$(TOOLS)/$@ "$(LN)" src/imapd imapd
+ 	$(TOOLS)/$@ "$(LN)" src/mailutil mailutil
+@@ -706,7 +705,6 @@ rebuildclean:
+ 
+ bundled:
+ 	@echo Building bundled tools...
+-	$(CD) mtest;$(MAKE)
+ 	$(CD) ipopd;$(MAKE)
+ 	$(CD) imapd;$(MAKE)
+ 	$(CD) mailutil;$(MAKE)
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-devtools/uw-imap/uw-imap/0002-tmail-Include-ctype.h-for-isdigit.patch b/meta-openembedded/meta-oe/recipes-devtools/uw-imap/uw-imap/0002-tmail-Include-ctype.h-for-isdigit.patch
new file mode 100644
index 0000000..5778a00
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-devtools/uw-imap/uw-imap/0002-tmail-Include-ctype.h-for-isdigit.patch
@@ -0,0 +1,26 @@
+From fbd00d93cc07fa5da20414b355fffe628dcd37b3 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 7 Sep 2022 13:57:19 -0700
+Subject: [PATCH 2/2] tmail: Include ctype.h for isdigit
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/tmail/tmail.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/src/tmail/tmail.c b/src/tmail/tmail.c
+index ed5fc58..a9e3645 100644
+--- a/src/tmail/tmail.c
++++ b/src/tmail/tmail.c
+@@ -26,6 +26,7 @@
+  * Last Edited:	30 October 2008
+  */
+ 
++#include <ctype.h>		/* for isdigit */
+ #include <stdio.h>
+ #include <pwd.h>
+ #include <errno.h>
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-devtools/uw-imap/uw-imap_2007f.bb b/meta-openembedded/meta-oe/recipes-devtools/uw-imap/uw-imap_2007f.bb
index df90b62..9fb12b2 100644
--- a/meta-openembedded/meta-oe/recipes-devtools/uw-imap/uw-imap_2007f.bb
+++ b/meta-openembedded/meta-oe/recipes-devtools/uw-imap/uw-imap_2007f.bb
@@ -11,6 +11,9 @@
            file://imap-2007e-shared.patch \
            file://imap-2007f-format-security.patch \
            file://0001-Support-OpenSSL-1.1.patch \
+           file://0001-Define-prototype-for-safe_flock.patch \
+           file://0001-Do-not-build-mtest.patch \
+           file://0002-tmail-Include-ctype.h-for-isdigit.patch \
            "
 
 SRC_URI[md5sum] = "2126fd125ea26b73b20f01fcd5940369"
@@ -25,7 +28,7 @@
 PACKAGECONFIG ??= "${@bb.utils.filter('DISTRO_FEATURES', 'pam', d)}"
 PACKAGECONFIG[pam] = ",,libpam"
 
-EXTRA_OEMAKE = "CC='${CC}' ARRC='${AR} -rc' RANLIB='${RANLIB}'"
+EXTRA_OEMAKE = "CC='${CC} -std=c99 -D_GNU_SOURCE' ARRC='${AR} -rc' RANLIB='${RANLIB}'"
 
 HEADERS = "src/c-client/*.h src/osdep/unix/*.h c-client/auths.c c-client/linkage.c c-client/linkage.h c-client/osdep.h"
 
diff --git a/meta-openembedded/meta-oe/recipes-extended/collectd/collectd_5.12.0.bb b/meta-openembedded/meta-oe/recipes-extended/collectd/collectd_5.12.0.bb
index dd97796..5dc6458 100644
--- a/meta-openembedded/meta-oe/recipes-extended/collectd/collectd_5.12.0.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/collectd/collectd_5.12.0.bb
@@ -51,6 +51,7 @@
 PACKAGECONFIG[ldap] = "--enable-openldap --with-libldap,--disable-openldap --without-libldap, openldap"
 PACKAGECONFIG[rrdtool] = "--enable-rrdtool,--disable-rrdtool,rrdtool"
 PACKAGECONFIG[rrdcached] = "--enable-rrdcached,--disable-rrdcached,rrdcached"
+PACKAGECONFIG[python] = "--enable-python,--disable-python"
 
 EXTRA_OECONF = " \
                 ${FPLAYOUT} \
diff --git a/meta-openembedded/meta-oe/recipes-extended/enscript/enscript/0001-getopt-Include-string.h-for-strcmp-stcncmp-functions.patch b/meta-openembedded/meta-oe/recipes-extended/enscript/enscript/0001-getopt-Include-string.h-for-strcmp-stcncmp-functions.patch
new file mode 100644
index 0000000..a080b3a
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/enscript/enscript/0001-getopt-Include-string.h-for-strcmp-stcncmp-functions.patch
@@ -0,0 +1,27 @@
+From faec0206611f8ea4ca6f70987866077ac8c3c6c1 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 21:24:27 -0700
+Subject: [PATCH] getopt: Include string.h for strcmp/strncmp functions
+
+Upstream-Status: Pending
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ compat/getopt.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/compat/getopt.c b/compat/getopt.c
+index 752f28a..9b984b4 100644
+--- a/compat/getopt.c
++++ b/compat/getopt.c
+@@ -43,6 +43,7 @@
+ #endif
+ 
+ #include <stdio.h>
++#include <string.h> /* strcmp */
+ 
+ /* Comment out all this code if we are using the GNU C Library, and are not
+    actually compiling the library itself.  This code is part of the GNU C
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-extended/enscript/enscript_1.6.6.bb b/meta-openembedded/meta-oe/recipes-extended/enscript/enscript_1.6.6.bb
index 3f00621..9490ee0 100644
--- a/meta-openembedded/meta-oe/recipes-extended/enscript/enscript_1.6.6.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/enscript/enscript_1.6.6.bb
@@ -13,6 +13,7 @@
 SRC_URI = "${GNU_MIRROR}/${BPN}/${BP}.tar.gz \
            file://enscript-autoconf.patch \
            file://0001-Fix-builds-with-recent-gettext.patch \
+           file://0001-getopt-Include-string.h-for-strcmp-stcncmp-functions.patch \
            "
 
 inherit autotools gettext
diff --git a/meta-openembedded/meta-oe/recipes-extended/fluentbit/fluentbit/0001-Revert-Remove-unused-variable-in-mpi_mul_hlp.patch b/meta-openembedded/meta-oe/recipes-extended/fluentbit/fluentbit/0001-Revert-Remove-unused-variable-in-mpi_mul_hlp.patch
deleted file mode 100644
index 8a165dc..0000000
--- a/meta-openembedded/meta-oe/recipes-extended/fluentbit/fluentbit/0001-Revert-Remove-unused-variable-in-mpi_mul_hlp.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From af6cefba8c2675f58b75f93785337ab23054568c Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Thu, 18 Aug 2022 23:35:23 -0700
-Subject: [PATCH] Revert Remove unused variable in mpi_mul_hlp()
-
-This reverts
-https://github.com/Mbed-TLS/mbedtls/commit/e7f14a3090e6595eb3c8d821704ad9c90f6d3712
-
-Which helps in compiling the x86 asm code.
-
-Upstream-Status: Pending
-
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- lib/mbedtls-2.28.0/library/bignum.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/lib/mbedtls-2.28.0/library/bignum.c b/lib/mbedtls-2.28.0/library/bignum.c
-index 9c256ae..62e7f76 100644
---- a/lib/mbedtls-2.28.0/library/bignum.c
-+++ b/lib/mbedtls-2.28.0/library/bignum.c
-@@ -1392,7 +1392,7 @@ void mpi_mul_hlp( size_t i,
-                   mbedtls_mpi_uint *d,
-                   mbedtls_mpi_uint b )
- {
--    mbedtls_mpi_uint c = 0;
-+    mbedtls_mpi_uint c = 0, t = 0;
- 
- #if defined(MULADDC_HUIT)
-     for( ; i >= 8; i -= 8 )
-@@ -1443,6 +1443,8 @@ void mpi_mul_hlp( size_t i,
-     }
- #endif /* MULADDC_HUIT */
- 
-+    t++;
-+
-     while( c != 0 )
-     {
-         *d += c; c = ( *d < c ); d++;
--- 
-2.37.2
-
diff --git a/meta-openembedded/meta-oe/recipes-extended/fluentbit/fluentbit_1.9.7.bb b/meta-openembedded/meta-oe/recipes-extended/fluentbit/fluentbit_1.9.7.bb
index a1f8794..56ba72b 100644
--- a/meta-openembedded/meta-oe/recipes-extended/fluentbit/fluentbit_1.9.7.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/fluentbit/fluentbit_1.9.7.bb
@@ -22,8 +22,8 @@
            file://0004-Use-correct-type-to-store-return-from-flb_kv_item_cr.patch \
            file://0005-stackdriver-Fix-return-type-mismatch.patch \
            file://0006-monkey-Fix-TLS-detection-testcase.patch \
-           file://0001-Revert-Remove-unused-variable-in-mpi_mul_hlp.patch \
            "
+SRC_URI:remove:x86 = "file://0002-mbedtls-Remove-unused-variable.patch"
 SRC_URI:append:libc-musl = "\
            file://0001-Use-posix-strerror_r-with-musl.patch \
            file://0002-chunkio-Link-with-fts-library-with-musl.patch \
diff --git a/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0001-common-utils-Include-string.h-for-strcasestr.patch b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0001-common-utils-Include-string.h-for-strcasestr.patch
new file mode 100644
index 0000000..659eca4
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0001-common-utils-Include-string.h-for-strcasestr.patch
@@ -0,0 +1,44 @@
+From 20984c73bea8c3df00f297176edd4f6d47c31b55 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 17:49:20 -0700
+Subject: [PATCH 1/4] common/utils: Include string.h for strcasestr
+
+Also define _GNU_SOURCE for the same
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ common/utils.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/common/utils.c
++++ b/common/utils.c
+@@ -1,9 +1,11 @@
++#define _GNU_SOURCE
+ #include "utils.h"
+ #include "string.h"
+ #include <dlfcn.h>
+ #include <sys/stat.h>
+ #include <errno.h>
+ #include <stdlib.h>
++#include <string.h> /* strcasestr */
+ 
+ extern int errno;
+ 
+--- a/protocol/hp_ipp.c
++++ b/protocol/hp_ipp.c
+@@ -18,12 +18,13 @@ Boston, MA 02110-1301, USA.
+ 
+ \******************************************************************************/
+ 
+-
++#define _GNU_SOURCE
+ #include <cups/cups.h>
+ #include <cups/language.h>
+ #include <cups/ppd.h>
+ #include <syslog.h>
+ #include <stdarg.h>
++#include <string.h> /* strcasecmp */
+ #include <sys/types.h>
+ #include <pwd.h>
+ #include <sys/stat.h> 
diff --git a/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0002-Add-ImageProcessor-only-when-DISBALE_IMAGEPROCESSOR_.patch b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0002-Add-ImageProcessor-only-when-DISBALE_IMAGEPROCESSOR_.patch
new file mode 100644
index 0000000..2cfe125
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0002-Add-ImageProcessor-only-when-DISBALE_IMAGEPROCESSOR_.patch
@@ -0,0 +1,32 @@
+From 3d53d02af7c45763eb33f7bbe5f9e389fbcb7e21 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 17:55:48 -0700
+Subject: [PATCH 2/4] Add ImageProcessor only when DISBALE_IMAGEPROCESSOR_BUILD
+ is not set
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ Makefile.am | 6 +++++-
+ 1 file changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/Makefile.am b/Makefile.am
+index 5f75759..73421b1 100644
+--- a/Makefile.am
++++ b/Makefile.am
+@@ -597,7 +597,11 @@ hpcups_SOURCES = prnt/hpcups/HPCupsFilter.cpp prnt/hpcups/HPCupsFilter.h prnt/hp
+ 	prnt/hpcups/ImageProcessor.h
+ 
+ hpcups_CXXFLAGS = $(APDK_ENDIAN_FLAG) $(DBUS_CFLAGS)
+-hpcups_LDADD = -L./prnt/hpcups/ -ljpeg -ldl -lImageProcessor -lcups -lcupsimage -lz $(DBUS_LIBS)
++hpcups_LDADD = -L./prnt/hpcups/ -ljpeg -ldl -lcups -lcupsimage -lz $(DBUS_LIBS)
++if !DISBALE_IMAGEPROCESSOR_BUILD
++hpcups_LDADD += "-lImageProcessor"
++endif #DISABLE_IMAGEPROCESSOR
++
+ #else
+ #hpcupsdir = $(cupsfilterdir)
+ #hpcups_PROGRAMS = hpcups
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0003-pserror.c-Define-column-to-be-int-explcitly.patch b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0003-pserror.c-Define-column-to-be-int-explcitly.patch
new file mode 100644
index 0000000..78325ac
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0003-pserror.c-Define-column-to-be-int-explcitly.patch
@@ -0,0 +1,21 @@
+From a27d6264671e7201b5d78bcc9200e7d946429979 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 17:57:53 -0700
+Subject: [PATCH 3/4] pserror.c: Define column to be int explicitly
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ prnt/hpps/pserror.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/prnt/hpps/pserror.c
++++ b/prnt/hpps/pserror.c
+@@ -24,7 +24,7 @@ extern char *program ;	/* Defined by mai
+ void message(int flags, char *format, ...)
+ {
+   va_list args ;
+-  static column = 0 ;		/* current screen column for message wrap */
++  static int column = 0 ;	/* current screen column for message wrap */
+   char msgbuf[MAX_MESSAGE] ;	/* buffer in which to put the message */
+   char *bufptr = msgbuf ;	/* message buffer pointer */
+ 
diff --git a/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0004-Define-missing-prototype-for-functions.patch b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0004-Define-missing-prototype-for-functions.patch
new file mode 100644
index 0000000..276d025
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0004-Define-missing-prototype-for-functions.patch
@@ -0,0 +1,53 @@
+From 33454817880fa57b2226dd40b724e5c3d6074aca Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 17:58:33 -0700
+Subject: [PATCH 4/4] Define missing prototype for functions
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ prnt/cupsext/cupsext.c | 1 +
+ protocol/hp_ipp.c      | 4 ++--
+ 2 files changed, 3 insertions(+), 2 deletions(-)
+
+--- a/prnt/cupsext/cupsext.c
++++ b/prnt/cupsext/cupsext.c
+@@ -101,6 +101,11 @@ typedef int Py_ssize_t;
+ #define _STRINGIZE(x) #x
+ #define STRINGIZE(x) _STRINGIZE(x)
+ 
++void _releaseCupsInstance(void);
++int addCupsPrinter(char *name, char *device_uri, char *location, char *ppd_file, char *model, char *info);
++int setDefaultCupsPrinter(char *pr_name);
++int delCupsPrinter(char *pr_name);
++int controlCupsPrinter(char *pr_name, int op);
+ 
+ //static http_t * http = NULL;     /* HTTP object */
+ 
+--- a/protocol/hp_ipp.c
++++ b/protocol/hp_ipp.c
+@@ -22,6 +22,7 @@ Boston, MA 02110-1301, USA.
+ #include <cups/cups.h>
+ #include <cups/language.h>
+ #include <cups/ppd.h>
++#include <stdio.h>
+ #include <syslog.h>
+ #include <stdarg.h>
+ #include <string.h> /* strcasecmp */
+@@ -42,7 +43,7 @@ Boston, MA 02110-1301, USA.
+ #define STRINGIZE(x) _STRINGIZE(x)
+ 
+ 
+-http_t* acquireCupsInstance()
++http_t* acquireCupsInstance(void)
+ {
+     if ( http == NULL)
+     {
+@@ -53,7 +54,7 @@ http_t* acquireCupsInstance()
+ }
+ 
+ 
+-void _releaseCupsInstance()
++void _releaseCupsInstance(void)
+ {
+     if (http)
+     {
diff --git a/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0005-hp_ipp.c-Add-printf-format-to-snprintf-calls.patch b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0005-hp_ipp.c-Add-printf-format-to-snprintf-calls.patch
new file mode 100644
index 0000000..d844e49
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0005-hp_ipp.c-Add-printf-format-to-snprintf-calls.patch
@@ -0,0 +1,62 @@
+From 4b3014df3990d90d6929510f2bde073171503329 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 18:18:44 -0700
+Subject: [PATCH] hp_ipp.c: Add printf format to snprintf calls
+
+Avoid -Wformat warnings
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ protocol/hp_ipp.c | 14 +++++++-------
+ 1 file changed, 7 insertions(+), 7 deletions(-)
+
+diff --git a/protocol/hp_ipp.c b/protocol/hp_ipp.c
+index 597d9b9..a027baf 100644
+--- a/protocol/hp_ipp.c
++++ b/protocol/hp_ipp.c
+@@ -112,7 +112,7 @@ int addCupsPrinter(char *name, char *device_uri, char *location, char *ppd_file,
+      }
+ 
+      if ( info == NULL )
+-        snprintf( info,sizeof(info), name );
++        snprintf( info,sizeof(info), "%s", name );
+ 
+      sprintf( printer_uri, "ipp://localhost/printers/%s", name );
+ 
+@@ -513,27 +513,27 @@ int __parsePrinterAttributes(ipp_t *response, printer_t **printer_list)
+ 
+              if ( strcmp(attr_name, "printer-name") == 0 &&
+                                         val_tag == IPP_TAG_NAME ) {
+-                  snprintf(t_printer->name, sizeof(t_printer->name),ippGetString(attr, 0, NULL) );
++                  snprintf(t_printer->name, sizeof(t_printer->name), "%s", ippGetString(attr, 0, NULL) );
+              }
+              else if ( strcmp(attr_name, "device-uri") == 0 &&
+                                          val_tag == IPP_TAG_URI ) {
+-                  snprintf(t_printer->device_uri,sizeof(t_printer->device_uri), ippGetString(attr, 0, NULL) );
++                  snprintf(t_printer->device_uri,sizeof(t_printer->device_uri), "%s", ippGetString(attr, 0, NULL) );
+              }
+              else if ( strcmp(attr_name, "printer-uri-supported") == 0 &&
+                                                  val_tag == IPP_TAG_URI ) {
+-                  snprintf(t_printer->printer_uri,sizeof(t_printer->printer_uri), ippGetString(attr, 0, NULL) );
++                  snprintf(t_printer->printer_uri,sizeof(t_printer->printer_uri), "%s", ippGetString(attr, 0, NULL) );
+              }
+              else if ( strcmp(attr_name, "printer-info") == 0 &&
+                                         val_tag == IPP_TAG_TEXT ) {
+-                  snprintf(t_printer->info,sizeof(t_printer->info), ippGetString(attr, 0, NULL) );
++                  snprintf(t_printer->info,sizeof(t_printer->info), "%s", ippGetString(attr, 0, NULL) );
+              }
+              else if ( strcmp(attr_name, "printer-location") == 0 &&
+                                            val_tag == IPP_TAG_TEXT ) {
+-                  snprintf(t_printer->location,sizeof(t_printer->location),ippGetString(attr, 0, NULL) );
++                  snprintf(t_printer->location,sizeof(t_printer->location), "%s", ippGetString(attr, 0, NULL) );
+              }
+              else if ( strcmp(attr_name, "printer-make-and-model") == 0 &&
+                                                   val_tag == IPP_TAG_TEXT ) {
+-                  snprintf(t_printer->make_model,sizeof(t_printer->make_model),ippGetString(attr, 0, NULL) );
++                  snprintf(t_printer->make_model,sizeof(t_printer->make_model), "%s", ippGetString(attr, 0, NULL) );
+              } 
+              else if ( strcmp(attr_name, "printer-state") == 0 &&
+                                              val_tag == IPP_TAG_ENUM ) {
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0006-Workaround-patch-for-missing-Python3-transition-of-t.patch b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0006-Workaround-patch-for-missing-Python3-transition-of-t.patch
new file mode 100644
index 0000000..5d78bb3
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/0006-Workaround-patch-for-missing-Python3-transition-of-t.patch
@@ -0,0 +1,130 @@
+From: Till Kamppeter <till.kamppeter@gmail.com>
+Date: Fri, 22 Jul 2016 09:33:04 +0200
+Subject: Workaround patch for missing Python3 transition of the old
+ (pre-USB-storage) photo memory card support (pcardext) as this part builds
+ in Python3 environments but with pointer-related warnings which are fatal
+ errors for Ubuntu's build servers. The patch silences the warnings but the
+ memory card support is dropped in Python3 environments. This patch is
+ supplied by the HPLIP upstream developers and will be replaced by a more
+ proper solution in the next upstream release of HPLIP (see LP: #1275353)
+
+---
+ pcard/pcardext/pcardext.c | 59 +++++++++++++++++++++++++++++++++++++----------
+ pcard/photocard.py        |  2 +-
+ unload.py                 |  5 ++++
+ 3 files changed, 53 insertions(+), 13 deletions(-)
+
+--- a/pcard/pcardext/pcardext.c
++++ b/pcard/pcardext/pcardext.c
+@@ -20,7 +20,7 @@ pcardext - Python extension for HP photo
+ Requires:
+ Python 2.2+
+ 
+-Author: Don Welch
++Author: Don Welch
+ 
+ \*****************************************************************************/
+ 
+@@ -41,9 +41,37 @@ typedef int Py_ssize_t;
+ 
+ int verbose=0;
+ 
++#if PY_MAJOR_VERSION >= 3
++  #define MOD_ERROR_VAL NULL
++  #define MOD_SUCCESS_VAL(val) val
++  #define MOD_INIT(name) PyMODINIT_FUNC PyInit_##name(void)
++  #define PyInt_AS_LONG PyLong_AS_LONG
++  #define MOD_DEF(ob, name, doc, methods) \
++          static struct PyModuleDef moduledef = { \
++            PyModuleDef_HEAD_INIT, name, doc, -1, methods, }; \
++          ob = PyModule_Create(&moduledef);
++
++
++  #define PY_String_Bytes  PyBytes_FromStringAndSize
++  #define PY_AsString_Bytes  PyBytes_AsStringAndSize
++
++#else
++  #define MOD_ERROR_VAL
++  #define MOD_SUCCESS_VAL(val)
++  #define MOD_INIT(name) void init##name(void)
++  #define MOD_DEF(ob, name, doc, methods)         \
++        ob = Py_InitModule3(name, methods, doc);
++
++  #define PY_String_Bytes PyString_FromStringAndSize
++  #define PY_AsString_Bytes PyString_AsStringAndSize
++
++#endif
++
+ PyObject * readsectorFunc = NULL;
+ PyObject * writesectorFunc = NULL;
+ 
++
++
+ int ReadSector(int sector, int nsector, void *buf, int size)
+ {
+     PyObject * result;
+@@ -59,9 +87,13 @@ int ReadSector(int sector, int nsector,
+         if( result )
+         {
+             Py_ssize_t len = 0;
+-            PyString_AsStringAndSize( result, &result_str, &len );
++
++            //PyString_AsStringAndSize( result, &result_str, &len );
++            //PyBytes_AsStringAndSize( result, &result_str, &len );
++            PY_AsString_Bytes( result, &result_str, &len );
+             
+-            if( len < nsector*FAT_HARDSECT )
++
++	    if( len < nsector*FAT_HARDSECT )
+             {
+                 goto abort;
+             }
+@@ -208,7 +240,9 @@ PyObject * pcardext_read( PyObject * sel
+     
+     if( FatReadFileExt( name, offset, len, buffer ) == len )
+     {
+-        return PyString_FromStringAndSize( (char *)buffer, len );
++        // return PyString_FromStringAndSize( (char *)buffer, len );
++        return PY_String_Bytes( (char *)buffer, len );
++        // return PyBytes_FromStringAndSize( (char *)buffer, len );
+     }
+     else
+     {
+@@ -236,14 +270,15 @@ static PyMethodDef pcardext_methods[] =
+ 
+ static char pcardext_documentation[] = "Python extension for HP photocard services";
+ 
+-void initpcardext( void )
+-{
+-    PyObject * mod = Py_InitModule4( "pcardext", pcardext_methods, 
+-                                     pcardext_documentation, (PyObject*)NULL, 
+-                                     PYTHON_API_VERSION );
+-                     
+-    if (mod == NULL)
+-      return;
++MOD_INIT(pcardext)  {
++
++  PyObject* mod ;
++  MOD_DEF(mod, "pcardext", pcardext_documentation, pcardext_methods);
++  if (mod == NULL)
++    return MOD_ERROR_VAL;
++
++  return MOD_SUCCESS_VAL(mod);
++
+ }
+ 
+ 
+--- a/unload.py
++++ b/unload.py
+@@ -44,6 +44,11 @@ except ImportError:
+ 
+ # Local
+ from base.g import *
++from base.sixext import PY3
++if PY3:
++    log.error("This functionality is not spported in python3 environment.")
++    sys.exit(1)
++
+ from base import device, utils, tui, module
+ from prnt import cups
+ 
diff --git a/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/999-remove-lImageProcessor.patch b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/999-remove-lImageProcessor.patch
deleted file mode 100644
index aee4ac5..0000000
--- a/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/999-remove-lImageProcessor.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-# ../bin/ld: cannot find -lImageProcessor
---- a/Makefile.am
-+++ b/Makefile.am
-@@ -590,11 +590,10 @@ hpcups_SOURCES = prnt/hpcups/HPCupsFilte
- 	prnt/hpcups/flate_colorspace.h prnt/hpcups/RunLenEncoding.h prnt/hpcups/common_defines.h \
- 	prnt/hpcups/genPCLm.h \
- 	common/utils.c common/utils.h prnt/hpcups/Hbpl1_Wrapper.cpp prnt/hpcups/genPCLm.cpp \
--	prnt/hpcups/genJPEGStrips.cpp prnt/hpcups/RunLenEncoding.cpp \
--	prnt/hpcups/ImageProcessor.h
-+	prnt/hpcups/genJPEGStrips.cpp prnt/hpcups/RunLenEncoding.cpp
- 
- hpcups_CXXFLAGS = $(APDK_ENDIAN_FLAG) $(DBUS_CFLAGS)
--hpcups_LDADD = -L./prnt/hpcups/ -ljpeg -ldl -lImageProcessor -lcups -lcupsimage -lz $(DBUS_LIBS)
-+hpcups_LDADD = -L./prnt/hpcups/ -ljpeg -ldl -lcups -lcupsimage -lz $(DBUS_LIBS)
- #else
- #hpcupsdir = $(cupsfilterdir)
- #hpcups_PROGRAMS = hpcups
---- a/prnt/hpcups/HPCupsFilter.cpp
-+++ b/prnt/hpcups/HPCupsFilter.cpp
-@@ -637,16 +637,10 @@ int HPCupsFilter::processRasterData(cups
- 
- 
-     sprintf(hpPreProcessedRasterFile, "%s/hp_%s_cups_SwapedPagesXXXXXX",CUPS_TMP_DIR, m_JA.user_name);
--    image_processor_t* imageProcessor = imageProcessorCreate();
- 
-     while (cupsRasterReadHeader2(cups_raster, &cups_header))
-     {
- 
--        IMAGE_PROCESSOR_ERROR result = imageProcessorStartPage(imageProcessor, &cups_header);
--        if (result != IPE_SUCCESS){
--            dbglog("DEBUG: imageProcessorStartPage failed result = %d\n", result);
--        }
--
-         current_page_number++;
- 
-         if (current_page_number == 1) {
-@@ -745,11 +739,6 @@ int HPCupsFilter::processRasterData(cups
-             color_raster = rgbRaster;
-             black_raster = kRaster;
- 
--            result = imageProcessorProcessLine(imageProcessor, m_pPrinterBuffer, cups_header.cupsBytesPerLine);
--            if (result != IPE_SUCCESS){
--                dbglog("DEBUG: imageProcessorProcessLine failed result = %d\n", result);
--            }
--
- 
-             if ((y == 0) && !is_ljmono) {
-                 //For ljmono, make sure that first line is not a blankRaster line.Otherwise printer
-@@ -780,11 +769,6 @@ int HPCupsFilter::processRasterData(cups
-             }
-         }  // for() loop end
- 
--        result = imageProcessorEndPage(imageProcessor);
--        if (result != IPE_SUCCESS){
--                dbglog("DEBUG: imageProcessorEndPage failed result = %d\n", result);
--        }
--
- 
-         m_Job.NewPage();
-         if (err != NO_ERROR) {
-@@ -800,8 +784,6 @@ int HPCupsFilter::processRasterData(cups
-         rgbRaster = NULL;
-     }
- 
--    imageProcessorDestroy(imageProcessor);
--
-     unlink(hpPreProcessedRasterFile);
-     return ret_status;
- }
diff --git a/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/configure.patch b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/configure.patch
index 8fe77c5..571d5fe 100644
--- a/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/configure.patch
+++ b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip/configure.patch
@@ -1,10 +1,9 @@
 --- a/configure.in
 +++ b/configure.in
-@@ -27,8 +27,7 @@
+@@ -30,7 +30,7 @@
+ AC_INIT([HP Linux Imaging and Printing], [3.22.6], [3.22.6], [hplip])
  
- #AC_PREREQ(2.59)
- AC_INIT([HP Linux Imaging and Printing], [3.19.12], [3.19.12], [hplip])
--#AM_INIT_AUTOMAKE([1.9 foreign])
+ #AM_INIT_AUTOMAKE([1.9 foreign])
 -AM_INIT_AUTOMAKE
 +AM_INIT_AUTOMAKE([foreign])
  AC_DISABLE_STATIC
diff --git a/meta-openembedded/meta-oe/recipes-extended/hplip/hplip_3.19.12.bb b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip_3.22.6.bb
similarity index 80%
rename from meta-openembedded/meta-oe/recipes-extended/hplip/hplip_3.19.12.bb
rename to meta-openembedded/meta-oe/recipes-extended/hplip/hplip_3.22.6.bb
index ac845ff..b746006 100644
--- a/meta-openembedded/meta-oe/recipes-extended/hplip/hplip_3.19.12.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/hplip/hplip_3.22.6.bb
@@ -5,14 +5,18 @@
 SRC_URI = "${SOURCEFORGE_MIRROR}/${BPN}/${BP}.tar.gz \
            file://configure.patch \
            file://fix-libusb-paths.patch \
-           file://999-remove-lImageProcessor.patch \
            file://600-fix.patch \
            file://030-replace_unsafe_memcpy_with_memmove.patch \
            file://050-fix-glibcisms.patch \
            file://hplip-3.19.6-fix-return.patch \
+           file://0001-common-utils-Include-string.h-for-strcasestr.patch \
+           file://0002-Add-ImageProcessor-only-when-DISBALE_IMAGEPROCESSOR_.patch \
+           file://0003-pserror.c-Define-column-to-be-int-explcitly.patch \
+           file://0004-Define-missing-prototype-for-functions.patch \
+           file://0005-hp_ipp.c-Add-printf-format-to-snprintf-calls.patch \
+           file://0006-Workaround-patch-for-missing-Python3-transition-of-t.patch \
 "
-SRC_URI[md5sum] = "d72bc77d791c150c2c22b84e9553bab3"
-SRC_URI[sha256sum] = "b7f398502fb659e0de8e54976237e3c6a64fec0b3c36054a515876f7b006b255"
+SRC_URI[sha256sum] = "27ed0d492febb0b47c656234820d3ce573b24ff5b62e3bf4b2c47f82868d6bb4"
 
 DEPENDS += "cups python3 libusb"
 
@@ -25,6 +29,7 @@
 
 EXTRA_OECONF += "\
         LIBUSBINCLUDEROOT=${STAGING_INCDIR} \
+        --enable-cups-drv-install \
         --enable-cups-ppd-install \
         --disable-network-build \
         --disable-doc-build \
@@ -39,6 +44,7 @@
         --enable-foomatic-drv-install \
         --disable-foomatic-ppd-install \
         --disable-foomatic-rip-hplip-install \
+        --disable-imageProcessor_build \
         --with-cupsbackenddir=${libexecdir}/cups/backend \
         --with-cupsfilterdir=${libexecdir}/cups/filter \
 "
diff --git a/meta-openembedded/meta-oe/recipes-extended/jansson/jansson_2.14.bb b/meta-openembedded/meta-oe/recipes-extended/jansson/jansson_2.14.bb
index 945b31f..94bb99a 100644
--- a/meta-openembedded/meta-oe/recipes-extended/jansson/jansson_2.14.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/jansson/jansson_2.14.bb
@@ -10,6 +10,6 @@
 UPSTREAM_CHECK_URI = "https://github.com/akheron/${BPN}/releases"
 UPSTREAM_CHECK_REGEX = "${BPN}-(?P<pver>\d+(\.\d+)+)\.tar"
 
-inherit autotools pkgconfig
+inherit cmake pkgconfig
 
 BBCLASSEXTEND = "native"
diff --git a/meta-openembedded/meta-oe/recipes-extended/libblockdev/files/0001-lvm-Do-not-include-duplicate-entries-in-bd_lvm_lvs-o.patch b/meta-openembedded/meta-oe/recipes-extended/libblockdev/files/0001-lvm-Do-not-include-duplicate-entries-in-bd_lvm_lvs-o.patch
deleted file mode 100644
index e608358..0000000
--- a/meta-openembedded/meta-oe/recipes-extended/libblockdev/files/0001-lvm-Do-not-include-duplicate-entries-in-bd_lvm_lvs-o.patch
+++ /dev/null
@@ -1,100 +0,0 @@
-From d10fb2c0ee60c97f4dfeab4506a347c26cb389df Mon Sep 17 00:00:00 2001
-From: Vojtech Trefny <vtrefny@redhat.com>
-Date: Tue, 7 Dec 2021 15:50:45 +0800
-Subject: [PATCH] lvm: Do not include duplicate entries in bd_lvm_lvs output
-
-We use "-o segtypes" for the "lvs" command which means multisegment
-LVs will be twice in the output.
-
-Signed-off-by: Vojtech Trefny <vtrefny@redhat.com>
-
-Upstream-Status: Backport [https://github.com/storaged-project/libblockdev/pull/671]
-Signed-off-by: Hongxu Jia <hongxu.jia@windriver.com>
----
- src/plugins/lvm.c | 17 +++++++++++++++--
- tests/lvm_test.py | 41 +++++++++++++++++++++++++++++++++++++++++
- 2 files changed, 56 insertions(+), 2 deletions(-)
-
-diff --git a/src/plugins/lvm.c b/src/plugins/lvm.c
-index 2be1dbd..acd5b84 100644
---- a/src/plugins/lvm.c
-+++ b/src/plugins/lvm.c
-@@ -1810,8 +1810,21 @@ BDLVMLVdata** bd_lvm_lvs (const gchar *vg_name, GError **error) {
-         if (table && (num_items == 15)) {
-             /* valid line, try to parse and record it */
-             lvdata = get_lv_data_from_table (table, TRUE);
--            if (lvdata)
--                g_ptr_array_add (lvs, lvdata);
-+            if (lvdata) {
-+                /* ignore duplicate entries in lvs output, these are caused by multi segments LVs */
-+                for (gsize i = 0; i < lvs->len; i++) {
-+                    if (g_strcmp0 (((BDLVMLVdata *) g_ptr_array_index (lvs, i))->lv_name, lvdata->lv_name) == 0) {
-+                        g_debug("Duplicate LV entry for '%s' found in lvs output",
-+                                  lvdata->lv_name);
-+                        bd_lvm_lvdata_free (lvdata);
-+                        lvdata = NULL;
-+                        break;
-+                    }
-+                }
-+
-+                if (lvdata)
-+                    g_ptr_array_add (lvs, lvdata);
-+            }
-         } else
-             if (table)
-                 g_hash_table_destroy (table);
-diff --git a/tests/lvm_test.py b/tests/lvm_test.py
-index eb94c91..ab0de21 100644
---- a/tests/lvm_test.py
-+++ b/tests/lvm_test.py
-@@ -915,6 +915,47 @@ class LvmTestLVs(LvmPVVGLVTestCase):
-         lvs = BlockDev.lvm_lvs("testVG")
-         self.assertEqual(len(lvs), 1)
- 
-+class LvmTestLVsMultiSegment(LvmPVVGLVTestCase):
-+    def _clean_up(self):
-+        try:
-+            BlockDev.lvm_lvremove("testVG", "testLV2", True, None)
-+        except:
-+            pass
-+
-+        LvmPVVGLVTestCase._clean_up(self)
-+
-+    def test_lvs(self):
-+        """Verify that it's possible to gather info about LVs"""
-+
-+        succ = BlockDev.lvm_pvcreate(self.loop_dev, 0, 0, None)
-+        self.assertTrue(succ)
-+
-+        succ = BlockDev.lvm_vgcreate("testVG", [self.loop_dev], 0, None)
-+        self.assertTrue(succ)
-+
-+        succ = BlockDev.lvm_lvcreate("testVG", "testLV", 10 * 1024**2)
-+        self.assertTrue(succ)
-+
-+        lvs = BlockDev.lvm_lvs("testVG")
-+        self.assertEqual(len(lvs), 1)
-+        self.assertListEqual([lv.lv_name for lv in lvs], ["testLV"])
-+
-+        # add second LV
-+        succ = BlockDev.lvm_lvcreate("testVG", "testLV2", 10 * 1024**2)
-+        self.assertTrue(succ)
-+
-+        lvs = BlockDev.lvm_lvs("testVG")
-+        self.assertEqual(len(lvs), 2)
-+        self.assertListEqual([lv.lv_name for lv in lvs], ["testLV", "testLV2"])
-+
-+        # by resizing the first LV we will create two segments
-+        succ = BlockDev.lvm_lvresize("testVG", "testLV", 20 * 1024**2, None)
-+        self.assertTrue(succ)
-+
-+        lvs = BlockDev.lvm_lvs("testVG")
-+        self.assertEqual(len(lvs), 2)
-+        self.assertListEqual([lv.lv_name for lv in lvs], ["testLV", "testLV2"])
-+
- class LvmPVVGthpoolTestCase(LvmPVVGTestCase):
-     def _clean_up(self):
-         try:
--- 
-2.27.0
-
diff --git a/meta-openembedded/meta-oe/recipes-extended/libblockdev/libblockdev_2.26.bb b/meta-openembedded/meta-oe/recipes-extended/libblockdev/libblockdev_2.28.bb
similarity index 93%
rename from meta-openembedded/meta-oe/recipes-extended/libblockdev/libblockdev_2.26.bb
rename to meta-openembedded/meta-oe/recipes-extended/libblockdev/libblockdev_2.28.bb
index 3e6df56..44b4e7d 100644
--- a/meta-openembedded/meta-oe/recipes-extended/libblockdev/libblockdev_2.26.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/libblockdev/libblockdev_2.28.bb
@@ -11,9 +11,8 @@
 inherit autotools gobject-introspection pkgconfig
 
 SRC_URI = "git://github.com/storaged-project/libblockdev;branch=2.x-branch;protocol=https \
-           file://0001-lvm-Do-not-include-duplicate-entries-in-bd_lvm_lvs-o.patch \
 "
-SRCREV = "47ff12242c89e36a33259d18b7068b26c3bb1c64"
+SRCREV = "1412dc51c8f76bf8d9a6008228737db4a9a26d69"
 S = "${WORKDIR}/git"
 
 FILES:${PN} += "${libdir}/python2.7/dist-packages ${libdir}/python3.*/site-packages"
diff --git a/meta-openembedded/meta-oe/recipes-extended/libcec/libcec_6.0.2.bb b/meta-openembedded/meta-oe/recipes-extended/libcec/libcec_6.0.2.bb
index cd58689..599416c 100644
--- a/meta-openembedded/meta-oe/recipes-extended/libcec/libcec_6.0.2.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/libcec/libcec_6.0.2.bb
@@ -29,6 +29,9 @@
 PACKAGE_BEFORE_PN += "${PN}-examples-python ${PN}-examples"
 FILES:${PN}-examples-python = "${bindir}/py*"
 FILES:${PN}-examples = "${bindir}"
+# cec-client doesn't link with libcec, but uses LibCecInitialise to dlopen libcec, so do_package
+# cannot add the runtime dependency automatically
+RDEPENDS:${PN}-examples = "${PN}"
 RDEPENDS:${PN}-examples-python = "python3-${BPN} python3-core"
 
 # Create the wrapper for python3
diff --git a/meta-openembedded/meta-oe/recipes-extended/libgxim/libgxim_0.5.0.bb b/meta-openembedded/meta-oe/recipes-extended/libgxim/libgxim_0.5.0.bb
index 80ab730..2d93936 100644
--- a/meta-openembedded/meta-oe/recipes-extended/libgxim/libgxim_0.5.0.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/libgxim/libgxim_0.5.0.bb
@@ -23,7 +23,7 @@
 LIC_FILES_CHKSUM = "\
 file://COPYING;md5=2d5025d4aa3495befef8f17206a5b0a1"
 
-EXTRA_OECONF = " --disable-static --disable-rebuilds --enable-compile-warnings=minimum"
+EXTRA_OECONF = "--enable-debug --disable-static --disable-rebuilds --enable-compile-warnings=minimum"
 DEPENDS += "gtk+ glib-2.0 glib-2.0-native ruby-native intltool-native gnome-common-native"
 
 inherit features_check autotools pkgconfig gettext
diff --git a/meta-openembedded/meta-oe/recipes-extended/libpwquality/files/0001-fix-musl-build.patch b/meta-openembedded/meta-oe/recipes-extended/libpwquality/files/0001-fix-musl-build.patch
new file mode 100644
index 0000000..2cbdd6c
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/libpwquality/files/0001-fix-musl-build.patch
@@ -0,0 +1,72 @@
+Do not use fgetpwent_r
+
+fgetpwent_r does not exist on musl
+
+Source: https://git.alpinelinux.org/aports/tree/community/libpwquality/0001-fix-musl-build.patch
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+--- a/src/pam_pwquality.c
++++ b/src/pam_pwquality.c
+@@ -19,6 +19,7 @@
+ #include <stdio.h>
+ #include <pwd.h>
+ #include <errno.h>
++#include <security/pam_modutil.h>
+ #include "pwquality.h"
+ 
+ /*
+@@ -43,8 +44,6 @@ struct module_options {
+ 
+ #define CO_RETRY_TIMES  1
+ 
+-#define PATH_PASSWD "/etc/passwd"
+-
+ static int
+ _pam_parse (pam_handle_t *pamh, struct module_options *opt,
+             int argc, const char **argv)
+@@ -98,44 +97,7 @@ static int
+ check_local_user (pam_handle_t *pamh,
+                   const char *user)
+ {
+-        struct passwd pw, *pwp;
+-        char buf[4096];
+-        int found = 0;
+-        FILE *fp;
+-        int errn;
+-
+-        fp = fopen(PATH_PASSWD, "r");
+-        if (fp == NULL) {
+-                pam_syslog(pamh, LOG_ERR, "unable to open %s: %s",
+-                           PATH_PASSWD, pam_strerror(pamh, errno));
+-                return -1;
+-        }
+-
+-        for (;;) {
+-                errn = fgetpwent_r(fp, &pw, buf, sizeof (buf), &pwp);
+-                if (errn == ERANGE) {
+-                        pam_syslog(pamh, LOG_WARNING, "%s contains very long lines; corrupted?",
+-                                   PATH_PASSWD);
+-                        /* we can continue here as next call will read further */
+-                        continue;
+-                }
+-                if (errn != 0)
+-                        break;
+-                if (strcmp(pwp->pw_name, user) == 0) {
+-                        found = 1;
+-                        break;
+-                }
+-        }
+-
+-        fclose (fp);
+-
+-        if (errn != 0 && errn != ENOENT) {
+-                pam_syslog(pamh, LOG_ERR, "unable to enumerate local accounts: %s",
+-                           pam_strerror(pamh, errn));
+-                return -1;
+-        } else {
+-                return found;
+-        }
++	return pam_modutil_check_user_in_passwd(pamh, user, NULL) == PAM_SUCCESS;
+ }
+ 
+ PAM_EXTERN int
diff --git a/meta-openembedded/meta-oe/recipes-extended/libpwquality/libpwquality_1.4.4.bb b/meta-openembedded/meta-oe/recipes-extended/libpwquality/libpwquality_1.4.4.bb
index f892cc6..a6887b0 100644
--- a/meta-openembedded/meta-oe/recipes-extended/libpwquality/libpwquality_1.4.4.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/libpwquality/libpwquality_1.4.4.bb
@@ -8,6 +8,7 @@
 SRC_URI = "https://github.com/${SRCNAME}/${SRCNAME}/releases/download/${SRCNAME}-${PV}/${SRCNAME}-${PV}.tar.bz2 \
            file://add-missing-python-include-dir-for-cross.patch \
 "
+SRC_URI:append:libc-musl = " file://0001-fix-musl-build.patch "
 
 SRC_URI[md5sum] = "1fe43f6641dbf1e1766e2a02cf68a9c3"
 SRC_URI[sha256sum] = "d43baf23dc6887fe8f8e9b75cabaabc5f4bbbaa0f9eff44278d276141752a545"
diff --git a/meta-openembedded/meta-oe/recipes-extended/libuio/libuio/0001-include-fcntl.h-for-O_RDWR-define.patch b/meta-openembedded/meta-oe/recipes-extended/libuio/libuio/0001-include-fcntl.h-for-O_RDWR-define.patch
deleted file mode 100644
index 3cae5c9..0000000
--- a/meta-openembedded/meta-oe/recipes-extended/libuio/libuio/0001-include-fcntl.h-for-O_RDWR-define.patch
+++ /dev/null
@@ -1,38 +0,0 @@
-From d39c78d322585a32f9a55c67c25a99602ce08b12 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Sat, 1 Apr 2017 09:01:33 -0700
-Subject: [PATCH] include fcntl.h for O_RDWR define
-
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- tools/lsuio.c | 1 +
- tools/rwuio.c | 1 +
- 2 files changed, 2 insertions(+)
-
-diff --git a/tools/lsuio.c b/tools/lsuio.c
-index 58eb922..4769446 100644
---- a/tools/lsuio.c
-+++ b/tools/lsuio.c
-@@ -19,6 +19,7 @@
- #include <stdio.h>
- #include <sys/types.h>
- #include <getopt.h>
-+#include <fcntl.h>
- 
- #include "system.h"
- #include "uio_helper.h"
-diff --git a/tools/rwuio.c b/tools/rwuio.c
-index aef9e90..ebc71e1 100644
---- a/tools/rwuio.c
-+++ b/tools/rwuio.c
-@@ -20,6 +20,7 @@
- */
- #include <stdio.h>
- #include <stdlib.h>
-+#include <fcntl.h>
- #include <sys/types.h>
- #include <getopt.h>
- 
--- 
-2.12.1
-
diff --git a/meta-openembedded/meta-oe/recipes-extended/libuio/libuio/replace_inline_with_static-inline.patch b/meta-openembedded/meta-oe/recipes-extended/libuio/libuio/replace_inline_with_static-inline.patch
deleted file mode 100644
index fd314bf..0000000
--- a/meta-openembedded/meta-oe/recipes-extended/libuio/libuio/replace_inline_with_static-inline.patch
+++ /dev/null
@@ -1,121 +0,0 @@
-By default, gcc-5 uses C99 inline semantics, this semantics doesn't 
-generate externally visible function for inline functions. This results in 
-below error, when an another translation unit (TU) tries to link with the 
-inline function,
-
--- snip --
-| lsuio.o: In function `main':
-| <...>/libuio/0.2.1-r0/git/tools/lsuio.c:85: undefined reference to `uio_mmap'
-| collect2: error: ld returned 1 exit status
-| make[2]: *** [lsuio] Error 1
--- CUT --
-
-To solve this error and make libuio to compile with both 4.x and 5.x,
-
-1. We can remove 'uio_mmap' inline function definition in uio_mmap.c, and move
-   that definition into uio_helper.h file (which is included by lsuio.c) and 
-   replace inline with "static inline". Similarly it can be done to other 
-   uio_single_munmap and uio_munmap inline functions
-
-2. Add 'extern' keyword in front of inline functions declaration, to make 
-   inlined function as externally visible function, and to link with other TUs.
-
-Going with option 1.
-
-Upstream-Status: Pending
-
-Signed-off-by: Jagadeesh Krishnanjanappa <jkrishnanjanappa@mvista.com>
-
---- git_org/src/uio_helper.h	2015-10-20 02:37:04.183075855 -0400
-+++ git/src/uio_helper.h	2015-10-20 03:34:24.659970136 -0400
-@@ -61,11 +61,11 @@ struct uio_info_t {
- 
- /* function prototypes */
- 
--inline char* uio_lib_name(void);
--inline char* uio_lib_version(void);
--inline int uio_lib_ifcurrent(void);
--inline int uio_lib_ifrevision(void);
--inline int uio_lib_ifage(void);
-+static inline char* uio_lib_name(void);
-+static inline char* uio_lib_version(void);
-+static inline int uio_lib_ifcurrent(void);
-+static inline int uio_lib_ifrevision(void);
-+static inline int uio_lib_ifage(void);
- 
- int uio_get_mem_size(struct uio_info_t* info, int map_num);
- int uio_get_mem_addr(struct uio_info_t* info, int map_num);
-@@ -76,10 +76,30 @@ int uio_get_all_info(struct uio_info_t* 
- int uio_get_device_attributes(struct uio_info_t* info);
- 
- void* uio_single_mmap(struct uio_info_t* info, int map_num, int fd);
--inline void uio_mmap(struct uio_info_t* info, int fd);
- 
--inline void uio_single_munmap(struct uio_info_t* info, int map_num);
--inline void uio_munmap(struct uio_info_t* info);
-+static inline void uio_mmap(struct uio_info_t* info, int fd);
-+static inline void uio_mmap(struct uio_info_t* info, int fd)
-+{
-+        int map_num;
-+        if (!fd) return;
-+        for (map_num= 0; map_num < MAX_UIO_MAPS; map_num++)
-+                uio_single_mmap(info, map_num, fd);
-+}
-+
-+static inline void uio_single_munmap(struct uio_info_t* info, int map_num);
-+static inline void uio_single_munmap(struct uio_info_t* info, int map_num)
-+{
-+        munmap(info->maps[map_num].internal_addr, info->maps[map_num].size);
-+        info->maps[map_num].mmap_result = UIO_MMAP_NOT_DONE;
-+}
-+
-+static inline void uio_munmap(struct uio_info_t* info);
-+static inline void uio_munmap(struct uio_info_t* info)
-+{
-+        int i;
-+        for (i = 0; i < MAX_UIO_MAPS; i++)
-+                uio_single_munmap(info, i);
-+}
- 
- void uio_free_dev_attrs(struct uio_info_t* info);
- void uio_free_info(struct uio_info_t* info);
---- git_org/src/uio_mmap.c	2015-10-20 02:37:04.183075855 -0400
-+++ git/src/uio_mmap.c	2015-10-20 03:34:45.060003208 -0400
-@@ -22,11 +22,3 @@
- #include <stdlib.h>
- 
- #include "uio_helper.h"
--
--inline void uio_mmap(struct uio_info_t* info, int fd)
--{
--	int map_num;
--	if (!fd) return;
--	for (map_num= 0; map_num < MAX_UIO_MAPS; map_num++)
--		uio_single_mmap(info, map_num, fd);
--}
---- git_org/src/uio_munmap.c	2015-10-20 02:37:04.183075855 -0400
-+++ git/src/uio_munmap.c	2015-10-20 03:34:59.636026835 -0400
-@@ -22,10 +22,3 @@
- #include <stdlib.h>
- 
- #include "uio_helper.h"
--
--inline void uio_munmap(struct uio_info_t* info)
--{
--	int i;
--	for (i = 0; i < MAX_UIO_MAPS; i++)
--		uio_single_munmap(info, i);
--}
-\ No newline at end of file
---- git_org/src/uio_single_munmap.c	2015-10-20 02:37:04.183075855 -0400
-+++ git/src/uio_single_munmap.c	2015-10-20 03:52:55.005763023 -0400
-@@ -24,9 +24,3 @@
- #include <sys/mman.h>
- 
- #include "uio_helper.h"
--
--inline void uio_single_munmap(struct uio_info_t* info, int map_num)
--{
--	munmap(info->maps[map_num].internal_addr, info->maps[map_num].size);
--	info->maps[map_num].mmap_result = UIO_MMAP_NOT_DONE;
--}
diff --git a/meta-openembedded/meta-oe/recipes-extended/libuio/libuio_0.2.1.bb b/meta-openembedded/meta-oe/recipes-extended/libuio/libuio_0.2.1.bb
index e22b25a..65bf9ba 100644
--- a/meta-openembedded/meta-oe/recipes-extended/libuio/libuio_0.2.1.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/libuio/libuio_0.2.1.bb
@@ -3,17 +3,14 @@
 LICENSE = "GPL-2.0-only"
 LIC_FILES_CHKSUM = "file://COPYING;md5=393a5ca445f6965873eca0259a17f833"
 
-SRC_URI = "git://git.code.sf.net/p/libuio/code;branch=master \
-           file://replace_inline_with_static-inline.patch \
-           file://0001-include-fcntl.h-for-O_RDWR-define.patch \
-           "
+SRCREV = "17d96e8f9a5bce7cee5e2222855ab46a246dba51"
+
+SRC_URI = "git://git.code.sf.net/p/libuio/code;branch=master;protocol=https"
+
+PV .= "+0.2.2+git${SRCPV}"
 
 inherit autotools
 
-SRCREV = "ed4f07ea147ac403c28105ab44d01bbf524d36f9"
-
-PV .= "+git${SRCPV}"
-
 S = "${WORKDIR}/git"
 
 PACKAGES += "${PN}-tools"
diff --git a/meta-openembedded/meta-oe/recipes-extended/libx86-1/libx86-1.1/0001-Fix-type-of-the-void-pointer-assignment.patch b/meta-openembedded/meta-oe/recipes-extended/libx86-1/libx86-1.1/0001-Fix-type-of-the-void-pointer-assignment.patch
new file mode 100644
index 0000000..ea3c306
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/libx86-1/libx86-1.1/0001-Fix-type-of-the-void-pointer-assignment.patch
@@ -0,0 +1,33 @@
+From c782e208021409e9b78acb2200abd4319072e78a Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 00:28:05 -0700
+Subject: [PATCH] Fix type of the void pointer assignment
+
+Fixes build with clang
+
+x86-common.c:216:9: error: incompatible integer to pointer conversion assigning to 'void *' from 'long' [-Wint-conversion]
+        offset = mem_info.offset - REAL_MEM_BASE;
+               ^ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ x86-common.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/x86-common.c b/x86-common.c
+index 137bc3c..6f737ed 100644
+--- a/x86-common.c
++++ b/x86-common.c
+@@ -213,7 +213,7 @@ void *LRMI_common_init(int high_page)
+ 	if (!real_mem_init(high_page))
+ 		return NULL;
+ 
+-	offset = mem_info.offset - REAL_MEM_BASE;
++	offset = (void*)(mem_info.offset - REAL_MEM_BASE);
+ 
+ 	/*
+ 	 Map the Interrupt Vectors (0x0 - 0x400) + BIOS data (0x400 - 0x502)
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-extended/libx86-1/libx86-1_1.1.bb b/meta-openembedded/meta-oe/recipes-extended/libx86-1/libx86-1_1.1.bb
index d303147..9dc91f3 100644
--- a/meta-openembedded/meta-oe/recipes-extended/libx86-1/libx86-1_1.1.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/libx86-1/libx86-1_1.1.bb
@@ -11,6 +11,7 @@
            file://libx86-mmap-offset.patch \
            file://0001-assume-zero-is-valid-address.patch \
            file://makefile-add-ldflags.patch \
+           file://0001-Fix-type-of-the-void-pointer-assignment.patch \
 "
 
 SRC_URI[md5sum] = "41bee1f8e22b82d82b5f7d7ba51abc2a"
diff --git a/meta-openembedded/meta-oe/recipes-extended/minifi-cpp/files/0001-civetweb-Disable-lto.patch b/meta-openembedded/meta-oe/recipes-extended/minifi-cpp/files/0001-civetweb-Disable-lto.patch
new file mode 100644
index 0000000..ec4e6ff
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/minifi-cpp/files/0001-civetweb-Disable-lto.patch
@@ -0,0 +1,33 @@
+From 120c708d51f72ade4a31d3d8f35bcfad7b12e723 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 6 Sep 2022 20:38:03 -0700
+Subject: [PATCH] civetweb: Disable lto
+
+lto does not work everywhere, therefore disable it at least in the cmake
+file, we can still enable it via environment
+
+Upstream-Status: Pending
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ thirdparty/civetweb-1.10/CMakeLists.txt | 4 ----
+ 1 file changed, 4 deletions(-)
+
+--- a/thirdparty/civetweb-1.10/CMakeLists.txt
++++ b/thirdparty/civetweb-1.10/CMakeLists.txt
+@@ -298,7 +298,6 @@ add_c_compiler_flag(/WX)
+ add_c_compiler_flag(-pedantic-errors)
+ add_c_compiler_flag(-fvisibility=hidden)
+ add_c_compiler_flag(-fstack-protector-strong RELEASE)
+-add_c_compiler_flag(-flto RELEASE)
+ 
+ add_c_compiler_flag(-fstack-protector-all DEBUG)
+ if (MINGW)
+@@ -361,7 +360,6 @@ if (CIVETWEB_ENABLE_CXX)
+   add_cxx_compiler_flag(-pedantic-errors)
+   add_cxx_compiler_flag(-fvisibility=hidden)
+   add_cxx_compiler_flag(-fstack-protector-strong RELEASE)
+-  add_cxx_compiler_flag(-flto RELEASE)
+ 
+   add_cxx_compiler_flag(-fstack-protector-all DEBUG)
+   if (MINGW)
diff --git a/meta-openembedded/meta-oe/recipes-extended/minifi-cpp/minifi-cpp_0.7.0.bb b/meta-openembedded/meta-oe/recipes-extended/minifi-cpp/minifi-cpp_0.7.0.bb
index 671d320..b843f72 100644
--- a/meta-openembedded/meta-oe/recipes-extended/minifi-cpp/minifi-cpp_0.7.0.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/minifi-cpp/minifi-cpp_0.7.0.bb
@@ -30,6 +30,7 @@
             file://0001-civetweb-CMakeLists.txt-do-not-search-gcc-ar-and-gcc.patch \
             file://0001-cxxopts-Add-limits-header.patch \
             file://0001-Fix-build-with-libc.patch \
+            file://0001-civetweb-Disable-lto.patch \
             file://minifi.service \
             file://systemd-volatile.conf \
             file://sysvinit-volatile.conf \
@@ -57,6 +58,7 @@
     -DSKIP_TESTS=ON \
     -DGCC_AR=${STAGING_BINDIR_TOOLCHAIN}/${AR} \
     -DGCC_RANLIB=${STAGING_BINDIR_TOOLCHAIN}/${RANLIB} \
+    -DDISABLE_PYTHON_SCRIPTING=ON \
     "
 EXTRA_OECMAKE:append:toolchain-clang = " -DCMAKE_RANLIB=${STAGING_BINDIR_TOOLCHAIN}/${TARGET_PREFIX}llvm-ranlib"
 LDFLAGS:append:toolchain-clang = " -fuse-ld=lld"
@@ -79,6 +81,14 @@
 do_install[cleandirs] += "${WORKDIR}/minifi-install"
 PSEUDO_CONSIDER_PATHS .= ",${WORKDIR}/minifi-install"
 
+do_configure:prepend:libc-musl() {
+    sed -i -e 's/-DHAVE_GLIBC_STRERROR_R=1/-DHAVE_GLIBC_STRERROR_R=0/' ${S}/CMakeLists.txt
+    sed -i -e 's/-DHAVE_POSIX_STRERROR_R=0/-DHAVE_POSIX_STRERROR_R=1/' ${S}/CMakeLists.txt
+}
+
+CFLAGS:append:libc-glibc = " -D_GNU_SOURCE"
+CXXFLAGS:append:libc-glibc = " -D_GNU_SOURCE"
+
 do_install() {
     DESTDIR='${WORKDIR}/minifi-install' cmake_runcmake_build --target ${OECMAKE_TARGET_INSTALL}
     MINIFI_BIN=${bindir}
diff --git a/meta-openembedded/meta-oe/recipes-extended/mozjs/mozjs-91/py-3.11.patch b/meta-openembedded/meta-oe/recipes-extended/mozjs/mozjs-91/py-3.11.patch
new file mode 100644
index 0000000..71da822
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/mozjs/mozjs-91/py-3.11.patch
@@ -0,0 +1,211 @@
+
+# HG changeset patch
+# User ahochheiden <ahochheiden@mozilla.com>
+# Date 1654151264 0
+# Node ID f54162b2c1f2fe52c6137ab2c3469a1944f58b27
+# Parent  6e7776492240c27732840d65a33dcc440fa1aba0
+Bug 1769631 - Remove 'U' from 'mode' parameters for various 'open' calls to ensure Python3.11 compatibility r=firefox-build-system-reviewers,glandium
+
+The 'U' flag represents "universal newline". It has been deprecated
+since Python3.3. Since then "universal newline" is the default when a
+file is opened in text mode (not bytes). In Python3.11 using the 'U'
+flag throws errors. There should be no harm in removing 'U' from 'open'
+everywhere it is used, and doing so allows the use of Python3.11.
+
+For more reading see: https://docs.python.org/3.11/whatsnew/3.11.html#changes-in-the-python-api
+
+Differential Revision: https://phabricator.services.mozilla.com/D147721
+
+Upstream-Status: Backport [https://hg.mozilla.org/mozilla-central/rev/f54162b2c1f2fe52c6137ab2c3469a1944f58b27]
+Signed-off-by: Alexander Kanavin <alex@linutronix.de>
+
+diff --git a/dom/base/usecounters.py b/dom/base/usecounters.py
+--- a/dom/base/usecounters.py
++++ b/dom/base/usecounters.py
+@@ -3,17 +3,17 @@
+ # file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ 
+ import collections
+ import re
+ 
+ 
+ def read_conf(conf_filename):
+     # Can't read/write from a single StringIO, so make a new one for reading.
+-    stream = open(conf_filename, "rU")
++    stream = open(conf_filename, "r")
+ 
+     def parse_counters(stream):
+         for line_num, line in enumerate(stream):
+             line = line.rstrip("\n")
+             if not line or line.startswith("//"):
+                 # empty line or comment
+                 continue
+             m = re.match(r"method ([A-Za-z0-9]+)\.([A-Za-z0-9]+)$", line)
+diff --git a/python/mozbuild/mozbuild/action/process_define_files.py b/python/mozbuild/mozbuild/action/process_define_files.py
+--- a/python/mozbuild/mozbuild/action/process_define_files.py
++++ b/python/mozbuild/mozbuild/action/process_define_files.py
+@@ -31,17 +31,17 @@ def process_define_file(output, input):
+ 
+     config = PartialConfigEnvironment(topobjdir)
+ 
+     if mozpath.basedir(
+         path, [mozpath.join(topsrcdir, "js/src")]
+     ) and not config.substs.get("JS_STANDALONE"):
+         config = PartialConfigEnvironment(mozpath.join(topobjdir, "js", "src"))
+ 
+-    with open(path, "rU") as input:
++    with open(path, "r") as input:
+         r = re.compile(
+             "^\s*#\s*(?P<cmd>[a-z]+)(?:\s+(?P<name>\S+)(?:\s+(?P<value>\S+))?)?", re.U
+         )
+         for l in input:
+             m = r.match(l)
+             if m:
+                 cmd = m.group("cmd")
+                 name = m.group("name")
+diff --git a/python/mozbuild/mozbuild/backend/base.py b/python/mozbuild/mozbuild/backend/base.py
+--- a/python/mozbuild/mozbuild/backend/base.py
++++ b/python/mozbuild/mozbuild/backend/base.py
+@@ -267,17 +267,17 @@ class BuildBackend(LoggingMixin):
+         If an exception is raised, |mach build| will fail with a
+         non-zero exit code.
+         """
+         self._write_purgecaches(config)
+ 
+         return status
+ 
+     @contextmanager
+-    def _write_file(self, path=None, fh=None, readmode="rU"):
++    def _write_file(self, path=None, fh=None, readmode="r"):
+         """Context manager to write a file.
+ 
+         This is a glorified wrapper around FileAvoidWrite with integration to
+         update the summary data on this instance.
+ 
+         Example usage:
+ 
+             with self._write_file('foo.txt') as fh:
+diff --git a/python/mozbuild/mozbuild/preprocessor.py b/python/mozbuild/mozbuild/preprocessor.py
+--- a/python/mozbuild/mozbuild/preprocessor.py
++++ b/python/mozbuild/mozbuild/preprocessor.py
+@@ -526,17 +526,17 @@ class Preprocessor:
+             if not options.output:
+                 raise Preprocessor.Error(
+                     self, "--depend doesn't work with stdout", None
+                 )
+             depfile = get_output_file(options.depend)
+ 
+         if args:
+             for f in args:
+-                with io.open(f, "rU", encoding="utf-8") as input:
++                with io.open(f, "r", encoding="utf-8") as input:
+                     self.processFile(input=input, output=out)
+             if depfile:
+                 mk = Makefile()
+                 mk.create_rule([six.ensure_text(options.output)]).add_dependencies(
+                     self.includes
+                 )
+                 mk.dump(depfile)
+                 depfile.close()
+@@ -855,17 +855,17 @@ class Preprocessor:
+         self.checkLineNumbers = False
+         if isName:
+             try:
+                 args = _to_text(args)
+                 if filters:
+                     args = self.applyFilters(args)
+                 if not os.path.isabs(args):
+                     args = os.path.join(self.curdir, args)
+-                args = io.open(args, "rU", encoding="utf-8")
++                args = io.open(args, "r", encoding="utf-8")
+             except Preprocessor.Error:
+                 raise
+             except Exception:
+                 raise Preprocessor.Error(self, "FILE_NOT_FOUND", _to_text(args))
+         self.checkLineNumbers = bool(
+             re.search("\.(js|jsm|java|webidl)(?:\.in)?$", args.name)
+         )
+         oldFile = self.context["FILE"]
+@@ -909,17 +909,17 @@ class Preprocessor:
+ 
+     def do_error(self, args):
+         raise Preprocessor.Error(self, "Error: ", _to_text(args))
+ 
+ 
+ def preprocess(includes=[sys.stdin], defines={}, output=sys.stdout, marker="#"):
+     pp = Preprocessor(defines=defines, marker=marker)
+     for f in includes:
+-        with io.open(f, "rU", encoding="utf-8") as input:
++        with io.open(f, "r", encoding="utf-8") as input:
+             pp.processFile(input=input, output=output)
+     return pp.includes
+ 
+ 
+ # Keep this module independently executable.
+ if __name__ == "__main__":
+     pp = Preprocessor()
+     pp.handleCommandLine(None, True)
+diff --git a/python/mozbuild/mozbuild/util.py b/python/mozbuild/mozbuild/util.py
+--- a/python/mozbuild/mozbuild/util.py
++++ b/python/mozbuild/mozbuild/util.py
+@@ -231,17 +231,17 @@ class FileAvoidWrite(BytesIO):
+     enabled by default because it a) doesn't make sense for binary files b)
+     could add unwanted overhead to calls.
+ 
+     Additionally, there is dry run mode where the file is not actually written
+     out, but reports whether the file was existing and would have been updated
+     still occur, as well as diff capture if requested.
+     """
+ 
+-    def __init__(self, filename, capture_diff=False, dry_run=False, readmode="rU"):
++    def __init__(self, filename, capture_diff=False, dry_run=False, readmode="r"):
+         BytesIO.__init__(self)
+         self.name = filename
+         assert type(capture_diff) == bool
+         assert type(dry_run) == bool
+         assert "r" in readmode
+         self._capture_diff = capture_diff
+         self._write_to_file = not dry_run
+         self.diff = None
+diff --git a/python/mozbuild/mozpack/files.py b/python/mozbuild/mozpack/files.py
+--- a/python/mozbuild/mozpack/files.py
++++ b/python/mozbuild/mozpack/files.py
+@@ -549,17 +549,17 @@ class PreprocessedFile(BaseFile):
+         self.defines = defines
+         self.extra_depends = list(extra_depends or [])
+         self.silence_missing_directive_warnings = silence_missing_directive_warnings
+ 
+     def inputs(self):
+         pp = Preprocessor(defines=self.defines, marker=self.marker)
+         pp.setSilenceDirectiveWarnings(self.silence_missing_directive_warnings)
+ 
+-        with _open(self.path, "rU") as input:
++        with _open(self.path, "r") as input:
+             with _open(os.devnull, "w") as output:
+                 pp.processFile(input=input, output=output)
+ 
+         # This always yields at least self.path.
+         return pp.includes
+ 
+     def copy(self, dest, skip_if_older=True):
+         """
+@@ -606,17 +606,17 @@ class PreprocessedFile(BaseFile):
+             return False
+ 
+         deps_out = None
+         if self.depfile:
+             deps_out = FileAvoidWrite(self.depfile)
+         pp = Preprocessor(defines=self.defines, marker=self.marker)
+         pp.setSilenceDirectiveWarnings(self.silence_missing_directive_warnings)
+ 
+-        with _open(self.path, "rU") as input:
++        with _open(self.path, "r") as input:
+             pp.processFile(input=input, output=dest, depfile=deps_out)
+ 
+         dest.close()
+         if self.depfile:
+             deps_out.close()
+ 
+         return True
+ 
+
diff --git a/meta-openembedded/meta-oe/recipes-extended/mozjs/mozjs-91_91.8.0.bb b/meta-openembedded/meta-oe/recipes-extended/mozjs/mozjs-91_91.13.0.bb
similarity index 95%
rename from meta-openembedded/meta-oe/recipes-extended/mozjs/mozjs-91_91.8.0.bb
rename to meta-openembedded/meta-oe/recipes-extended/mozjs/mozjs-91_91.13.0.bb
index 8ade3bb..4c1aa34 100644
--- a/meta-openembedded/meta-oe/recipes-extended/mozjs/mozjs-91_91.8.0.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/mozjs/mozjs-91_91.13.0.bb
@@ -15,8 +15,9 @@
            file://0006-Fix-build-on-powerpc.patch \
            file://0001-util.configure-fix-one-occasionally-reproduced-confi.patch \
            file://0001-rewrite-cargo-host-linker-in-python3.patch  \
+           file://py-3.11.patch \
            "
-SRC_URI[sha256sum] = "d483a853cbf5c7f93621093432e3dc0b7ed847f2a5318b964828d19f9f087f3a"
+SRC_URI[sha256sum] = "53be2bcde0b5ee3ec106bd8ba06b8ae95e7d489c484e881dfbe5360e4c920762"
 
 S = "${WORKDIR}/firefox-${@d.getVar("PV").replace("esr", "")}"
 
diff --git a/meta-openembedded/meta-oe/recipes-extended/nana/nana/0001-Include-stdlib.h-for-exit-and-abort-prototypes.patch b/meta-openembedded/meta-oe/recipes-extended/nana/nana/0001-Include-stdlib.h-for-exit-and-abort-prototypes.patch
new file mode 100644
index 0000000..f1712b1
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/nana/nana/0001-Include-stdlib.h-for-exit-and-abort-prototypes.patch
@@ -0,0 +1,52 @@
+From 0e32b1a07b1b032576c7b0a73d7f1a090a50dd23 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 31 Aug 2022 10:40:52 -0700
+Subject: [PATCH] Include stdlib.h for exit and abort prototypes
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/I.c          | 1 +
+ src/nana_error.c | 1 +
+ src/nanafilter.c | 1 +
+ 3 files changed, 3 insertions(+)
+
+diff --git a/src/I.c b/src/I.c
+index e4577ae..4761af4 100644
+--- a/src/I.c
++++ b/src/I.c
+@@ -30,6 +30,7 @@
+ 
+ 
+ #include <stdio.h>
++#include <stdlib.h>
+ #include <I.h>
+ 
+ /*
+diff --git a/src/nana_error.c b/src/nana_error.c
+index 51f99f2..c3a67d2 100644
+--- a/src/nana_error.c
++++ b/src/nana_error.c
+@@ -30,6 +30,7 @@
+ 
+ #include <nana_error.h>
+ #include <stdio.h>
++#include <stdlib.h>
+ 
+ void nana_error(const char *message) {
+   fprintf(stderr, "nana_error: %s\n", message);
+diff --git a/src/nanafilter.c b/src/nanafilter.c
+index 191e8ef..1ab1978 100644
+--- a/src/nanafilter.c
++++ b/src/nanafilter.c
+@@ -33,6 +33,7 @@
+ static const char rcs[] ="Id: nanafilter.c,v 1.2 1998/06/10 06:58:55 pjm Exp ";
+ 
+ #include <stdio.h>
++#include <stdlib.h>
+ 
+ void do_input(void);
+ void do_string(void);
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-extended/nana/nana_2.5.bb b/meta-openembedded/meta-oe/recipes-extended/nana/nana_2.5.bb
index e0b6f56..c109d4f 100644
--- a/meta-openembedded/meta-oe/recipes-extended/nana/nana_2.5.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/nana/nana_2.5.bb
@@ -7,6 +7,7 @@
 SRC_URI = "http://download.savannah.gnu.org/releases/${BPN}/${BP}.tar.gz \
     file://change-mandir-to-DESTDIR.patch \
     file://modify-acinclude.m4-and-configure.in.patch \
+    file://0001-Include-stdlib.h-for-exit-and-abort-prototypes.patch \
 "
 SRC_URI[md5sum] = "66c88aa0ad095b2e67673773135475f1"
 SRC_URI[sha256sum] = "fd1819ffea94b209513959447e4802afe2719600e7d161cd78b265a42812affa"
diff --git a/meta-openembedded/meta-oe/recipes-extended/pam/pam-plugin-ccreds/0001-configure-Check-for-function-from-libdb-during-confi.patch b/meta-openembedded/meta-oe/recipes-extended/pam/pam-plugin-ccreds/0001-configure-Check-for-function-from-libdb-during-confi.patch
new file mode 100644
index 0000000..c5a4235
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/pam/pam-plugin-ccreds/0001-configure-Check-for-function-from-libdb-during-confi.patch
@@ -0,0 +1,34 @@
+From 1915754179401b6ee00f0e2ffd844596778e43a2 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 12:05:02 -0700
+Subject: [PATCH] configure: Check for function from libdb during configure
+
+Checking for main in AC_CHECK_LIB is not the right way to find out whether
+a library exists. Checking for a function provided by the library is more
+appropriate and will help with newer compilers and autoconf in the future.
+
+Upstream-Status: Submitted [https://github.com/PADL/pam_ccreds/pull/2]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.in | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/configure.in b/configure.in
+index 3829d9f..3d52933 100644
+--- a/configure.in
++++ b/configure.in
+@@ -72,9 +72,9 @@ AC_CHECK_HEADERS(pam/pam_appl.h pam/pam_misc.h pam/pam_modules.h)
+ 
+ AC_CHECK_HEADERS(db.h)
+ 
+-AC_CHECK_LIB(db, main,[LIBS="-ldb $LIBS" found_db_lib=yes],,$LIBS)
++AC_CHECK_LIB(db, db_create,[LIBS="-ldb $LIBS" found_db_lib=yes],,$LIBS)
+ if test -z "$found_db_lib"; then
+-	AC_CHECK_LIB(db1, main,[LIBS="-ldb1 $LIBS" found_db_lib=yes],,$LIBS)
++	AC_CHECK_LIB(db1, db_create,[LIBS="-ldb1 $LIBS" found_db_lib=yes],,$LIBS)
+ fi
+ 
+ AC_CHECK_LIB(pam, pam_start)
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-extended/pam/pam-plugin-ccreds_11.bb b/meta-openembedded/meta-oe/recipes-extended/pam/pam-plugin-ccreds_11.bb
index 7dd48b3..81d0746 100644
--- a/meta-openembedded/meta-oe/recipes-extended/pam/pam-plugin-ccreds_11.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/pam/pam-plugin-ccreds_11.bb
@@ -11,7 +11,9 @@
 
 SRCREV = "e2145df09469bf84878e4729b4ecd814efb797d1"
 
-SRC_URI = "git://github.com/PADL/pam_ccreds;branch=master;protocol=https"
+SRC_URI = "git://github.com/PADL/pam_ccreds;branch=master;protocol=https \
+           file://0001-configure-Check-for-function-from-libdb-during-confi.patch \
+           "
 
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-oe/recipes-extended/pam/pam-ssh-agent-auth/0001-configure-Include-stdio.h-for-printf.patch b/meta-openembedded/meta-oe/recipes-extended/pam/pam-ssh-agent-auth/0001-configure-Include-stdio.h-for-printf.patch
new file mode 100644
index 0000000..c9e2760
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/pam/pam-ssh-agent-auth/0001-configure-Include-stdio.h-for-printf.patch
@@ -0,0 +1,37 @@
+From a0ae303fe0bcd81dfb1a649cc5e7a372d3bd878d Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 20:44:42 -0700
+Subject: [PATCH] configure: Include stdio.h for printf
+
+Fixes test for __progname
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.ac | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+--- a/configure.ac
++++ b/configure.ac
+@@ -2791,7 +2791,9 @@ if test "x$ac_cv_have_control_in_msghdr"
+ fi
+ 
+ AC_CACHE_CHECK([if libc defines __progname], ac_cv_libc_defines___progname, [
+-	AC_TRY_LINK([],
++	AC_TRY_LINK([
++#include <stdio.h>
++],
+ 		[ extern char *__progname; printf("%s", __progname); ],
+ 		[ ac_cv_libc_defines___progname="yes" ],
+ 		[ ac_cv_libc_defines___progname="no" ]
+--- a/configure
++++ b/configure
+@@ -14838,7 +14838,7 @@ else
+ 
+ 	cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+ /* end confdefs.h.  */
+-
++#include <stdio.h>
+ int
+ main ()
+ {
diff --git a/meta-openembedded/meta-oe/recipes-extended/pam/pam-ssh-agent-auth_0.10.3.bb b/meta-openembedded/meta-oe/recipes-extended/pam/pam-ssh-agent-auth_0.10.3.bb
index 7065529..b5bcc63 100644
--- a/meta-openembedded/meta-oe/recipes-extended/pam/pam-ssh-agent-auth_0.10.3.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/pam/pam-ssh-agent-auth_0.10.3.bb
@@ -10,6 +10,7 @@
 SRC_URI = "http://sourceforge.net/projects/pamsshagentauth/files/pam_ssh_agent_auth/v${PV}/pam_ssh_agent_auth-${PV}.tar.bz2 \
            file://0001-Adapt-to-OpenSSL-1.1.1.patch \
            file://0002-Check-against-the-correct-OPENSSL_VERSION_NUMBER.patch \
+           file://0001-configure-Include-stdio.h-for-printf.patch \
            "
 SRC_URI[md5sum] = "8dbe90ab3625e545036333e6f51ccf1d"
 SRC_URI[sha256sum] = "3c53d358d6eaed1b211239df017c27c6f9970995d14102ae67bae16d4f47a763"
diff --git a/meta-openembedded/meta-oe/recipes-extended/polkit/polkit/0004-Make-netgroup-support-optional.patch b/meta-openembedded/meta-oe/recipes-extended/polkit/polkit/0004-Make-netgroup-support-optional.patch
index fa273d4..181aca1 100644
--- a/meta-openembedded/meta-oe/recipes-extended/polkit/polkit/0004-Make-netgroup-support-optional.patch
+++ b/meta-openembedded/meta-oe/recipes-extended/polkit/polkit/0004-Make-netgroup-support-optional.patch
@@ -1,4 +1,4 @@
-From 7ef2621ab7adcedc099ed39acfb73c6fa835cbc3 Mon Sep 17 00:00:00 2001
+From a334fac72112c01cd322f7c97ef7ca21457ab52f Mon Sep 17 00:00:00 2001
 From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
 Date: Sun, 15 May 2022 05:04:10 +0000
 Subject: [PATCH] Make netgroup support optional
@@ -37,7 +37,7 @@
  9 files changed, 43 insertions(+), 8 deletions(-)
 
 diff --git a/configure.ac b/configure.ac
-index 59858df..5a7fc11 100644
+index ca4b9f2..4c5d596 100644
 --- a/configure.ac
 +++ b/configure.ac
 @@ -100,7 +100,7 @@ AC_CHECK_LIB(expat,XML_ParserCreate,[EXPAT_LIBS="-lexpat"],
diff --git a/meta-openembedded/meta-oe/recipes-extended/redis/redis-7/redis.service b/meta-openembedded/meta-oe/recipes-extended/redis/redis-7/redis.service
index 36d2985..a52204c 100644
--- a/meta-openembedded/meta-oe/recipes-extended/redis/redis-7/redis.service
+++ b/meta-openembedded/meta-oe/recipes-extended/redis/redis-7/redis.service
@@ -9,6 +9,7 @@
 ExecStop=/usr/bin/redis-cli shutdown
 Restart=always
 LimitNOFILE=10032
+Type=notify
 
 [Install]
 WantedBy=multi-user.target
diff --git a/meta-openembedded/meta-oe/recipes-extended/redis/redis_7.0.4.bb b/meta-openembedded/meta-oe/recipes-extended/redis/redis_7.0.4.bb
index cde32e4..3516592 100644
--- a/meta-openembedded/meta-oe/recipes-extended/redis/redis_7.0.4.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/redis/redis_7.0.4.bb
@@ -35,7 +35,10 @@
 USERADD_PARAM:${PN}  = "--system --home-dir /var/lib/redis -g redis --shell /bin/false redis"
 GROUPADD_PARAM:${PN} = "--system redis"
 
-REDIS_ON_SYSTEMD = "${@bb.utils.contains('DISTRO_FEATURES', 'systemd', 'true', 'false', d)}"
+PACKAGECONFIG = "${@bb.utils.filter('DISTRO_FEATURES', 'systemd', d)}"
+PACKAGECONFIG[systemd] = "USE_SYSTEMD=yes,USE_SYSTEMD=no,systemd"
+
+EXTRA_OEMAKE += "${PACKAGECONFIG_CONFARGS}"
 
 do_compile:prepend() {
     (cd deps && oe_runmake hiredis lua linenoise)
@@ -55,8 +58,9 @@
     install -m 0644 ${WORKDIR}/redis.service ${D}${systemd_system_unitdir}
     sed -i 's!/usr/sbin/!${sbindir}/!g' ${D}${systemd_system_unitdir}/redis.service
 
-    if [ "${REDIS_ON_SYSTEMD}" = true ]; then
+    if ${@bb.utils.contains('DISTRO_FEATURES', 'systemd', 'true', 'false', d)}; then
         sed -i 's!daemonize yes!# daemonize yes!' ${D}/${sysconfdir}/redis/redis.conf
+        sed -i 's!supervised no!supervised systemd!' ${D}/${sysconfdir}/redis/redis.conf
     fi
 }
 
diff --git a/meta-openembedded/meta-oe/recipes-extended/rrdtool/rrdtool_1.8.0.bb b/meta-openembedded/meta-oe/recipes-extended/rrdtool/rrdtool_1.8.0.bb
index d953c11..961fe7c 100644
--- a/meta-openembedded/meta-oe/recipes-extended/rrdtool/rrdtool_1.8.0.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/rrdtool/rrdtool_1.8.0.bb
@@ -4,7 +4,7 @@
 LICENSE = "GPL-2.0-only"
 LIC_FILES_CHKSUM = "file://COPYRIGHT;md5=39df84cfd8a5e18bf988f277f7946676"
 
-DEPENDS = "libpng zlib cairo pango glib-2.0 libxml2 groff-native python3-setuptools-native"
+DEPENDS = "libpng zlib glib-2.0 libxml2 groff-native python3-setuptools-native"
 
 SRCREV = "3af04acd38bbc61bbdcdd931dcf234c971aa5336"
 PV = "1.8.0"
@@ -24,7 +24,7 @@
 
 EXTRA_AUTORECONF = "-I m4 --exclude=autopoint"
 
-PACKAGECONFIG ??= "perl ${@bb.utils.filter('DISTRO_FEATURES', 'systemd', d)}"
+PACKAGECONFIG ??= "perl graph ${@bb.utils.filter('DISTRO_FEATURES', 'systemd', d)}"
 
 PACKAGECONFIG[python] = "--enable-python=yes \
 am_cv_python_pythondir=${STAGING_LIBDIR}/python${PYTHON_BASEVERSION}/site-packages \
@@ -40,6 +40,8 @@
 
 PACKAGECONFIG[systemd] = "--with-systemdsystemunitdir=${systemd_unitdir}/system/,--without-systemdsystemunitdir,systemd,"
 
+PACKAGECONFIG[graph] = "--enable-rrd_graph,--disable-rrd_graph,pango cairo"
+
 EXTRA_OECONF = " \
     --enable-shared \
     --disable-libwrap \
diff --git a/meta-openembedded/meta-oe/recipes-extended/rsyslog/rsyslog_8.2206.0.bb b/meta-openembedded/meta-oe/recipes-extended/rsyslog/rsyslog_8.2208.0.bb
similarity index 98%
rename from meta-openembedded/meta-oe/recipes-extended/rsyslog/rsyslog_8.2206.0.bb
rename to meta-openembedded/meta-oe/recipes-extended/rsyslog/rsyslog_8.2208.0.bb
index a39de3a..5b30eca 100644
--- a/meta-openembedded/meta-oe/recipes-extended/rsyslog/rsyslog_8.2206.0.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/rsyslog/rsyslog_8.2208.0.bb
@@ -31,7 +31,7 @@
     file://0001-Include-sys-time-h.patch \
 "
 
-SRC_URI[sha256sum] = "a1377218b26c0767a7a3f67d166d5338af7c24b455d35ec99974e18e6845ba27"
+SRC_URI[sha256sum] = "14de68e7b8e5ab0c5d734f82e2dc9fff22cd7f4710ad690727eb10a7b9b3df5e"
 
 UPSTREAM_CHECK_URI = "https://github.com/rsyslog/rsyslog/releases"
 UPSTREAM_CHECK_REGEX = "(?P<pver>\d+(\.\d+)+)"
diff --git a/meta-openembedded/meta-oe/recipes-extended/sanlock/sanlock/0001-add-missing-system-header-string.h.patch b/meta-openembedded/meta-oe/recipes-extended/sanlock/sanlock/0001-add-missing-system-header-string.h.patch
new file mode 100644
index 0000000..5878847
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/sanlock/sanlock/0001-add-missing-system-header-string.h.patch
@@ -0,0 +1,41 @@
+From 272f9838f3495f5e419f77e000762c420754c96d Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 15:57:32 -0700
+Subject: [PATCH] add missing system header string.h
+
+This is needed for mem* function prototypes used in these sources
+
+Upstream-Status: Submitted [https://pagure.io/sanlock/issue/8]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/sanlock_sock.c | 1 +
+ wdmd/wdmd_sock.c   | 1 +
+ 2 files changed, 2 insertions(+)
+
+diff --git a/src/sanlock_sock.c b/src/sanlock_sock.c
+index b83f9ae..e206f88 100644
+--- a/src/sanlock_sock.c
++++ b/src/sanlock_sock.c
+@@ -12,6 +12,7 @@
+ #include <stdio.h>
+ #include <stdint.h>
+ #include <stddef.h>
++#include <string.h>
+ #include <sys/socket.h>
+ #include <sys/un.h>
+ 
+diff --git a/wdmd/wdmd_sock.c b/wdmd/wdmd_sock.c
+index 45d9d9b..110ce9f 100644
+--- a/wdmd/wdmd_sock.c
++++ b/wdmd/wdmd_sock.c
+@@ -12,6 +12,7 @@
+ #include <stdio.h>
+ #include <stdint.h>
+ #include <stddef.h>
++#include <string.h>
+ #include <sys/socket.h>
+ #include <sys/un.h>
+ 
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-extended/sanlock/sanlock_3.8.4.bb b/meta-openembedded/meta-oe/recipes-extended/sanlock/sanlock_3.8.4.bb
index a59a5c4..3b4ae31 100644
--- a/meta-openembedded/meta-oe/recipes-extended/sanlock/sanlock_3.8.4.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/sanlock/sanlock_3.8.4.bb
@@ -16,6 +16,7 @@
 SRC_URI = "git://pagure.io/sanlock.git;protocol=http;branch=master \
            file://0001-sanlock-Replace-cp-a-with-cp-R-no-dereference-preser.patch \
            file://setuptools.patch \
+           file://0001-add-missing-system-header-string.h.patch \
           "
 SRCREV = "a181e951376d49a82eef17920c8ebedec80b4823"
 
diff --git a/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb/0001-Replace-need-for-error.h-when-it-does-not-exist.patch b/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb/0001-Replace-need-for-error.h-when-it-does-not-exist.patch
index e723050..9bb9f44 100644
--- a/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb/0001-Replace-need-for-error.h-when-it-does-not-exist.patch
+++ b/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb/0001-Replace-need-for-error.h-when-it-does-not-exist.patch
@@ -16,8 +16,6 @@
  trace.c       |  4 +++-
  5 files changed, 24 insertions(+), 5 deletions(-)
 
-diff --git a/brokerUpc.c b/brokerUpc.c
-index 17cbd9b..fe2b347 100644
 --- a/brokerUpc.c
 +++ b/brokerUpc.c
 @@ -20,8 +20,11 @@
@@ -33,8 +31,6 @@
  #include "support.h"
  #include "native.h"
  #include <sfcCommon/utilft.h>
-diff --git a/configure.ac b/configure.ac
-index ab2964e..d4915a1 100644
 --- a/configure.ac
 +++ b/configure.ac
 @@ -517,7 +517,7 @@ fi
@@ -46,8 +42,6 @@
  AC_CHECK_HEADERS([cmpi/cmpimacs.h cmpi/cmpift.h cmpi/cmpidt.h],[],[AC_MSG_ERROR([Could not find required CPMI header.])])
  
  # Checks for typedefs, structures, and compiler characteristics.
-diff --git a/httpAdapter.c b/httpAdapter.c
-index 2719e6c..e768972 100644
 --- a/httpAdapter.c
 +++ b/httpAdapter.c
 @@ -71,7 +71,9 @@
@@ -61,11 +55,15 @@
  /* should probably go into cimRequest.h */
  #define CIM_PROTOCOL_ANY     0
  #define CIM_PROTOCOL_CIM_XML 1
-diff --git a/support.c b/support.c
-index c7bba8b..5b3eef1 100644
 --- a/support.c
 +++ b/support.c
-@@ -32,7 +32,11 @@
+@@ -27,16 +27,20 @@
+  *  @sa native.h
+  */
+ 
++#include "config.h"
+ #include <stdio.h>
+ #include <dlfcn.h>
  #include "support.h"
  #include <stdio.h>
  #include <stdlib.h>
@@ -77,7 +75,11 @@
  #include <errno.h>
  #include "native.h"
  #include "trace.h"
-@@ -331,17 +335,25 @@ loadQualifierDeclMI(const char *provider,
+-#include "config.h"
+ #include "control.h"
+ #include <pthread.h>
+ 
+@@ -331,17 +335,25 @@ loadQualifierDeclMI(const char *provider
    _SFCB_RETURN(NULL);
  };
  
@@ -104,8 +106,6 @@
  /**
   * flag to ensure MM is initialized only once
   */
-diff --git a/trace.c b/trace.c
-index d7f30db..438af46 100644
 --- a/trace.c
 +++ b/trace.c
 @@ -279,7 +279,9 @@ _sfcb_trap(int tn)
@@ -119,6 +119,3 @@
  sigHandler     *
  setSignal(int sn, sigHandler * sh, int flags)
  {
--- 
-2.14.1
-
diff --git a/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb/0001-configure-Check-for-function-from-respective-library.patch b/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb/0001-configure-Check-for-function-from-respective-library.patch
new file mode 100644
index 0000000..5ee368f
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb/0001-configure-Check-for-function-from-respective-library.patch
@@ -0,0 +1,72 @@
+From 366c4a1c8b7724241ad2b703e48615ca5affa32e Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 12:46:07 -0700
+Subject: [PATCH] configure: Check for function from respective library in
+ AC_CHECK_LIB
+
+This helps perform correct library checks, especially with newer autoconf
+and toolchains.
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.ac | 16 ++++++++--------
+ 1 file changed, 8 insertions(+), 8 deletions(-)
+
+diff --git a/configure.ac b/configure.ac
+index d4915a1..6154514 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -332,8 +332,8 @@ if [test "$enable_tests"]; then
+ fi
+ 
+ if [test "$test_gcov" == "yes"]; then
+-    AC_CHECK_LIB(gcc,main)
+-    AC_CHECK_LIB(gcov,main)
++    AC_CHECK_LIB(gcc,_Unwind_GetIP)
++    AC_CHECK_LIB(gcov,gcov_write_summary)
+     AC_PATH_PROG(LCOV,lcov,yes,no)
+     AC_PATH_PROG(GENHTML,genhtml,yes,no)
+     if test "$LCOV" == "no" -o "$GENHTML" == "no" ; then
+@@ -400,7 +400,7 @@ fi
+ 
+ if test "$enable_pam" == "yes"; then
+    AC_DEFINE(HAVE_PAM,,[PAM support enabled.])
+-   AC_CHECK_LIB(pam,main,[SFCB_LIBPAM=-lpam],[AC_MSG_ERROR(Could not find required pam library.)])
++   AC_CHECK_LIB(pam,pam_start,[SFCB_LIBPAM=-lpam],[AC_MSG_ERROR(Could not find required pam library.)])
+    SFCB_CONF_BASICAUTHLIB=sfcBasicPAMAuthentication   
+    SFCB_CONF_DOBASICAUTH=true
+ else
+@@ -470,16 +470,16 @@ if test "$HAVE_UNZIP" = "no" ; then
+ fi
+ 
+ # Checks for libraries.
+-AC_CHECK_LIB(pthread,main)
+-AC_CHECK_LIB(dl,main)
+-AC_CHECK_LIB(z,main,[SFCB_LIBZ=-lz],[AC_MSG_ERROR([Could not find required libz])])
++AC_CHECK_LIB(pthread,pthread_create)
++AC_CHECK_LIB(dl,dlopen)
++AC_CHECK_LIB(z,inflate,[SFCB_LIBZ=-lz],[AC_MSG_ERROR([Could not find required libz])])
+ # Test for the newest function here to make sure it's up to date.
+ AC_CHECK_LIB(sfcUtil,invalid_uint,, \
+   [AC_MSG_ERROR([Function invalid_uint not found. Is the required version of sfcCommon installed?])])
+ if test "$enable_indications" = "yes" ; then
+    LOAD_INDICATION_PROVIDER=
+    AC_DEFINE(HAVE_INDICATIONS,1,[Indication support enabled.])
+-   AC_CHECK_LIB(curl,main)
++   AC_CHECK_LIB(curl,curl_easy_init)
+ else
+    LOAD_INDICATION_PROVIDER='#'
+ fi
+@@ -487,7 +487,7 @@ fi
+ AC_SUBST(LOAD_INDICATION_PROVIDER)
+ 
+ if test "$enable_ssl" = "yes"; then
+-   AC_CHECK_LIB(ssl,main)
++   AC_CHECK_LIB(ssl,SSL_CTX_new)
+    SFCB_CONF_HTTPS=true
+    SFCB_CONF_HTTP=false
+ else
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb/0001-include-missing-system-headers.patch b/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb/0001-include-missing-system-headers.patch
new file mode 100644
index 0000000..c16e393
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb/0001-include-missing-system-headers.patch
@@ -0,0 +1,151 @@
+From c5b15ae9636a3b73407372cce87eb40ea78a68ea Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 15:51:31 -0700
+Subject: [PATCH] include missing system headers
+
+Upstream-Status: Pending
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ brokerEnc.c         | 2 ++
+ brokerOs.c          | 1 +
+ mlog.c              | 1 +
+ mofc/backend_sfcb.c | 2 +-
+ sfcbdump.c          | 1 +
+ sfcbdumpP32onI32.c  | 1 +
+ sfcbsem.c           | 1 +
+ trace.c             | 3 ++-
+ trace.h             | 3 ++-
+ 9 files changed, 12 insertions(+), 3 deletions(-)
+
+diff --git a/brokerEnc.c b/brokerEnc.c
+index 9115e71..889afcd 100644
+--- a/brokerEnc.c
++++ b/brokerEnc.c
+@@ -25,6 +25,8 @@
+ #include "constClass.h"
+ #include <sfcCommon/utilft.h>
+ 
++#include <string.h> /* strcasecmp */
++
+ extern const char *opGetClassNameChars(const CMPIObjectPath * cop);
+ extern const char *opGetNameSpaceChars(const CMPIObjectPath * cop);
+ extern CMPIConstClass *getConstClass(const char *ns, const char *cn);
+diff --git a/brokerOs.c b/brokerOs.c
+index 8d73a0b..b1427fd 100644
+--- a/brokerOs.c
++++ b/brokerOs.c
+@@ -22,6 +22,7 @@
+ #include <pthread.h>
+ #include "native.h"
+ #include <stdlib.h>
++#include <string.h> /* strcmp */
+ 
+ static char    *
+ resolveFileName(const char *filename)
+diff --git a/mlog.c b/mlog.c
+index a2d9eb7..6d9cd29 100644
+--- a/mlog.c
++++ b/mlog.c
+@@ -26,6 +26,7 @@ const char     *_mlog_id =
+ #include <syslog.h>
+ #include <stdarg.h>
+ #include <stdio.h>
++#include <string.h> /* strcat */
+ #include <errno.h>
+ #include <signal.h>
+ #include "trace.h"              /* for setSignal() */
+diff --git a/mofc/backend_sfcb.c b/mofc/backend_sfcb.c
+index 614abcd..99d4061 100644
+--- a/mofc/backend_sfcb.c
++++ b/mofc/backend_sfcb.c
+@@ -29,7 +29,7 @@
+ #include "backend.h"
+ #include "objectpath.h"
+ #include <sys/utsname.h>
+-
++#include <string.h>
+ 
+ extern CMPIStatus sfcb_simpleArrayAdd(CMPIArray * array, CMPIValue * val, CMPIType type);
+ extern CMPIObjectPath *getObjectPath(char *path, char **msg);
+diff --git a/sfcbdump.c b/sfcbdump.c
+index 8a9c335..aa8559c 100644
+--- a/sfcbdump.c
++++ b/sfcbdump.c
+@@ -23,6 +23,7 @@
+ #include <errno.h>
+ #include <stddef.h>
+ #include <getopt.h>
++#include <string.h> /* strerror */
+ #include "objectImpl.h"
+ 
+ #define BINARY_NAME argv[0]
+diff --git a/sfcbdumpP32onI32.c b/sfcbdumpP32onI32.c
+index ccf87dc..3540751 100644
+--- a/sfcbdumpP32onI32.c
++++ b/sfcbdumpP32onI32.c
+@@ -22,6 +22,7 @@
+ #include <fcntl.h>
+ #include <errno.h>
+ #include <stddef.h>
++#include <string.h>
+ #include <getopt.h>
+ #include "objectImpl.h"
+ #include <byteswap.h>
+diff --git a/sfcbsem.c b/sfcbsem.c
+index 3f8de7f..1e6358b 100644
+--- a/sfcbsem.c
++++ b/sfcbsem.c
+@@ -21,6 +21,7 @@
+ 
+ /* includes */
+ #include <stdio.h>
++#include <string.h>
+ #include <getopt.h>
+ #include <errno.h>
+ 
+diff --git a/trace.c b/trace.c
+index 23597e1..c4f8011 100644
+--- a/trace.c
++++ b/trace.c
+@@ -25,6 +25,7 @@
+ #include "native.h"
+ #include <string.h>
+ #include <time.h>
++#include <pthread.h>
+ 
+ #include <sys/stat.h>
+ #include <sys/wait.h>
+@@ -50,7 +51,7 @@
+ 
+ char           *processName = NULL;
+ int             providerProcess = 0;
+-int             idleThreadId = 0;
++pthread_t       idleThreadId = 0;
+ int             terminating = 0;
+ int             colorTrace;
+ 
+diff --git a/trace.h b/trace.h
+index ea39850..52d408d 100644
+--- a/trace.h
++++ b/trace.h
+@@ -25,6 +25,7 @@
+ 
+ #include "mlog.h"
+ 
++#include <pthread.h>
+ extern unsigned long _sfcb_trace_mask;
+ /* use pointer indirect _sfcb_trace_mask to allow shared memory flag */
+ extern unsigned long *_ptr_sfcb_trace_mask;
+@@ -162,7 +163,7 @@ extern sigHandler *setSignal(int sn, sigHandler * sh, int flags);
+ 
+ extern char    *processName;
+ extern int      providerProcess;
+-extern int      idleThreadId;
++extern pthread_t      idleThreadId;
+ extern int      terminating;
+ 
+ #endif
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb_1.4.9.bb b/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb_1.4.9.bb
index 4b9ae47..2a89a54 100644
--- a/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb_1.4.9.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/sblim-sfcb/sblim-sfcb_1.4.9.bb
@@ -27,6 +27,8 @@
            file://0001-Replace-need-for-error.h-when-it-does-not-exist.patch \
            file://sblim-sfcb-1.4.9-fix-sfcbinst2mof.patch \
            file://0001-Avoid-variable-definition-in-header-files.patch \
+           file://0001-configure-Check-for-function-from-respective-library.patch \
+           file://0001-include-missing-system-headers.patch \
 "
 
 SRC_URI[md5sum] = "28021cdabc73690a94f4f9d57254ce30"
diff --git a/meta-openembedded/meta-oe/recipes-extended/sblim-sfcc/sblim-sfcc/0001-Fix-function-declararions.patch b/meta-openembedded/meta-oe/recipes-extended/sblim-sfcc/sblim-sfcc/0001-Fix-function-declararions.patch
new file mode 100644
index 0000000..c498c55
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/sblim-sfcc/sblim-sfcc/0001-Fix-function-declararions.patch
@@ -0,0 +1,80 @@
+From f97c26f5effd4372f7e03f9e4178d42a9ad8d4b3 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 13:33:16 -0700
+Subject: [PATCH] Fix function declarations
+
+Upstream-Status: Pending
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ TEST/v2test_ec.c              | 2 ++
+ TEST/v2test_ein.c             | 1 +
+ backend/cimxml/cimXmlParser.c | 6 +++++-
+ backend/cimxml/grammar.c      | 2 ++
+ 4 files changed, 10 insertions(+), 1 deletion(-)
+
+diff --git a/TEST/v2test_ec.c b/TEST/v2test_ec.c
+index ad34dcc..d3e566d 100644
+--- a/TEST/v2test_ec.c
++++ b/TEST/v2test_ec.c
+@@ -6,6 +6,8 @@
+ #include "cmcimacs.h"
+ 
+ extern char *value2Chars(CMPIType type, CMPIValue * value);
++extern void showClass( CMPIConstClass * in_class );
++
+ /*
+  * comment out this define to use v2 http XML interface
+  */
+diff --git a/TEST/v2test_ein.c b/TEST/v2test_ein.c
+index c1b4692..5d5ef5d 100644
+--- a/TEST/v2test_ein.c
++++ b/TEST/v2test_ein.c
+@@ -6,6 +6,7 @@
+ #include "cmcimacs.h"
+ 
+ extern char *value2Chars(CMPIType type, CMPIValue * value);
++extern void showObjectPath( CMPIObjectPath * objectpath );
+ void showProperty( CMPIData , char * );
+ void showInstance( CMPIInstance * );
+ static char * CMPIState_str(CMPIValueState);
+diff --git a/backend/cimxml/cimXmlParser.c b/backend/cimxml/cimXmlParser.c
+index d1ab86e..9f5d1ca 100644
+--- a/backend/cimxml/cimXmlParser.c
++++ b/backend/cimxml/cimXmlParser.c
+@@ -34,6 +34,8 @@
+ 
+ #include <pthread.h>
+ 
++void startParsing(ParserControl *parm);
++
+ static int attrsOk(XmlBuffer * xb, const XmlElement * e, XmlAttr * r,
+                    const char *tag, int etag);
+ static char *getValue(XmlBuffer * xb, const char *v);
+@@ -1350,7 +1352,9 @@ ResponseHdr scanCimXmlResponse(const char *xmlData, CMPIObjectPath *cop)
+ 
+    control.heap = parser_heap_init();
+ 
+-   control.respHdr.rc = startParsing(&control);
++   control.respHdr.rc = 0;
++
++   startParsing(&control);
+ 
+    parser_heap_term(control.heap);
+ 
+diff --git a/backend/cimxml/grammar.c b/backend/cimxml/grammar.c
+index 6a0a969..a3dcdea 100644
+--- a/backend/cimxml/grammar.c
++++ b/backend/cimxml/grammar.c
+@@ -23,6 +23,8 @@
+ #include "sfcUtil/utilft.h"
+ #include "parserUtil.h"
+ 
++CMPIType guessType(char *val);
++void setClassMethods(CMPIConstClass *cls, XtokMethods *ms);
+ 
+ static int ct = 0;
+ static int dontLex = 0;
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-extended/sblim-sfcc/sblim-sfcc_2.2.8.bb b/meta-openembedded/meta-oe/recipes-extended/sblim-sfcc/sblim-sfcc_2.2.8.bb
index df55a67..20bd2b6 100644
--- a/meta-openembedded/meta-oe/recipes-extended/sblim-sfcc/sblim-sfcc_2.2.8.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/sblim-sfcc/sblim-sfcc_2.2.8.bb
@@ -4,6 +4,7 @@
 
 SRC_URI = "http://netcologne.dl.sourceforge.net/project/sblim/${BPN}/${BPN}-${PV}.tar.bz2 \
            file://0001-cimxml-Include-sys-select.h-for-fd_set.patch \
+           file://0001-Fix-function-declararions.patch \
            "
 
 SRC_URI[md5sum] = "0bac0dec19f17ec065b6c332a56d7bae"
diff --git a/meta-openembedded/meta-oe/recipes-extended/scsirastools/scsirastools_1.6.6.bb b/meta-openembedded/meta-oe/recipes-extended/scsirastools/scsirastools_1.6.6.bb
index d3f2bd6..4fefd51 100644
--- a/meta-openembedded/meta-oe/recipes-extended/scsirastools/scsirastools_1.6.6.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/scsirastools/scsirastools_1.6.6.bb
@@ -20,8 +20,7 @@
 inherit autotools update-rc.d
 
 # mdadm Makefile has CC set to gcc, hence override CC to ${CC}
-EXTRA_OEMAKE += "CC='${CC}' sbindir=${base_sbindir}"
-
+EXTRA_OEMAKE += "CC='${CC}' CFLAGS='${CFLAGS} -D_LARGEFILE64_SOURCE=1' sbindir=${base_sbindir}"
 
 do_configure:append() {
 	oe_runmake -C mdadm.d mdadm-1.3.0
diff --git a/meta-openembedded/meta-oe/recipes-extended/sigrok/libsigrokdecode/0001-configure.ac-add-py-3.10-support.patch b/meta-openembedded/meta-oe/recipes-extended/sigrok/libsigrokdecode/0001-configure.ac-add-py-3.10-support.patch
index 85e49cc..a90f588 100644
--- a/meta-openembedded/meta-oe/recipes-extended/sigrok/libsigrokdecode/0001-configure.ac-add-py-3.10-support.patch
+++ b/meta-openembedded/meta-oe/recipes-extended/sigrok/libsigrokdecode/0001-configure.ac-add-py-3.10-support.patch
@@ -1,7 +1,7 @@
 From a5835dfe126bfe6ed0b8197c6578960835bf1fe8 Mon Sep 17 00:00:00 2001
 From: Alexander Kanavin <alex@linutronix.de>
 Date: Sun, 3 Oct 2021 22:08:50 +0200
-Subject: [PATCH] configure.ac: add py 3.10 support
+Subject: [PATCH] configure.ac: add py 3.10/11 support
 
 Upstream-Status: Pending
 Signed-off-by: Alexander Kanavin <alex@linutronix.de>
@@ -18,7 +18,7 @@
  # https://docs.python.org/3/whatsnew/3.8.html#debug-build-uses-the-same-abi-as-release-build
  SR_PKG_CHECK([python3], [SRD_PKGLIBS],
 -	[python-3.9-embed], [python-3.8-embed], [python-3.8 >= 3.8], [python-3.7 >= 3.7], [python-3.6 >= 3.6], [python-3.5 >= 3.5], [python-3.4 >= 3.4], [python-3.3 >= 3.3], [python-3.2 >= 3.2], [python3 >= 3.2])
-+	[python-3.10-embed], [python-3.9-embed], [python-3.8-embed], [python-3.8 >= 3.8], [python-3.7 >= 3.7], [python-3.6 >= 3.6], [python-3.5 >= 3.5], [python-3.4 >= 3.4], [python-3.3 >= 3.3], [python-3.2 >= 3.2], [python3 >= 3.2])
++	[python-3.11-embed], [python-3.10-embed], [python-3.9-embed], [python-3.8-embed], [python-3.8 >= 3.8], [python-3.7 >= 3.7], [python-3.6 >= 3.6], [python-3.5 >= 3.5], [python-3.4 >= 3.4], [python-3.3 >= 3.3], [python-3.2 >= 3.2], [python3 >= 3.2])
  AS_IF([test "x$sr_have_python3" = xno],
  	[AC_MSG_ERROR([Cannot find Python 3 development headers.])])
  
diff --git a/meta-openembedded/meta-oe/recipes-extended/uml-utilities/uml-utilities-20040406/0001-Add-missing-standard-headers-for-str-and-exit-APIs.patch b/meta-openembedded/meta-oe/recipes-extended/uml-utilities/uml-utilities-20040406/0001-Add-missing-standard-headers-for-str-and-exit-APIs.patch
new file mode 100644
index 0000000..9fce51a
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-extended/uml-utilities/uml-utilities-20040406/0001-Add-missing-standard-headers-for-str-and-exit-APIs.patch
@@ -0,0 +1,64 @@
+From a1c4716ceaed6333f8be01b5d4d971e64babcdd7 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 18:57:42 -0700
+Subject: [PATCH] Add missing standard headers for str* and exit APIs
+
+Upstream-Status: Pending
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ jail/jail_uml             | Bin 19120 -> 19120 bytes
+ jail/jail_uml.c           |   1 +
+ port-helper/port-helper.c |   1 +
+ uml_router/port.c         |   1 +
+ uml_router/uml_switch.c   |   1 +
+ watchdog/uml_watchdog.c   |   1 +
+ 6 files changed, 5 insertions(+)
+
+--- a/jail/jail_uml.c
++++ b/jail/jail_uml.c
+@@ -1,5 +1,6 @@
+ #include <stdio.h>
+ #include <unistd.h>
++#include <stdlib.h> /* for exit */
+ #include <errno.h>
+ 
+ static void Usage(void)
+--- a/port-helper/port-helper.c
++++ b/port-helper/port-helper.c
+@@ -12,6 +12,7 @@ for read and write, and the console is f
+ 
+ #include <stdio.h>
+ #include <stdlib.h>
++#include <string.h> /* memset */
+ #include <signal.h>
+ #include <errno.h>
+ #include <unistd.h>
+--- a/uml_router/port.c
++++ b/uml_router/port.c
+@@ -1,5 +1,6 @@
+ #include <stdio.h>
+ #include <stdlib.h>
++#include <string.h> /* memcmp */
+ #include <unistd.h>
+ #include <errno.h>
+ #include <sys/socket.h>
+--- a/uml_router/uml_switch.c
++++ b/uml_router/uml_switch.c
+@@ -5,6 +5,7 @@
+ #include <stdio.h>
+ #include <errno.h>
+ #include <stdlib.h>
++#include <string.h> /* strcmp */
+ #include <signal.h>
+ #include <fcntl.h>
+ #include <stdint.h>
+--- a/watchdog/uml_watchdog.c
++++ b/watchdog/uml_watchdog.c
+@@ -1,5 +1,6 @@
+ #include <stdio.h>
+ #include <stdlib.h>
++#include <string.h> /* strcmp */
+ #include <unistd.h>
+ #include <signal.h>
+ #include <errno.h>
diff --git a/meta-openembedded/meta-oe/recipes-extended/uml-utilities/uml-utilities_20040406.bb b/meta-openembedded/meta-oe/recipes-extended/uml-utilities/uml-utilities_20040406.bb
index 663a145..35ae5f7 100644
--- a/meta-openembedded/meta-oe/recipes-extended/uml-utilities/uml-utilities_20040406.bb
+++ b/meta-openembedded/meta-oe/recipes-extended/uml-utilities/uml-utilities_20040406.bb
@@ -7,6 +7,7 @@
            file://fix-ldflags.patch \
            file://unstrip.patch \
            file://0001-include-required-system-header-files-for-fd_set-and-.patch \
+           file://0001-Add-missing-standard-headers-for-str-and-exit-APIs.patch \
            "
 SRC_URI[md5sum] = "2c1ccd9efacbfb39e42d482b89b2550a"
 SRC_URI[sha256sum] = "4f179b1db021ef15ac7e9b2eed57c525db127a754c574f591c367460cded9f41"
diff --git a/meta-openembedded/meta-oe/recipes-gnome/gtk+/gtk+/0001-Fix-signature-of-create_menu-function.patch b/meta-openembedded/meta-oe/recipes-gnome/gtk+/gtk+/0001-Fix-signature-of-create_menu-function.patch
new file mode 100644
index 0000000..6b5f928
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-gnome/gtk+/gtk+/0001-Fix-signature-of-create_menu-function.patch
@@ -0,0 +1,50 @@
+From a703e2406fda3dc150574ae2f90cb5f4810d0601 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Mon, 29 Aug 2022 00:25:19 -0700
+Subject: [PATCH] Fix signature of create_menu() function
+
+Upstream-Status: Inappropriate [EOL]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ tests/testmenubars.c | 8 ++++----
+ 1 file changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/tests/testmenubars.c b/tests/testmenubars.c
+index 416a939..7f247f8 100644
+--- a/tests/testmenubars.c
++++ b/tests/testmenubars.c
+@@ -21,7 +21,7 @@
+ #include <gtk/gtk.h>
+ 
+ static GtkWidget *
+-create_menu (depth)
++create_menu (int depth)
+ {
+     GtkWidget *menu;
+     GtkWidget *menuitem;
+@@ -70,19 +70,19 @@ create_menubar (GtkPackDirection pack_dir,
+   menuitem = gtk_image_menu_item_new_from_stock (GTK_STOCK_HOME, NULL);
+   gtk_menu_shell_append (GTK_MENU_SHELL (menubar), menuitem);
+   gtk_label_set_angle (GTK_LABEL (GTK_BIN (menuitem)->child), angle);
+-  menu = create_menu (2, TRUE);
++  menu = create_menu (2);
+   gtk_menu_item_set_submenu (GTK_MENU_ITEM (menuitem), menu);
+ 
+   menuitem = gtk_menu_item_new_with_label ("foo");
+   gtk_menu_shell_append (GTK_MENU_SHELL (menubar), menuitem);
+   gtk_label_set_angle (GTK_LABEL (GTK_BIN (menuitem)->child), angle);
+-  menu = create_menu (2, TRUE);
++  menu = create_menu (2);
+   gtk_menu_item_set_submenu (GTK_MENU_ITEM (menuitem), menu);
+ 
+   menuitem = gtk_menu_item_new_with_label ("bar");
+   gtk_menu_shell_append (GTK_MENU_SHELL (menubar), menuitem);
+   gtk_label_set_angle (GTK_LABEL (GTK_BIN (menuitem)->child), angle);
+-  menu = create_menu (2, TRUE);
++  menu = create_menu (2);
+   gtk_menu_item_set_submenu (GTK_MENU_ITEM (menuitem), menu);
+ 
+   return menubar;
+-- 
+2.37.2
+
diff --git a/meta-openembedded/meta-oe/recipes-gnome/gtk+/gtk+_2.24.33.bb b/meta-openembedded/meta-oe/recipes-gnome/gtk+/gtk+_2.24.33.bb
index 082967e..5eac641 100644
--- a/meta-openembedded/meta-oe/recipes-gnome/gtk+/gtk+_2.24.33.bb
+++ b/meta-openembedded/meta-oe/recipes-gnome/gtk+/gtk+_2.24.33.bb
@@ -10,6 +10,7 @@
            file://doc-fixes.patch \
            file://strict-prototypes.patch \
            file://0001-Do-not-look-into-HOME-when-looking-for-gtk-modules.patch \
+           file://0001-Fix-signature-of-create_menu-function.patch \
            "
 
 SRC_URI[sha256sum] = "ac2ac757f5942d318a311a54b0c80b5ef295f299c2a73c632f6bfb1ff49cc6da"
diff --git a/meta-openembedded/meta-oe/recipes-gnome/libjcat/libjcat_0.1.11.bb b/meta-openembedded/meta-oe/recipes-gnome/libjcat/libjcat_0.1.12.bb
similarity index 94%
rename from meta-openembedded/meta-oe/recipes-gnome/libjcat/libjcat_0.1.11.bb
rename to meta-openembedded/meta-oe/recipes-gnome/libjcat/libjcat_0.1.12.bb
index bd17e44..ff573ff 100644
--- a/meta-openembedded/meta-oe/recipes-gnome/libjcat/libjcat_0.1.11.bb
+++ b/meta-openembedded/meta-oe/recipes-gnome/libjcat/libjcat_0.1.12.bb
@@ -11,7 +11,7 @@
     git://github.com/hughsie/libjcat.git;branch=main;protocol=https \
     file://run-ptest \
 "
-SRCREV = "ffa0b5a9a49a7880e9ab3ae3b061080f15e95e15"
+SRCREV = "898b80468288bc6278b73210783ba1f7c0219c71"
 S = "${WORKDIR}/git"
 
 inherit gobject-introspection gtk-doc meson ptest-gnome vala lib_package
diff --git a/meta-openembedded/meta-oe/recipes-gnome/libpeas/libpeas_1.32.0.bb b/meta-openembedded/meta-oe/recipes-gnome/libpeas/libpeas_1.34.0.bb
similarity index 88%
rename from meta-openembedded/meta-oe/recipes-gnome/libpeas/libpeas_1.32.0.bb
rename to meta-openembedded/meta-oe/recipes-gnome/libpeas/libpeas_1.34.0.bb
index 944b21d..1bf6752 100644
--- a/meta-openembedded/meta-oe/recipes-gnome/libpeas/libpeas_1.32.0.bb
+++ b/meta-openembedded/meta-oe/recipes-gnome/libpeas/libpeas_1.34.0.bb
@@ -12,7 +12,7 @@
 
 ANY_OF_DISTRO_FEATURES = "${GTK3DISTROFEATURES}"
 
-SRC_URI[archive.sha256sum] = "d625520fa02e8977029b246ae439bc218968965f1e82d612208b713f1dcc3d0e"
+SRC_URI[archive.sha256sum] = "4305f715dab4b5ad3e8007daec316625e7065a94e63e25ef55eb1efb964a7bf0"
 
 PACKAGECONFIG[python3] = "-Dpython3=true,-Dpython3=false,python3-pygobject"
 
diff --git a/meta-openembedded/meta-oe/recipes-gnome/libxmlb/libxmlb_0.3.9.bb b/meta-openembedded/meta-oe/recipes-gnome/libxmlb/libxmlb_0.3.10.bb
similarity index 91%
rename from meta-openembedded/meta-oe/recipes-gnome/libxmlb/libxmlb_0.3.9.bb
rename to meta-openembedded/meta-oe/recipes-gnome/libxmlb/libxmlb_0.3.10.bb
index 4c5f0cf..1f5310c 100644
--- a/meta-openembedded/meta-oe/recipes-gnome/libxmlb/libxmlb_0.3.9.bb
+++ b/meta-openembedded/meta-oe/recipes-gnome/libxmlb/libxmlb_0.3.10.bb
@@ -5,7 +5,7 @@
 SRC_URI = "git://github.com/hughsie/libxmlb.git;branch=main;protocol=https \
            file://run-ptest \
            "
-SRCREV = "c308e39ccad6673e216f92d08b3782a4ffcb23ad"
+SRCREV = "a6cac6a715d57c393bbddee4d0381ad943341af9"
 S = "${WORKDIR}/git"
 
 inherit gobject-introspection gtk-doc meson ptest-gnome lib_package
diff --git a/meta-openembedded/meta-oe/recipes-graphics/cairo/cairomm-1.16_1.16.1.bb b/meta-openembedded/meta-oe/recipes-graphics/cairo/cairomm-1.16_1.16.2.bb
similarity index 82%
rename from meta-openembedded/meta-oe/recipes-graphics/cairo/cairomm-1.16_1.16.1.bb
rename to meta-openembedded/meta-oe/recipes-graphics/cairo/cairomm-1.16_1.16.2.bb
index 61892d3..c1e271f 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/cairo/cairomm-1.16_1.16.1.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/cairo/cairomm-1.16_1.16.2.bb
@@ -9,7 +9,7 @@
 DEPENDS += "boost cairo libsigc++-3"
 
 SRC_URI = "https://www.cairographics.org/releases/cairomm-${PV}.tar.xz"
-SRC_URI[sha256sum] = "6f6060d8e98dd4b8acfee2295fddbdd38cf487c07c26aad8d1a83bb9bff4a2c6"
+SRC_URI[sha256sum] = "6a63bf98a97dda2b0f55e34d1b5f3fb909ef8b70f9b8d382cb1ff3978e7dc13f"
 
 S = "${WORKDIR}/cairomm-${PV}"
 
diff --git a/meta-openembedded/meta-oe/recipes-graphics/cglm/cglm_0.8.5.bb b/meta-openembedded/meta-oe/recipes-graphics/cglm/cglm_0.8.5.bb
new file mode 100644
index 0000000..5a437c1
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-graphics/cglm/cglm_0.8.5.bb
@@ -0,0 +1,25 @@
+SUMMARY = "OpenGL Mathematics Library for C"
+DESCRIPTION = "Highly optimized 2D|3D math library, also known as OpenGL Mathematics (glm) for `C`. \
+cglm provides lot of utils to help math operations to be fast and quick to write. It is community \
+friendly, feel free to bring any issues, bugs you faced."
+HOMEPAGE = "https://github.com/recp/cglm"
+BUGTRACKER = "https://github.com/recp/cglm/issues"
+SECTION = "libs"
+LICENSE = "MIT"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=8dc95c4110ba741c43832734b51b7de7"
+
+SRC_URI = "git://github.com/recp/cglm;branch=master;protocol=https"
+# Tag v0.8.5
+SRCREV = "7e5d1f435f628b873347eb052b7d6605b0b997f2"
+
+S = "${WORKDIR}/git"
+
+PACKAGECONFIG[build_tests] = "-Dbuild_tests=true,-Dbuild_tests=false,"
+
+PACKAGECONFIG ?= ""
+
+inherit meson pkgconfig
+
+EXTRA_OEMESON += "--buildtype release"
+
+BBCLASSEXTEND = "native"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/directfb/directfb-examples/0001-spacedream-Add-typecast-to-pthread_t-in-assignment.patch b/meta-openembedded/meta-oe/recipes-graphics/directfb/directfb-examples/0001-spacedream-Add-typecast-to-pthread_t-in-assignment.patch
new file mode 100644
index 0000000..f40b7f1
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-graphics/directfb/directfb-examples/0001-spacedream-Add-typecast-to-pthread_t-in-assignment.patch
@@ -0,0 +1,39 @@
+From 85a30903ea3ba4232379bbbcb54960307d5a2da0 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 18:49:03 -0700
+Subject: [PATCH] spacedream: Add typecast to pthread_t in assignment
+
+render_loop_thread is of type pthread_t, therefore -1, which is an int,
+cannot be assigned to it directly. Do the needed typecast conversion.
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/spacedream/main.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/spacedream/main.c b/src/spacedream/main.c
+index 430800f..8e1887c 100644
+--- a/src/spacedream/main.c
++++ b/src/spacedream/main.c
+@@ -205,7 +205,7 @@ void unload_stars()
+ int main( int argc, char *argv[] )
+ {
+      int                   quit = 0;
+-     pthread_t             render_loop_thread = -1;
++     pthread_t             render_loop_thread = (pthread_t)-1;
+ 
+      IDirectFBSurface     *primary;
+      IDirectFBEventBuffer *buffer;
+@@ -344,7 +344,7 @@ int main( int argc, char *argv[] )
+      pthread_cancel( render_loop_thread );
+      pthread_mutex_unlock( &render_start );
+      pthread_join( render_loop_thread, NULL );
+-     render_loop_thread = -1;
++     render_loop_thread = (pthread_t)-1;
+ 
+ 
+      unload_stars();
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-graphics/directfb/directfb-examples_1.7.0.bb b/meta-openembedded/meta-oe/recipes-graphics/directfb/directfb-examples_1.7.0.bb
index 7907c5c..398e339 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/directfb/directfb-examples_1.7.0.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/directfb/directfb-examples_1.7.0.bb
@@ -9,6 +9,7 @@
 SRC_URI = " \
            http://downloads.yoctoproject.org/mirror/sources/DirectFB-examples-${PV}.tar.gz \
            file://configure.in-Fix-string-argument-syntax.patch \
+           file://0001-spacedream-Add-typecast-to-pthread_t-in-assignment.patch \
           "
 
 LIC_FILES_CHKSUM = "file://COPYING;md5=ecf6fd2b19915afc4da56043926ca18f"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/fvwm/fvwm_2.6.9.bb b/meta-openembedded/meta-oe/recipes-graphics/fvwm/fvwm_2.6.9.bb
index 75b24bf..123af4d 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/fvwm/fvwm_2.6.9.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/fvwm/fvwm_2.6.9.bb
@@ -75,6 +75,8 @@
 EXTRA_OEMAKE = " \
     V=1 \
 "
+# clang 15.0+ treats int-conversion and implicit-int warnings as errors by default
+CFLAGS += "-Wno-error=int-conversion -Wno-error=implicit-int"
 
 do_install:append() {
     install -d -m 0755 ${D}/${sysconfdir}/xdg/fvwm
diff --git a/meta-openembedded/meta-oe/recipes-graphics/glm/glm_0.9.9.8.bb b/meta-openembedded/meta-oe/recipes-graphics/glm/glm_0.9.9.8.bb
index 71a005a..9edcb2b 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/glm/glm_0.9.9.8.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/glm/glm_0.9.9.8.bb
@@ -22,6 +22,8 @@
 
 inherit cmake
 
+CXXFLAGS:append:toolchain-clang = " -Wno-error=invalid-utf8 -Wno-error=disabled-macro-expansion -Wno-error=reserved-identifier"
+
 do_install() {
     install -d ${D}${includedir} ${D}${docdir}/glm ${D}${libdir}/pkgconfig ${D}${libdir}/cmake/glm
     cp -R --no-dereference --preserve=mode,links ${S}/glm ${D}${includedir}
diff --git a/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2/0001-configure.ac-remove-AM_PO_SUBDIRS.patch b/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2/0001-configure.ac-remove-AM_PO_SUBDIRS.patch
index 7800fdd..eaef818 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2/0001-configure.ac-remove-AM_PO_SUBDIRS.patch
+++ b/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2/0001-configure.ac-remove-AM_PO_SUBDIRS.patch
@@ -9,18 +9,13 @@
  configure.ac | 1 -
  1 file changed, 1 deletion(-)
 
-diff --git a/configure.ac b/configure.ac
-index bb5b795..93fbaf2 100644
 --- a/configure.ac
 +++ b/configure.ac
-@@ -64,7 +64,6 @@ GP_GETTEXT_HACK([],[Lutz Müller and others],[${MAIL_GPHOTO_TRANSLATION}])
+@@ -46,7 +46,6 @@ dnl i18n support
+ dnl ---------------------------------------------------------------------------
+ GP_GETTEXT_HACK([],[Lutz Müller and others],[${MAIL_GPHOTO_TRANSLATION}])
  ALL_LINGUAS="az cs da de en_GB es eu fi fr hu id is it ja nl pa pl pt_BR ro ru rw sk sr sv uk vi zh_CN zh_TW"
+-AM_PO_SUBDIRS()
  AM_GNU_GETTEXT_VERSION([0.14.1])
  AM_GNU_GETTEXT([external])
--AM_PO_SUBDIRS()
  AM_ICONV()
- GP_GETTEXT_FLAGS()
- 
--- 
-1.9.1
-
diff --git a/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2/0001-gphoto2-Use-pthread_t-abstract-type-for-thead-IDs.patch b/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2/0001-gphoto2-Use-pthread_t-abstract-type-for-thead-IDs.patch
new file mode 100644
index 0000000..a27c02c
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2/0001-gphoto2-Use-pthread_t-abstract-type-for-thead-IDs.patch
@@ -0,0 +1,39 @@
+From 23c67e93e51f700d0aeecfc08277e39f51201fc3 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 12:59:46 -0700
+Subject: [PATCH] gphoto2: Use pthread_t abstract type for thread IDs
+
+This is not a plain old datatype in every libc, e.g. with musl this
+would fail in type conversion
+
+Upstream-Status: Submitted [https://github.com/gphoto/gphoto2/pull/535]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ gphoto2/main.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/gphoto2/main.c b/gphoto2/main.c
+index 2bf5964..9a6b05d 100644
+--- a/gphoto2/main.c
++++ b/gphoto2/main.c
+@@ -1198,7 +1198,7 @@ thread_func (void *data)
+ 	pthread_cleanup_pop (1);
+ }
+ 
+-static unsigned int
++static pthread_t
+ start_timeout_func (Camera *camera, unsigned int timeout,
+ 		    CameraTimeoutFunc func, void __unused__ *data)
+ {
+@@ -1219,7 +1219,7 @@ start_timeout_func (Camera *camera, unsigned int timeout,
+ }
+ 
+ static void
+-stop_timeout_func (Camera __unused__ *camera, unsigned int id,
++stop_timeout_func (Camera __unused__ *camera, pthread_t id,
+ 		   void __unused__ *data)
+ {
+ 	pthread_t tid = id;
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2/0002-Look-for-popt-with-GP_CHECK_LIBRARY-function.patch b/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2/0002-Look-for-popt-with-GP_CHECK_LIBRARY-function.patch
deleted file mode 100644
index e6c7dd6..0000000
--- a/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2/0002-Look-for-popt-with-GP_CHECK_LIBRARY-function.patch
+++ /dev/null
@@ -1,32 +0,0 @@
-From 884d4c9bbd2dc147d614a5fabc25dbea7a71cd48 Mon Sep 17 00:00:00 2001
-From: Andreas Baak <andreas.baak@gmail.com>
-Date: Sun, 12 Jul 2015 02:13:58 -0700
-Subject: [PATCH] Look for popt with GP_CHECK_LIBRARY function
-
-The previously used proprietary function GP_CHECK_POPT
-defined in /.m4m/gp-check-opt.m4 is not ready for cross
-compilation since it looks for the library and headers
-in the host system. Yocto reports the following error:
-"QA Issue: gphoto2: The compile log indicates that host
-include and/or library paths were used."
-Using the GP_CHECK_LIBRARY function fixes this problem
----
- configure.ac | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/configure.ac b/configure.ac
-index 93fbaf2..6d661f0 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -285,7 +285,7 @@ GP_CONFIG_MSG([Text preview support],[$aa_msg])
- dnl ---------------------------------------------------------------------------
- dnl popt.h: Simplifies handling of command-line options enormously.
- dnl ---------------------------------------------------------------------------
--GP_CHECK_POPT([mandatory])
-+GP_CHECK_LIBRARY([POPT], [popt], [], [popt.h], [], [], [mandatory])
- 
- 
- dnl ---------------------------------------------------------------------------
--- 
-1.9.1
-
diff --git a/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2_2.5.27.bb b/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2_2.5.28.bb
similarity index 69%
rename from meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2_2.5.27.bb
rename to meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2_2.5.28.bb
index 84ff677..fe33940 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2_2.5.27.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/gphoto2/gphoto2_2.5.28.bb
@@ -8,11 +8,9 @@
 
 SRC_URI = "${SOURCEFORGE_MIRROR}/gphoto/gphoto2-${PV}.tar.bz2;name=gphoto2 \
            file://0001-configure.ac-remove-AM_PO_SUBDIRS.patch \
-           file://0002-Look-for-popt-with-GP_CHECK_LIBRARY-function.patch \
+           file://0001-gphoto2-Use-pthread_t-abstract-type-for-thead-IDs.patch \
 "
-
-SRC_URI[gphoto2.md5sum] = "92d39b383c0e0354b4d3af1af7b8b416"
-SRC_URI[gphoto2.sha256sum] = "30054e93a1bb59f501aabd5018713177ea04ce0cb28935319bd6ca80061e8d38"
+SRC_URI[gphoto2.sha256sum] = "2a648dcdf12da19e208255df4ebed3e7d2a02f905be4165f2443c984cf887375"
 
 inherit autotools pkgconfig gettext
 
diff --git a/meta-openembedded/meta-oe/recipes-graphics/gphoto2/libgphoto2/0001-configure.ac-remove-AM_PO_SUBDIRS.patch b/meta-openembedded/meta-oe/recipes-graphics/gphoto2/libgphoto2/0001-configure.ac-remove-AM_PO_SUBDIRS.patch
index 8093676..f93650f 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/gphoto2/libgphoto2/0001-configure.ac-remove-AM_PO_SUBDIRS.patch
+++ b/meta-openembedded/meta-oe/recipes-graphics/gphoto2/libgphoto2/0001-configure.ac-remove-AM_PO_SUBDIRS.patch
@@ -19,30 +19,23 @@
  libgphoto2_port/configure.ac |    1 -
  2 files changed, 0 insertions(+), 2 deletions(-)
 
-diff --git a/configure.ac b/configure.ac
-index 2f8e1b8..2e90acf 100644
 --- a/configure.ac
 +++ b/configure.ac
-@@ -209,7 +209,6 @@ ALL_LINGUAS="cs da de es eu fr hu it ja nl pl ru sv uk vi zh_CN"
- GP_GETTEXT_HACK([${PACKAGE}-${LIBGPHOTO2_CURRENT_MIN}],[The gPhoto Team],[${MAIL_GPHOTO_TRANSLATION}])
- AM_GNU_GETTEXT_VERSION([0.14.1])
- AM_GNU_GETTEXT([external])
+@@ -282,7 +282,6 @@ dnl ------------------------------------
+ GP_GETTEXT_SETUP([GETTEXT_PACKAGE_LIBGPHOTO2],
+                  [${PACKAGE}-${LIBGPHOTO2_CURRENT_MIN}],
+                  [po])
 -AM_PO_SUBDIRS()
+ AM_GNU_GETTEXT_VERSION([0.19.1])
+ AM_GNU_GETTEXT([external])
  AM_ICONV()
- GP_GETTEXT_FLAGS()
- 
-diff --git a/libgphoto2_port/configure.ac b/libgphoto2_port/configure.ac
-index 0b66d58..df47357 100644
 --- a/libgphoto2_port/configure.ac
 +++ b/libgphoto2_port/configure.ac
-@@ -124,7 +124,6 @@ GP_GETTEXT_HACK([${PACKAGE}-${LIBGPHOTO2_PORT_CURRENT_MIN}],[Lutz Mueller and ot
- ALL_LINGUAS="cs da de es eu fi fr it ja nl pl pt_BR ru sk sr sv uk vi zh_CN zh_TW"
- AM_GNU_GETTEXT_VERSION([0.14.1])
- AM_GNU_GETTEXT([external])
+@@ -122,7 +122,6 @@ dnl ------------------------------------
+ GP_GETTEXT_SETUP([GETTEXT_PACKAGE_LIBGPHOTO2_PORT],
+                  [${PACKAGE}-${LIBGPHOTO2_PORT_CURRENT_MIN}],
+                  [po])
 -AM_PO_SUBDIRS()
+ AM_GNU_GETTEXT_VERSION([0.19.1])
+ AM_GNU_GETTEXT([external])
  AM_ICONV()
- GP_GETTEXT_FLAGS()
- 
--- 
-1.7.4.4
-
diff --git a/meta-openembedded/meta-oe/recipes-graphics/gphoto2/libgphoto2_2.5.27.bb b/meta-openembedded/meta-oe/recipes-graphics/gphoto2/libgphoto2_2.5.30.bb
similarity index 87%
rename from meta-openembedded/meta-oe/recipes-graphics/gphoto2/libgphoto2_2.5.27.bb
rename to meta-openembedded/meta-oe/recipes-graphics/gphoto2/libgphoto2_2.5.30.bb
index 41600be..0f2800a 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/gphoto2/libgphoto2_2.5.27.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/gphoto2/libgphoto2_2.5.30.bb
@@ -1,7 +1,7 @@
 SUMMARY = "libgphoto2 allows you to access digital cameras"
 SECTION = "libs"
 LICENSE = "LGPL-2.1-only"
-LIC_FILES_CHKSUM = "file://COPYING;md5=0448d3676bc0de00406af227d341a4d1"
+LIC_FILES_CHKSUM = "file://COPYING;md5=477378d78dfeeaa93826ee4ec7c643fb"
 
 DEPENDS = "libtool jpeg virtual/libusb0 libexif zlib libxml2"
 
@@ -13,9 +13,7 @@
            file://40-libgphoto2.rules \
            file://0001-configure.ac-remove-AM_PO_SUBDIRS.patch \
 "
-
-SRC_URI[libgphoto2.md5sum] = "83a2f96dade72e95dffb8e5fa9628d7e"
-SRC_URI[libgphoto2.sha256sum] = "f8b85478c44948a0b0b52c4d4dfda2de1d7bcb7b262c76bd1ae306d9c63240d7"
+SRC_URI[libgphoto2.sha256sum] = "ee61a1dac6ad5cf711d114e06b90a6d431961a6e7ec59f4b757a7cd77b1c0fb4"
 
 inherit autotools pkgconfig gettext lib_package
 
@@ -57,4 +55,4 @@
 FILES:${PN} += "${nonarch_base_libdir}/udev/*"
 FILES:${PN}-dbg += "${libdir}/*/*/.debug"
 FILES:${PN}-dev += "${libdir}/*/*/*.la"
-FILES:${PN}-doc += "${datadir}/libgphoto2_port/0.12.0/vcamera/README.txt"
+FILES:${PN}-doc += "${datadir}/libgphoto2_port/0.12.?/vcamera/README.txt"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/gtkperf/gtkperf/0001-main.c-Define-prototype-for-setup_appdata.patch b/meta-openembedded/meta-oe/recipes-graphics/gtkperf/gtkperf/0001-main.c-Define-prototype-for-setup_appdata.patch
new file mode 100644
index 0000000..08e899b
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-graphics/gtkperf/gtkperf/0001-main.c-Define-prototype-for-setup_appdata.patch
@@ -0,0 +1,27 @@
+From 6b5f695cb206cd8b3cbcbcd7713e52772e89306b Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 20:47:32 -0700
+Subject: [PATCH] main.c: Define prototype for setup_appdata
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/main.c | 2 ++
+ 1 file changed, 2 insertions(+)
+
+diff --git a/src/main.c b/src/main.c
+index c226799..b4ce6d1 100644
+--- a/src/main.c
++++ b/src/main.c
+@@ -18,6 +18,8 @@
+ #include "callbacks.h"
+ #include "appdata.h"
+ 
++void setup_appdata(AppData * appdata_in);
++
+ int
+ main (int argc, char **argv)
+ {
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-graphics/gtkperf/gtkperf_0.40.bb b/meta-openembedded/meta-oe/recipes-graphics/gtkperf/gtkperf_0.40.bb
index 0c4f819..a70d31f 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/gtkperf/gtkperf_0.40.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/gtkperf/gtkperf_0.40.bb
@@ -8,6 +8,7 @@
            file://0001-Include-stdlib.h-for-exit-API.patch \
            file://0002-timing.c-Fix-format-security-errors.patch \
            file://gettext.patch \
+           file://0001-main.c-Define-prototype-for-setup_appdata.patch \
            "
 
 SRC_URI[md5sum] = "4331dde4bb83865e15482885fcb0cc53"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/libforms/files/0001-Make-extern-declarations-in-header-file.patch b/meta-openembedded/meta-oe/recipes-graphics/libforms/files/0001-Make-extern-declarations-in-header-file.patch
index 619bad6..16689f6 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/libforms/files/0001-Make-extern-declarations-in-header-file.patch
+++ b/meta-openembedded/meta-oe/recipes-graphics/libforms/files/0001-Make-extern-declarations-in-header-file.patch
@@ -10,8 +10,7 @@
 Signed-off-by: Khem Raj <raj.khem@gmail.com>
 ---
  fdesign/sp_spinner.c | 2 +-
- lib/flinternal.h     | 4 ++--
- 2 files changed, 3 insertions(+), 3 deletions(-)
+ 1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/fdesign/sp_spinner.c b/fdesign/sp_spinner.c
 index 04f2c19..6206daa 100644
@@ -26,20 +25,6 @@
  
  
  /***************************************
-diff --git a/lib/flinternal.h b/lib/flinternal.h
-index 06d847e..577bd10 100644
---- a/lib/flinternal.h
-+++ b/lib/flinternal.h
-@@ -138,8 +138,8 @@ enum {
- 
- /* events.c or event related */
- 
--FL_OBJECT * fli_handled_obj;
--FL_OBJECT * fli_handled_parent;
-+extern FL_OBJECT * fli_handled_obj;
-+extern FL_OBJECT * fli_handled_parent;
- 
- void fli_obj_queue_delete( void );
  
 -- 
 2.28.0
diff --git a/meta-openembedded/meta-oe/recipes-graphics/libforms/files/0001-Modify-include-dir.patch b/meta-openembedded/meta-oe/recipes-graphics/libforms/files/0001-Modify-include-dir.patch
new file mode 100644
index 0000000..0b7c063
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-graphics/libforms/files/0001-Modify-include-dir.patch
@@ -0,0 +1,33 @@
+From d2d05928342f7f9f687ffea5e0b77f97f0ae5aad Mon Sep 17 00:00:00 2001
+From: Wang Mingyu <wangmy@fujitsu.com>
+Date: Wed, 7 Sep 2022 14:29:13 +0900
+Subject: [PATCH] Modify include dir
+
+error message:
+../../xforms-1.2.4/lib/fd/cmdbr.h:28:10: fatal error: ../include/forms.h: No such file or directory
+
+../include/forms.h specified in the header file cmdbr.h does not exist.
+
+Upstream-Status: Pending
+
+Signed-off-by: Wang Mingyu <wangmy@fujitsu.com>
+---
+ lib/fd/cmdbr.h | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/lib/fd/cmdbr.h b/lib/fd/cmdbr.h
+index 8495e05..2ddeced 100644
+--- a/lib/fd/cmdbr.h
++++ b/lib/fd/cmdbr.h
+@@ -25,7 +25,7 @@
+ #ifndef FD_cmd_h_
+ #define FD_cmd_h_
+ 
+-#include "../include/forms.h"
++#include "include/forms.h"
+ 
+ /* Callbacks, globals and object handlers */
+ 
+-- 
+2.25.1
+
diff --git a/meta-openembedded/meta-oe/recipes-graphics/libforms/libforms_1.2.3.bb b/meta-openembedded/meta-oe/recipes-graphics/libforms/libforms_1.2.4.bb
similarity index 83%
rename from meta-openembedded/meta-oe/recipes-graphics/libforms/libforms_1.2.3.bb
rename to meta-openembedded/meta-oe/recipes-graphics/libforms/libforms_1.2.4.bb
index 838c1d5..01db3c4 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/libforms/libforms_1.2.3.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/libforms/libforms_1.2.4.bb
@@ -1,6 +1,5 @@
 DESCRIPTION = "The XForms graphical interface widget library	"
 HOMEPAGE = "http://savannah.nongnu.org/projects/xforms/"
-PR = "r0"
 
 LICENSE = "LGPL-2.1-or-later"
 LIC_FILES_CHKSUM = "file://COPYING.LIB;md5=ade9a053df81f5b9408d2f4f5546df86"
@@ -10,10 +9,10 @@
            file://add-absolute-path-for-include-dir.patch \
            file://fix-path-fdesign_LDADD.patch \
            file://0001-Make-extern-declarations-in-header-file.patch \
+           file://0001-Modify-include-dir.patch \
           "
 
-SRC_URI[md5sum] = "235720a758a8b8d9e6e452dc67190e9b"
-SRC_URI[sha256sum] = "7989b39598c769820ad451ad91e5cb0de29946940c8240aac94ca8238c2def61"
+SRC_URI[sha256sum] = "78cc6b07071bbeaa1f906e0a22d5e9980e48f8913577bc082d661afe5cb75696"
 
 inherit autotools features_check
 
diff --git a/meta-openembedded/meta-oe/recipes-graphics/lvgl/dialog-lvgl_git.bb b/meta-openembedded/meta-oe/recipes-graphics/lvgl/dialog-lvgl_git.bb
new file mode 100644
index 0000000..7e24c51
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-graphics/lvgl/dialog-lvgl_git.bb
@@ -0,0 +1,29 @@
+# SPDX-FileCopyrightText: Huawei Inc.
+# SPDX-License-Identifier: MIT
+
+SRC_URI = "gitsm://git.ostc-eu.org/rzr/dialog-lvgl;destsuffix=${S};protocol=https;nobranch=1"
+SRCREV = "5d2121457a6988c97cacb0790594440693fc3d29"
+
+LICENSE = "MIT"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=8ce0a84e5276f01364119c873b712c4f"
+AUTHOR = "Philippe Coval <philippe.coval.ext@huawei.com>"
+
+DEPENDS += "lvgl"
+DEPENDS += "lv-drivers"
+
+SUMMARY = "Basic UI utility to be used in scripts"
+DESCRIPTION = "Inspired by ncurses' dialog, implemented using LVGL"
+HOMEPAGE = "https://git.ostc-eu.org/rzr/dialog-lvgl/-/wikis/"
+
+REQUIRED_DISTRO_FEATURES = "wayland"
+
+inherit pkgconfig
+inherit features_check
+
+EXTRA_OEMAKE += "sysroot=${RECIPE_SYSROOT}"
+EXTRA_OEMAKE += "DESTDIR=${D}"
+EXTRA_OEMAKE += "lvgl_driver=wayland"
+
+do_install() {
+    oe_runmake install
+}
diff --git a/meta-openembedded/meta-oe/recipes-graphics/lxdm/lxdm/0001-Initialize-msghdr-struct-in-a-portable-way.patch b/meta-openembedded/meta-oe/recipes-graphics/lxdm/lxdm/0001-Initialize-msghdr-struct-in-a-portable-way.patch
new file mode 100644
index 0000000..d477950
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-graphics/lxdm/lxdm/0001-Initialize-msghdr-struct-in-a-portable-way.patch
@@ -0,0 +1,40 @@
+From 7c370576b4fb7c7d3b6dbf33125136a4ae70a330 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 31 Aug 2022 21:52:16 -0700
+Subject: [PATCH] Initialize msghdr struct in a portable way
+
+Initializing the structure assuming glibc layout results in
+compile errors on musl, therefore do partial initialization and then
+assign the members individually.
+
+Upstream-Status: Submitted [https://sourceforge.net/p/lxdm/code/merge-requests/4/]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/lxcom.c       | 9 +++++++--
+ 1 file changed, 7 insertions(+), 2 deletions(-)
+
+diff --git a/src/lxcom.c b/src/lxcom.c
+index 397d1b5..517c775 100644
+--- a/src/lxcom.c
++++ b/src/lxcom.c
+@@ -117,10 +117,15 @@ static gboolean lxcom_dispatch (GSource *source,GSourceFunc callback,gpointer us
+ 	char ctrl[/*CMSG_SPACE(sizeof(LXDM_CRED))*/1024];
+ 	struct sockaddr_un peer;
+ 	struct iovec v={buf,sizeof(buf)};
+-	struct msghdr h={&peer,sizeof(peer),&v,1,ctrl,sizeof(ctrl),0};
++	struct msghdr h={0};
+ 	struct cmsghdr *cmptr;
+ 	int ret;
+-
++	h.msg_name = &peer;
++	h.msg_namelen = sizeof(peer);
++	h.msg_iov = &v;
++	h.msg_iovlen = 1;
++	h.msg_control = ctrl;
++	h.msg_controllen = sizeof(ctrl);
+ 	while(1)
+ 	{
+ 		peer.sun_family=0;
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-graphics/lxdm/lxdm_0.5.3.bb b/meta-openembedded/meta-oe/recipes-graphics/lxdm/lxdm_0.5.3.bb
index 83b15ce..adf9cb5 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/lxdm/lxdm_0.5.3.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/lxdm/lxdm_0.5.3.bb
@@ -19,6 +19,7 @@
            file://0008-greeter.c-show-information-on-gtk-label-info.patch \
            file://0009-greeter.c-disallow-empty-new-password.patch \
            file://0001-systemd-lxdm.service-remove-plymouth-quit-conflicts.patch \
+           file://0001-Initialize-msghdr-struct-in-a-portable-way.patch \
            "
 SRC_URI[md5sum] = "061caae432634e6db38bbdc84bc6ffa0"
 SRC_URI[sha256sum] = "4891efee81c72a400cc6703e40aa76f3f3853833d048b72ec805da0f93567f2f"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/openbox/files/0001-Fix-function-protype-visibility.patch b/meta-openembedded/meta-oe/recipes-graphics/openbox/files/0001-Fix-function-protype-visibility.patch
new file mode 100644
index 0000000..344c5d1
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-graphics/openbox/files/0001-Fix-function-protype-visibility.patch
@@ -0,0 +1,94 @@
+From 941d5ff3426e68cb9bcb4ae86066124cb2535b69 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 12:32:20 -0700
+Subject: [PATCH] Fix function prototype visibility
+
+Include ctype.h for toupper
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/appearance.c | 1 +
+ src/desktops.c   | 2 +-
+ src/desktops.h   | 2 +-
+ src/main.c       | 1 +
+ src/moveresize.c | 2 +-
+ src/moveresize.h | 2 +-
+ 6 files changed, 6 insertions(+), 4 deletions(-)
+
+diff --git a/src/appearance.c b/src/appearance.c
+index 4fb3f0c..ee55661 100644
+--- a/src/appearance.c
++++ b/src/appearance.c
+@@ -20,6 +20,7 @@
+ #include "main.h"
+ #include "tree.h"
+ #include "preview_update.h"
++#include <ctype.h> /* for toupper */
+ 
+ static gboolean mapping = FALSE;
+ 
+diff --git a/src/desktops.c b/src/desktops.c
+index 8297f00..27ca514 100644
+--- a/src/desktops.c
++++ b/src/desktops.c
+@@ -38,7 +38,7 @@ static void on_desktop_names_cell_edited(GtkCellRendererText *cell,
+                                          gpointer data);
+ static void enable_stuff();
+ 
+-void desktops_setup_tab()
++void desktops_setup_tab(void)
+ {
+     GtkWidget *w;
+     GtkCellRenderer *render;
+diff --git a/src/desktops.h b/src/desktops.h
+index 1ba3e36..446bfbb 100644
+--- a/src/desktops.h
++++ b/src/desktops.h
+@@ -24,5 +24,5 @@
+ 
+ void desktops_setup_num(GtkWidget *w);
+ void desktops_setup_names(GtkWidget *w);
+-
++void desktops_setup_tab(void);
+ #endif
+diff --git a/src/main.c b/src/main.c
+index d7e3446..0176035 100644
+--- a/src/main.c
++++ b/src/main.c
+@@ -28,6 +28,7 @@
+ #include "dock.h"
+ #include "preview_update.h"
+ #include "gettext.h"
++#include "moveresize.h"
+ 
+ #include <gdk/gdkx.h>
+ #define SN_API_NOT_YET_FROZEN
+diff --git a/src/moveresize.c b/src/moveresize.c
+index c6fb3dd..bb52729 100644
+--- a/src/moveresize.c
++++ b/src/moveresize.c
+@@ -37,7 +37,7 @@ static gboolean mapping = FALSE;
+ static void enable_stuff();
+ static void write_fixed_position(const gchar *coord);
+ 
+-void moveresize_setup_tab()
++void moveresize_setup_tab(void)
+ {
+     GtkWidget *w, *w1, *w2, *w3;
+     GtkSizeGroup *group;
+diff --git a/src/moveresize.h b/src/moveresize.h
+index 82ecc96..8faf526 100644
+--- a/src/moveresize.h
++++ b/src/moveresize.h
+@@ -20,6 +20,6 @@
+ #ifndef obconf__moveresize_h
+ #define obconf__moveresize_h
+ 
+-void moveresize_setup_tab();
++void moveresize_setup_tab(void);
+ 
+ #endif
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-graphics/openbox/obconf_git.bb b/meta-openembedded/meta-oe/recipes-graphics/openbox/obconf_git.bb
index 7723471..053af5a 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/openbox/obconf_git.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/openbox/obconf_git.bb
@@ -14,6 +14,7 @@
 SRCREV = "63ec47c5e295ad4f09d1df6d92afb7e10c3fec39"
 SRC_URI = " \
     git://git.openbox.org/dana/obconf;branch=master \
+    file://0001-Fix-function-protype-visibility.patch \
 "
 
 S = "${WORKDIR}/git"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/pango/pangomm-2.48_2.50.0.bb b/meta-openembedded/meta-oe/recipes-graphics/pango/pangomm-2.48_2.50.1.bb
similarity index 85%
rename from meta-openembedded/meta-oe/recipes-graphics/pango/pangomm-2.48_2.50.0.bb
rename to meta-openembedded/meta-oe/recipes-graphics/pango/pangomm-2.48_2.50.1.bb
index 84aedba..b4079f7 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/pango/pangomm-2.48_2.50.0.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/pango/pangomm-2.48_2.50.1.bb
@@ -10,7 +10,7 @@
 GNOMEBN = "pangomm"
 inherit gnomebase features_check
 
-SRC_URI[archive.sha256sum] = "a27aa77e017b9afce9e751d85bd1cf890abbb3a58bf59d0fac917eef82db3b5b"
+SRC_URI[archive.sha256sum] = "ccc9923413e408c2bff637df663248327d72822f11e394b423e1c5652b7d9214"
 REQUIRED_DISTRO_FEATURES = "x11"
 
 S = "${WORKDIR}/${GNOMEBN}-${PV}"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-Include-limits-header-for-numeric_limits.patch b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-Include-limits-header-for-numeric_limits.patch
deleted file mode 100644
index 746497d..0000000
--- a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-Include-limits-header-for-numeric_limits.patch
+++ /dev/null
@@ -1,88 +0,0 @@
-From aec0be857ed3faef8802c7fd61f3d7798a565108 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Thu, 29 Apr 2021 16:03:42 -0700
-Subject: [PATCH] Include <limits> header for numeric_limits
-
-Fixes
-vulkancts/framework/vulkan/vkRayTracingUtil.hpp:116:32: error: 'numeric_limits' is not a member of 'std'
-
-Upstream-Status: Pending
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- framework/vulkan/vkRayTracingUtil.hpp                           | 1 +
- modules/vulkan/api/vktApiBufferTests.cpp                        | 1 +
- modules/vulkan/spirv_assembly/vktSpvAsmNonSemanticInfoTests.cpp | 2 ++
- 3 files changed, 4 insertions(+)
-
---- a/framework/vulkan/vkRayTracingUtil.hpp
-+++ b/framework/vulkan/vkRayTracingUtil.hpp
-@@ -33,6 +33,7 @@
- #include "tcuVector.hpp"
- #include "tcuVectorType.hpp"
- 
-+#include <limits>
- #include <vector>
- 
- namespace vk
---- a/modules/vulkan/api/vktApiBufferTests.cpp
-+++ b/modules/vulkan/api/vktApiBufferTests.cpp
-@@ -35,6 +35,7 @@
- #include "tcuPlatform.hpp"
- 
- #include <algorithm>
-+#include <limits>
- 
- namespace vkt
- {
---- a/modules/vulkan/spirv_assembly/vktSpvAsmNonSemanticInfoTests.cpp
-+++ b/modules/vulkan/spirv_assembly/vktSpvAsmNonSemanticInfoTests.cpp
-@@ -28,6 +28,8 @@
- #include "vktSpvAsmComputeShaderCase.hpp"
- #include "vktSpvAsmGraphicsShaderTestUtil.hpp"
- 
-+#include <limits>
-+
- namespace vkt
- {
- namespace SpirVAssembly
---- a/modules/vulkan/memory/vktMemoryDeviceMemoryReportTests.cpp
-+++ b/modules/vulkan/memory/vktMemoryDeviceMemoryReportTests.cpp
-@@ -40,6 +40,7 @@
- 
- #include "deSharedPtr.hpp"
- 
-+#include <limits>
- #include <set>
- #include <vector>
- 
---- a/modules/vulkan/draw/vktDrawDepthClampTests.cpp
-+++ b/modules/vulkan/draw/vktDrawDepthClampTests.cpp
-@@ -38,6 +38,7 @@
- #include "vkQueryUtil.hpp"
- #include "tcuTextureUtil.hpp"
- 
-+#include <limits>
- #include <cmath>
- 
- namespace vkt
---- a/modules/vulkan/wsi/vktWsiFullScreenExclusiveTests.cpp
-+++ b/modules/vulkan/wsi/vktWsiFullScreenExclusiveTests.cpp
-@@ -39,6 +39,8 @@
- #include "tcuPlatform.hpp"
- #include "tcuCommandLine.hpp"
- 
-+#include <limits>
-+
- #if ( DE_OS == DE_OS_WIN32 )
- 	#define NOMINMAX
- 	#define WIN32_LEAN_AND_MEAN
---- a/modules/vulkan/synchronization/vktSynchronizationUtil.cpp
-+++ b/modules/vulkan/synchronization/vktSynchronizationUtil.cpp
-@@ -21,6 +21,7 @@
-  * \brief Synchronization tests utilities
-  *//*--------------------------------------------------------------------*/
- 
-+#include <limits>
- #include "vktSynchronizationUtil.hpp"
- #include "vkTypeUtil.hpp"
- #include "vkCmdUtil.hpp"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-Remove-dead-variable-984.patch b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-Remove-dead-variable-984.patch
new file mode 100644
index 0000000..c29d80a
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-Remove-dead-variable-984.patch
@@ -0,0 +1,34 @@
+From 0d863b444c9a54a92dab176b1b656c116923e1ca Mon Sep 17 00:00:00 2001
+From: alan-baker <alanbaker@google.com>
+Date: Wed, 30 Mar 2022 12:59:28 -0400
+Subject: [PATCH] Remove dead variable (#984)
+
+Upstream-Status: Backport [https://github.com/google/amber/commit/627ee453d6047ced0e2dd13cde983b341d0615e3]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/vkscript/command_parser.cc | 2 --
+ 1 file changed, 2 deletions(-)
+
+diff --git a/src/vkscript/command_parser.cc b/src/vkscript/command_parser.cc
+index 610c7e6..3429d4d 100644
+--- a/src/vkscript/command_parser.cc
++++ b/src/vkscript/command_parser.cc
+@@ -486,7 +486,6 @@ Result CommandParser::ParseValues(const std::string& name,
+                                   std::vector<Value>* values) {
+   assert(values);
+ 
+-  uint32_t row_index = 0;
+   auto token = tokenizer_->NextToken();
+   size_t seen = 0;
+   while (!token->IsEOL() && !token->IsEOS()) {
+@@ -515,7 +514,6 @@ Result CommandParser::ParseValues(const std::string& name,
+     values->push_back(v);
+     token = tokenizer_->NextToken();
+ 
+-    ++row_index;
+     ++seen;
+   }
+ 
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-Workaround-for-GCC-11-uninit-variable-warnings-946.patch b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-Workaround-for-GCC-11-uninit-variable-warnings-946.patch
deleted file mode 100644
index 6c87cad..0000000
--- a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-Workaround-for-GCC-11-uninit-variable-warnings-946.patch
+++ /dev/null
@@ -1,73 +0,0 @@
-From 9cd614dd5481a4fdf552effac4820f51a10092c7 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Mika=20V=C3=A4in=C3=B6l=C3=A4?=
- <33728696+mvainola@users.noreply.github.com>
-Date: Wed, 7 Apr 2021 13:12:17 +0300
-Subject: [PATCH] Workaround for GCC 11 uninit variable warnings (#946)
-
-Building Amber with GCC 11.0.1 produces some uninitialized variable
-warnings. This commit works around them by replacing
-reinterpret_cast with memcpy when type punning unsigned integers to
-floats.
-
-Upstream-Status: Backport [https://github.com/google/amber/commit/aa69a0ac23ea7f68dd32bbef210546a5d84c1734]
----
- src/float16_helper.cc | 22 ++++++++++++++++------
- 1 file changed, 16 insertions(+), 6 deletions(-)
-
-diff --git a/src/float16_helper.cc b/src/float16_helper.cc
-index 617bd72..5cb35e7 100644
---- a/src/float16_helper.cc
-+++ b/src/float16_helper.cc
-@@ -15,6 +15,7 @@
- #include "src/float16_helper.h"
- 
- #include <cassert>
-+#include <cstring>
- 
- // Float10
- // | 9 8 7 6 5 | 4 3 2 1 0 |
-@@ -75,8 +76,11 @@ float HexFloat16ToFloat(const uint8_t* value) {
-   }
- 
-   uint32_t hex = sign | exponent | mantissa;
--  float* hex_float = reinterpret_cast<float*>(&hex);
--  return *hex_float;
-+  float hex_float;
-+  static_assert((sizeof(uint32_t) == sizeof(float)),
-+                "sizeof(uint32_t) != sizeof(float)");
-+  memcpy(&hex_float, &hex, sizeof(float));
-+  return hex_float;
- }
- 
- // Convert float |value| whose size is 11 bits to 32 bits float
-@@ -89,8 +93,11 @@ float HexFloat11ToFloat(const uint8_t* value) {
-   uint32_t mantissa = (static_cast<uint32_t>(value[0]) & 0x3f) << 17U;
- 
-   uint32_t hex = exponent | mantissa;
--  float* hex_float = reinterpret_cast<float*>(&hex);
--  return *hex_float;
-+  float hex_float;
-+  static_assert((sizeof(uint32_t) == sizeof(float)),
-+                "sizeof(uint32_t) != sizeof(float)");
-+  memcpy(&hex_float, &hex, sizeof(float));
-+  return hex_float;
- }
- 
- // Convert float |value| whose size is 10 bits to 32 bits float
-@@ -103,8 +110,11 @@ float HexFloat10ToFloat(const uint8_t* value) {
-   uint32_t mantissa = (static_cast<uint32_t>(value[0]) & 0x1f) << 18U;
- 
-   uint32_t hex = exponent | mantissa;
--  float* hex_float = reinterpret_cast<float*>(&hex);
--  return *hex_float;
-+  float hex_float;
-+  static_assert((sizeof(uint32_t) == sizeof(float)),
-+                "sizeof(uint32_t) != sizeof(float)");
-+  memcpy(&hex_float, &hex, sizeof(float));
-+  return hex_float;
- }
- 
- }  // namespace
--- 
-2.31.1
-
diff --git a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-cmake-Define-WAYLAND_SCANNER-and-WAYLAND_PROTOCOLS_D.patch b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-cmake-Define-WAYLAND_SCANNER-and-WAYLAND_PROTOCOLS_D.patch
new file mode 100644
index 0000000..d7000b7
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-cmake-Define-WAYLAND_SCANNER-and-WAYLAND_PROTOCOLS_D.patch
@@ -0,0 +1,37 @@
+From ec12bb7bda60cdf2c848e13df67452a7d30a42be Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Sat, 3 Sep 2022 18:52:42 -0700
+Subject: [PATCH] cmake: Define WAYLAND_SCANNER and WAYLAND_PROTOCOLS_DIR if
+ not already defined
+
+This helps with cross compiling and providing these knobs from cmake
+cmdline
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ framework/platform/CMakeLists.txt | 8 ++++++--
+ 1 file changed, 6 insertions(+), 2 deletions(-)
+
+diff --git a/framework/platform/CMakeLists.txt b/framework/platform/CMakeLists.txt
+index c3ac463e4..030163f6c 100644
+--- a/framework/platform/CMakeLists.txt
++++ b/framework/platform/CMakeLists.txt
+@@ -73,8 +73,12 @@ if (NOT DEFINED TCUTIL_PLATFORM_SRCS)
+ 			add_definitions(-DDEQP_SUPPORT_WAYLAND=1)
+ 			include_directories(lnx/wayland)
+ 
+-			pkg_get_variable(WAYLAND_PROTOCOLS_DIR wayland-protocols pkgdatadir)
+-			pkg_get_variable(WAYLAND_SCANNER wayland-scanner wayland_scanner)
++			if (NOT DEFINED WAYLAND_PROTOCOLS_DIR)
++				pkg_get_variable(WAYLAND_PROTOCOLS_DIR wayland-protocols pkgdatadir)
++			endif()
++			if (NOT DEFINED WAYLAND_SCANNER)
++				pkg_get_variable(WAYLAND_SCANNER wayland-scanner wayland_scanner)
++			endif()
+ 
+ 			set(DEQP_XDG_SHELL_PROTOCOL ${WAYLAND_PROTOCOLS_DIR}/stable/xdg-shell/xdg-shell.xml)
+ 			set(DEQP_XDG_SHELL_GEN_OUTPUTS_DIR ${PROJECT_BINARY_DIR}/framework/platform)
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-use-library-sonames-for-linking.patch b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-use-library-sonames-for-linking.patch
index cb396c3..b5a1d33 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-use-library-sonames-for-linking.patch
+++ b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/files/0001-use-library-sonames-for-linking.patch
@@ -19,8 +19,6 @@
  framework/platform/surfaceless/tcuSurfacelessPlatform.cpp   | 6 +++---
  6 files changed, 9 insertions(+), 9 deletions(-)
 
-diff --git a/framework/egl/egluGLContextFactory.cpp b/framework/egl/egluGLContextFactory.cpp
-index 8fbea2af1..8d42f19eb 100644
 --- a/framework/egl/egluGLContextFactory.cpp
 +++ b/framework/egl/egluGLContextFactory.cpp
 @@ -63,7 +63,7 @@ using std::vector;
@@ -41,8 +39,6 @@
  #	endif
  #endif
  
-diff --git a/framework/egl/wrapper/eglwLibrary.cpp b/framework/egl/wrapper/eglwLibrary.cpp
-index d7e07fe18..ebdf68b14 100644
 --- a/framework/egl/wrapper/eglwLibrary.cpp
 +++ b/framework/egl/wrapper/eglwLibrary.cpp
 @@ -148,7 +148,7 @@ DefaultLibrary::~DefaultLibrary (void)
@@ -54,11 +50,9 @@
  #elif (DE_OS == DE_OS_WIN32)
  	return "libEGL.dll";
  #else
-diff --git a/framework/platform/android/tcuAndroidPlatform.cpp b/framework/platform/android/tcuAndroidPlatform.cpp
-index b9a4c716f..05cec0b49 100644
 --- a/framework/platform/android/tcuAndroidPlatform.cpp
 +++ b/framework/platform/android/tcuAndroidPlatform.cpp
-@@ -57,7 +57,7 @@ static const eglu::NativeWindow::Capability		WINDOW_CAPABILITIES		= (eglu::Nativ
+@@ -57,7 +57,7 @@ static const eglu::NativeWindow::Capabil
  class NativeDisplay : public eglu::NativeDisplay
  {
  public:
@@ -67,11 +61,9 @@
  	virtual							~NativeDisplay			(void) {}
  
  	virtual EGLNativeDisplayType	getLegacyNative			(void)			{ return EGL_DEFAULT_DISPLAY;	}
-diff --git a/framework/platform/lnx/X11/tcuLnxX11EglDisplayFactory.cpp b/framework/platform/lnx/X11/tcuLnxX11EglDisplayFactory.cpp
-index 009c05e18..237c5e16f 100644
 --- a/framework/platform/lnx/X11/tcuLnxX11EglDisplayFactory.cpp
 +++ b/framework/platform/lnx/X11/tcuLnxX11EglDisplayFactory.cpp
-@@ -75,7 +75,7 @@ class Library : public eglw::DefaultLibrary
+@@ -75,7 +75,7 @@ class Library : public eglw::DefaultLibr
  {
  public:
  	Library (void)
@@ -80,8 +72,6 @@
  	{
  	}
  
-diff --git a/framework/platform/lnx/wayland/tcuLnxWaylandEglDisplayFactory.cpp b/framework/platform/lnx/wayland/tcuLnxWaylandEglDisplayFactory.cpp
-index 97bc3a0ed..3a20d63d3 100644
 --- a/framework/platform/lnx/wayland/tcuLnxWaylandEglDisplayFactory.cpp
 +++ b/framework/platform/lnx/wayland/tcuLnxWaylandEglDisplayFactory.cpp
 @@ -66,7 +66,7 @@ public:
@@ -93,8 +83,6 @@
  
  	~Display(void) {}
  	wayland::Display&			getWaylandDisplay	(void)	{ return *m_display; }
-diff --git a/framework/platform/surfaceless/tcuSurfacelessPlatform.cpp b/framework/platform/surfaceless/tcuSurfacelessPlatform.cpp
-index 9783eaeab..a1d8ac667 100644
 --- a/framework/platform/surfaceless/tcuSurfacelessPlatform.cpp
 +++ b/framework/platform/surfaceless/tcuSurfacelessPlatform.cpp
 @@ -69,7 +69,7 @@ using std::vector;
@@ -114,8 +102,8 @@
 +#	define DEQP_OPENGL_LIBRARY_PATH "libGL.so.1"
  #endif
  
- namespace tcu
-@@ -238,7 +238,7 @@ glu::RenderContext* ContextFactory::createContext(const glu::RenderConfig& confi
+ #if !defined(DEQP_VULKAN_LIBRARY_PATH)
+@@ -234,7 +234,7 @@ glu::RenderContext* ContextFactory::crea
  }
  
  EglRenderContext::EglRenderContext(const glu::RenderConfig& config, const tcu::CommandLine& cmdLine)
diff --git a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/khronos-cts.inc b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/khronos-cts.inc
index 7d73bfb..f907455 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/khronos-cts.inc
+++ b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/khronos-cts.inc
@@ -7,23 +7,24 @@
 	git://github.com/KhronosGroup/glslang.git;protocol=https;destsuffix=git/external/glslang/src;name=glslang;branch=master \
 	git://github.com/KhronosGroup/SPIRV-Headers.git;protocol=https;destsuffix=git/external/spirv-headers/src;name=spirv-headers;branch=master \
 	git://github.com/KhronosGroup/SPIRV-Tools.git;protocol=https;destsuffix=git/external/spirv-tools/src;name=spirv-tools;branch=master \
-	https://raw.githubusercontent.com/baldurk/renderdoc/v1.1/renderdoc/api/app/renderdoc_app.h;subdir=git/external/renderdoc/src;name=renderdoc \
+        git://github.com/open-source-parsers/jsoncpp.git;protocol=https;destsuffix=git/external/jsoncpp/src;name=jsoncpp;branch=master \
+        git://github.com/KhronosGroup/Vulkan-Docs.git;protocol=https;destsuffix=git/external/vulkan-docs/src;name=vulkan-docs;branch=main \
+        https://raw.githubusercontent.com/baldurk/renderdoc/fcdea67879fa1991e56cf7734ce0ce27866b665f/renderdoc/api/app/renderdoc_app.h;subdir=git/external/renderdoc/src;name=renderdoc \
 "
 
 S = "${WORKDIR}/git"
 
-SRCREV_FORMAT = "vk-gl-cts_amber_glslang_spirv-headers_spirv-tools"
+SRCREV_FORMAT = "vk-gl-cts_amber_glslang_spirv-headers_spirv-tools_jsoncpp_vulkan-docs"
 
 S = "${WORKDIR}/git"
 
-inherit pkgconfig cmake features_check
+inherit pkgconfig cmake features_check python3native qemu
 
 ANY_OF_DISTRO_FEATURES += "opengl wayland"
 
-DEPENDS += "libpng zlib virtual/libgles2"
+DEPENDS += "python3-lxml-native libpng zlib virtual/libgles2 qemu-native"
 
-SRC_URI += "file://0001-Workaround-for-GCC-11-uninit-variable-warnings-946.patch;patchdir=external/amber/src \
-            file://0001-Include-limits-header-for-numeric_limits.patch;patchdir=external/vulkancts \
+SRC_URI += " \
             file://0001-vulkancts.patch \
             file://0001-use-library-sonames-for-linking.patch \
 "
@@ -37,6 +38,8 @@
 	file://fix-clang-private-operator.patch \
 "
 
+EXTRA_OECMAKE:prepend:class-target = "-DCMAKE_CROSSCOMPILING_EMULATOR=${WORKDIR}/qemuwrapper -DWAYLAND_SCANNER=${STAGING_BINDIR_NATIVE}/wayland-scanner -DWAYLAND_PROTOCOLS_DIR=${STAGING_DATADIR}/wayland-protocols "
+
 # The best thing for the user to do is to not specify any of the following
 # PACKAGECONFIGs (i.e. leave it blank) which tells the project to do its own
 # probing and build what it thinks is appropriate.
@@ -44,11 +47,22 @@
 # to override this behaviour.
 PACKAGECONFIG ??= ""
 PACKAGECONFIG[surfaceless] = "-DDEQP_TARGET=surfaceless,,,,,wayland x11_egl x11_glx x11_egl_glx"
-PACKAGECONFIG[wayland] = "-DDEQP_TARGET=wayland,,wayland,,,surfaceless x11_egl x11_glx x11_egl_glx"
+PACKAGECONFIG[wayland] = "-DDEQP_TARGET=wayland,,wayland-native wayland wayland-protocols,,,surfaceless x11_egl x11_glx x11_egl_glx"
 PACKAGECONFIG[x11_egl] = "-DDEQP_TARGET=x11_egl,,virtual/libx11 virtual/egl,,,surfaceless wayland x11_glx x11_egl_glx"
 PACKAGECONFIG[x11_glx] = "-DDEQP_TARGET=x11_glx,,virtual/libx11,,,surfaceless wayland x11_egl x11_egl_glx"
 PACKAGECONFIG[x11_egl_glx] = "-DDEQP_TARGET=x11_glx,,virtual/libx11 virtual/egl,,,surfaceless wayland x11_egl x11_glx"
 
+do_configure:append() {
+        # Write out a qemu wrapper that will be used by cmake
+        # so that it can run target helper binaries through that.
+        qemu_binary="${@qemu_wrapper_cmdline(d, d.getVar('STAGING_DIR_HOST'), [d.expand('${STAGING_DIR_HOST}${libdir}'),d.expand('${STAGING_DIR_HOST}${base_libdir}')])}"
+        cat > ${WORKDIR}/qemuwrapper << EOF
+#!/bin/sh
+$qemu_binary "\$@"
+EOF
+        chmod +x ${WORKDIR}/qemuwrapper
+}
+
 python __anonymous() {
     # if the user doesn't specify any PACKAGECONFIG then the cts build system
     # is going to probe the sysroot to try to figure out what to build
@@ -57,7 +71,7 @@
     distrofeatures = (d.getVar("DISTRO_FEATURES") or "")
     if not bb.utils.contains_any("PACKAGECONFIG", ["surfaceless", "wayland", "x11_egl", "x11_glx", "x11_egl_glx"], True, False, d):
         if "wayland" in distrofeatures:
-            d.appendVar("DEPENDS", " wayland ")
+            d.appendVar("DEPENDS", " wayland-native wayland wayland-protocols")
         if "x11" in distrofeatures:
             d.appendVar("DEPENDS", " virtual/libx11 virtual/egl ")
 }
diff --git a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/opengl-es-cts_3.2.7.0.bb b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/opengl-es-cts_3.2.7.0.bb
deleted file mode 100644
index 1c1371b..0000000
--- a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/opengl-es-cts_3.2.7.0.bb
+++ /dev/null
@@ -1,31 +0,0 @@
-DESCRIPTION = "OpenGL CTS"
-
-require khronos-cts.inc
-# opengl-es-cts-3.2.7.0
-SRCREV_vk-gl-cts = "7cba7113c40f2ff03573c8c2c90661b2249e04fa"
-SRCREV_amber = "4d0115cccfcb3b73d20b6513b1c40748e6403c50"
-SRCREV_glslang = "ffccefddfd9a02ec0c0b6dd04ef5e1042279c97f"
-SRCREV_spirv-headers = "104ecc356c1bea4476320faca64440cd1df655a3"
-SRCREV_spirv-tools = "cd590fa3341284cd6d1ee82366155786cfd44c96"
-SRC_URI[renderdoc.sha256sum] = "e7b5f0aa5b1b0eadc63a1c624c0ca7f5af133aa857d6a4271b0ef3d0bdb6868e"
-
-S = "${WORKDIR}/git"
-
-do_install() {
-	install -d ${D}/${CTSDIR}
-	cp -r ${B}/external/openglcts/modules/* ${D}/${CTSDIR}
-
-	install -m 0755 ${B}/modules/egl/deqp-egl ${D}/${CTSDIR}
-	install -m 0755 ${B}/modules/gles2/deqp-gles2 ${D}/${CTSDIR}
-	install -m 0755 ${B}/modules/gles3/deqp-gles3 ${D}/${CTSDIR}
-	install -m 0755 ${B}/modules/gles31/deqp-gles31 ${D}/${CTSDIR}
-	install -m 0755 ${B}/modules/internal/de-internal-tests ${D}/${CTSDIR}
-
-	rm -r ${D}/${CTSDIR}/*.a ${D}/${CTSDIR}/cmake_install.cmake ${D}/${CTSDIR}/CMakeFiles
-	rm -r ${D}/${CTSDIR}/*/*.a ${D}/${CTSDIR}/*/cmake_install.cmake ${D}/${CTSDIR}/*/CMakeFiles
-	rm -r ${D}/${CTSDIR}/common/subgroups/*.a ${D}/${CTSDIR}/common/subgroups/cmake_install.cmake ${D}/${CTSDIR}/common/subgroups/CMakeFiles
-}
-
-SECURITY_CFLAGS:riscv64 = "${SECURITY_NOPIE_CFLAGS}"
-LTO = ""
-
diff --git a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/opengl-es-cts_3.2.8.0.bb b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/opengl-es-cts_3.2.8.0.bb
new file mode 100644
index 0000000..024a171
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/opengl-es-cts_3.2.8.0.bb
@@ -0,0 +1,36 @@
+DESCRIPTION = "OpenGL CTS"
+
+require khronos-cts.inc
+# opengl-es-cts-3.2.8.0
+SRCREV_vk-gl-cts = "317f467fac032b88a4789e4345421f92c4e8716d"
+SRCREV_amber = "209d92e2c27a333e723d24497e8c7a07b2f2eb39"
+SRCREV_glslang = "9158061398a96033c990e69156bd28c67114544b"
+SRCREV_spirv-headers = "449bc986ba6f4c5e10e32828783f9daef2a77644"
+SRCREV_spirv-tools = "ee30773650eca50b1cd3c913babcc2b50d7b91fd"
+# Not yet needed by this CTS release, but khronos-cts.inc fetches them, so SRCREVs must be set
+SRCREV_jsoncpp = "9059f5cad030ba11d37818847443a53918c327b1"
+SRCREV_vulkan-docs = "d70e01c0be7b8a7d20b186b30b29a75b18bba75d"
+SRC_URI[renderdoc.sha256sum] = "e7b5f0aa5b1b0eadc63a1c624c0ca7f5af133aa857d6a4271b0ef3d0bdb6868e"
+
+SRC_URI += "file://0001-Remove-dead-variable-984.patch;patchdir=external/amber/src"
+
+S = "${WORKDIR}/git"
+
+do_install() {
+	install -d ${D}/${CTSDIR}
+	cp -r ${B}/external/openglcts/modules/* ${D}/${CTSDIR}
+
+	install -m 0755 ${B}/modules/egl/deqp-egl ${D}/${CTSDIR}
+	install -m 0755 ${B}/modules/gles2/deqp-gles2 ${D}/${CTSDIR}
+	install -m 0755 ${B}/modules/gles3/deqp-gles3 ${D}/${CTSDIR}
+	install -m 0755 ${B}/modules/gles31/deqp-gles31 ${D}/${CTSDIR}
+	install -m 0755 ${B}/modules/internal/de-internal-tests ${D}/${CTSDIR}
+
+	rm -rf ${D}/${CTSDIR}/*.a ${D}/${CTSDIR}/cmake_install.cmake ${D}/${CTSDIR}/CMakeFiles
+	rm -rf ${D}/${CTSDIR}/*/*.a ${D}/${CTSDIR}/*/cmake_install.cmake ${D}/${CTSDIR}/*/CMakeFiles
+	rm -rf ${D}/${CTSDIR}/common/subgroups/*.a ${D}/${CTSDIR}/common/subgroups/cmake_install.cmake ${D}/${CTSDIR}/common/subgroups/CMakeFiles
+}
+
+SECURITY_CFLAGS:riscv64 = "${SECURITY_NOPIE_CFLAGS}"
+LTO = ""
+
diff --git a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/vulkan-cts_1.2.6.0.bb b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/vulkan-cts_1.2.6.0.bb
deleted file mode 100644
index f816c1b..0000000
--- a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/vulkan-cts_1.2.6.0.bb
+++ /dev/null
@@ -1,24 +0,0 @@
-DESCRIPTION = "Vulkan CTS"
-
-require khronos-cts.inc
-# vulkan-cts-1.2.6.0
-SRCREV_vk-gl-cts = "2cab49df5ad25a2d0061152367a21c6da83ed097"
-SRCREV_amber = "dabae26164714abf951c6815a2b4513260f7c6a4"
-SRCREV_glslang = "5c4f421121c4d24aad23a507e630dc5dc6c92c7c"
-SRCREV_spirv-headers = "faa570afbc91ac73d594d787486bcf8f2df1ace0"
-SRCREV_spirv-tools = "f11f7434815838bbad349124767b258ce7df41f0"
-SRC_URI[renderdoc.sha256sum] = "e7b5f0aa5b1b0eadc63a1c624c0ca7f5af133aa857d6a4271b0ef3d0bdb6868e"
-
-S = "${WORKDIR}/git"
-
-REQUIRED_DISTRO_FEATURES = "vulkan"
-inherit features_check
-
-DEPENDS += " vulkan-loader"
-
-do_install() {
-	install -d ${D}/${CTSDIR}
-	cp -r ${B}/external/vulkancts/modules/vulkan/* ${D}/${CTSDIR}/
-	rm -r ${D}/${CTSDIR}/*.a ${D}/${CTSDIR}/cmake_install.cmake ${D}/${CTSDIR}/CMakeFiles
-	rm -r ${D}/${CTSDIR}/*/*.a ${D}/${CTSDIR}/*/cmake_install.cmake ${D}/${CTSDIR}/*/CMakeFiles
-}
diff --git a/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/vulkan-cts_1.3.3.1.bb b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/vulkan-cts_1.3.3.1.bb
new file mode 100644
index 0000000..c996eb1
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-graphics/vk-gl-cts/vulkan-cts_1.3.3.1.bb
@@ -0,0 +1,28 @@
+DESCRIPTION = "Vulkan CTS"
+
+require khronos-cts.inc
+# vulkan-cts-1.3.3.1
+SRCREV_vk-gl-cts = "6024a88390942876147a88dce82bbed73b866c1b"
+SRCREV_amber = "8b145a6c89dcdb4ec28173339dd176fb7b6f43ed"
+SRCREV_glslang = "7dda6a6347b0bd550e202942adee475956ef462a"
+SRCREV_spirv-headers = "b765c355f488837ca4c77980ba69484f3ff277f5"
+SRCREV_spirv-tools = "b930e734ea198b7aabbbf04ee1562cf6f57962f0"
+SRCREV_jsoncpp = "9059f5cad030ba11d37818847443a53918c327b1"
+SRCREV_vulkan-docs = "9b5562187a8ad72c171410b036ceedbc450153ba"
+SRC_URI[renderdoc.sha256sum] = "e7b5f0aa5b1b0eadc63a1c624c0ca7f5af133aa857d6a4271b0ef3d0bdb6868e"
+
+SRC_URI += "file://0001-cmake-Define-WAYLAND_SCANNER-and-WAYLAND_PROTOCOLS_D.patch"
+
+S = "${WORKDIR}/git"
+
+REQUIRED_DISTRO_FEATURES = "vulkan"
+inherit features_check
+
+DEPENDS += " vulkan-loader"
+
+do_install() {
+	install -d ${D}/${CTSDIR}
+	cp -r ${B}/external/vulkancts/modules/vulkan/* ${D}/${CTSDIR}/
+	rm -rf ${D}/${CTSDIR}/*.a ${D}/${CTSDIR}/cmake_install.cmake ${D}/${CTSDIR}/CMakeFiles
+	rm -rf ${D}/${CTSDIR}/*/*.a ${D}/${CTSDIR}/*/cmake_install.cmake ${D}/${CTSDIR}/*/CMakeFiles
+}
diff --git a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/iceauth_1.0.8.bb b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/iceauth_1.0.9.bb
similarity index 69%
rename from meta-openembedded/meta-oe/recipes-graphics/xorg-app/iceauth_1.0.8.bb
rename to meta-openembedded/meta-oe/recipes-graphics/xorg-app/iceauth_1.0.9.bb
index 62296b5..ea0ef0a 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/iceauth_1.0.8.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/iceauth_1.0.9.bb
@@ -11,6 +11,6 @@
 
 DEPENDS += "libice"
 
-SRC_URI[md5sum] = "3b9b79fa0f9928161f4bad94273de7ae"
-SRC_URI[sha256sum] = "e6ee213a217265cc76050e4293ea70b98c32dce6505c6421227efbda62ab60c6"
+SRC_URI_EXT = "xz"
+SRC_URI[sha256sum] = "2cb9dfcb545683af77fb1029bea3fc52dcc8a0666f7b8b2d7373b6ed4c408c05"
 
diff --git a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/setxkbmap_1.3.2.bb b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/setxkbmap_1.3.3.bb
similarity index 75%
rename from meta-openembedded/meta-oe/recipes-graphics/xorg-app/setxkbmap_1.3.2.bb
rename to meta-openembedded/meta-oe/recipes-graphics/xorg-app/setxkbmap_1.3.3.bb
index 30fbd1b..18fa63e 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/setxkbmap_1.3.2.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/setxkbmap_1.3.3.bb
@@ -11,9 +11,9 @@
 
 PE = "1"
 
-DEPENDS += "libxkbfile"
+DEPENDS += "libxkbfile xrandr"
 
 BBCLASSEXTEND = "native"
 
-SRC_URI[md5sum] = "93e736c98fb75856ee8227a0c49a128d"
-SRC_URI[sha256sum] = "8ff27486442725e50b02d7049152f51d125ecad71b7ce503cfa09d5d8ceeb9f5"
+SRC_URI_EXT = "xz"
+SRC_URI[sha256sum] = "b560c678da6930a0da267304fa3a41cc5df39a96a5e23d06f14984c87b6f587b"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xclock_1.0.9.bb b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xclock_1.1.1.bb
similarity index 63%
rename from meta-openembedded/meta-oe/recipes-graphics/xorg-app/xclock_1.0.9.bb
rename to meta-openembedded/meta-oe/recipes-graphics/xorg-app/xclock_1.1.1.bb
index 31c553e..a7360b6 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xclock_1.0.9.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xclock_1.1.1.bb
@@ -6,5 +6,5 @@
 
 DEPENDS += " libxaw libxrender libxft libxkbfile libxt"
 
-SRC_URI[md5sum] = "437522a96f424f68fc64ed34ece9b211"
-SRC_URI[sha256sum] = "cf461fb2c6f2ac42c54d8429ee2010fdb9a1442a370adfbfe8a7bfaf33c123bb"
+SRC_URI_EXT = "xz"
+SRC_URI[sha256sum] = "df7ceabf8f07044a2fde4924d794554996811640a45de40cb12c2cf1f90f742c"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xkbutils_1.0.4.bb b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xkbutils_1.0.5.bb
similarity index 79%
rename from meta-openembedded/meta-oe/recipes-graphics/xorg-app/xkbutils_1.0.4.bb
rename to meta-openembedded/meta-oe/recipes-graphics/xorg-app/xkbutils_1.0.5.bb
index 6a05e98..a672ba5 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xkbutils_1.0.4.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xkbutils_1.0.5.bb
@@ -15,5 +15,5 @@
 
 BBCLASSEXTEND = "native"
 
-SRC_URI[md5sum] = "502b14843f610af977dffc6cbf2102d5"
-SRC_URI[sha256sum] = "d2a18ab90275e8bca028773c44264d2266dab70853db4321bdbc18da75148130"
+SRC_URI_EXT = "xz"
+SRC_URI[sha256sum] = "f6a4a8e9c54582beb3787b1faa8168caab125c1fee0ca9cfa5b6c9c1df25eea4"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xlsfonts_1.0.6.bb b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xlsfonts_1.0.7.bb
similarity index 65%
rename from meta-openembedded/meta-oe/recipes-graphics/xorg-app/xlsfonts_1.0.6.bb
rename to meta-openembedded/meta-oe/recipes-graphics/xorg-app/xlsfonts_1.0.7.bb
index 76b76e4..e5cd0ce 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xlsfonts_1.0.6.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xlsfonts_1.0.7.bb
@@ -6,5 +6,5 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://COPYING;md5=5ec74dd7ea4d10c4715a7c44f159a40b"
 
-SRC_URI[md5sum] = "5774fd4f518b3f338f2b28f270e04bfc"
-SRC_URI[sha256sum] = "89b80b3a030006ab6cef717be286c12f2477894b227158b1e6133274f6ebd368"
+SRC_URI_EXT = "xz"
+SRC_URI[sha256sum] = "7b726945a967b44c35cddee5edd74802907a239ce2e2e515730b8a32c8e50465"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xmag_1.0.6.bb b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xmag_1.0.7.bb
similarity index 68%
rename from meta-openembedded/meta-oe/recipes-graphics/xorg-app/xmag_1.0.6.bb
rename to meta-openembedded/meta-oe/recipes-graphics/xorg-app/xmag_1.0.7.bb
index ab74f5b..f3dac53 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xmag_1.0.6.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xmag_1.0.7.bb
@@ -7,5 +7,5 @@
 LIC_FILES_CHKSUM = "file://COPYING;md5=3413fe6832380b44b69b172d2d1b2387"
 DEPENDS += " libxaw libxt"
 
-SRC_URI[md5sum] = "8aaa41374935d697ee55d7dc9de70781"
-SRC_URI[sha256sum] = "87a2bc23b251e2d8f8370d3283a4d6c8dac98a30cb5749a04336cdb55c14e161"
+SRC_URI_EXT = "xz"
+SRC_URI[sha256sum] = "009936cc5a2706084079127b26cf55c713767650a34cb69e5682d60e33ce7461"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xwud_1.0.5.bb b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xwud_1.0.6.bb
similarity index 73%
rename from meta-openembedded/meta-oe/recipes-graphics/xorg-app/xwud_1.0.5.bb
rename to meta-openembedded/meta-oe/recipes-graphics/xorg-app/xwud_1.0.6.bb
index 7cb550d..1be0b5c 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xwud_1.0.5.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/xorg-app/xwud_1.0.6.bb
@@ -8,5 +8,5 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://COPYING;md5=31e8892c80b7a0c1c5f37c8e8ae6d794"
 
-SRC_URI[md5sum] = "79972093bb0766fcd0223b2bd6d11932"
-SRC_URI[sha256sum] = "99997051f8a892313f22edf41dab45864e86e7062ee9012d5dbb6a40fc6b10a9"
+SRC_URI_EXT = "xz"
+SRC_URI[sha256sum] = "64048cd15eba3cd9a3d2e3280650391259ebf6b529f2101d1a20f441038c1afe"
diff --git a/meta-openembedded/meta-oe/recipes-graphics/xorg-driver/xf86-video-nouveau_1.0.17.bb b/meta-openembedded/meta-oe/recipes-graphics/xorg-driver/xf86-video-nouveau_1.0.17.bb
index d471610..fbf6406 100644
--- a/meta-openembedded/meta-oe/recipes-graphics/xorg-driver/xf86-video-nouveau_1.0.17.bb
+++ b/meta-openembedded/meta-oe/recipes-graphics/xorg-driver/xf86-video-nouveau_1.0.17.bb
@@ -15,5 +15,7 @@
 
 COMPATIBLE_HOST = '(i.86|x86_64).*-linux'
 
+CFLAGS += "-Wno-error=implicit-function-declaration"
+
 SRC_URI += "file://0001-nouveau-fixup-driver-for-new-X-server-ABI.patch"
 SRC_URI[sha256sum] = "499322e27a55c8183166bf2dd1e47d085eb834143e0d7036baba8427b90c156b"
diff --git a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-Migrate-to-openssl-1.1.patch b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-Migrate-to-openssl-1.1.patch
deleted file mode 100644
index 394aa16..0000000
--- a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-Migrate-to-openssl-1.1.patch
+++ /dev/null
@@ -1,152 +0,0 @@
-From c9dcb6afef9c343d070aaff208d11a997a45a105 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Wed, 5 Sep 2018 22:19:38 -0700
-Subject: [PATCH] Migrate to openssl 1.1
-
-Upstream-Status: Backport [https://sourceforge.net/p/ipmitool/source/ci/1664902525a1c3771b4d8b3ccab7ea1ba6b2bdd1/]
-
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- src/plugins/lanplus/lanplus_crypt_impl.c | 50 ++++++++++++++----------
- 1 file changed, 29 insertions(+), 21 deletions(-)
-
-diff --git a/src/plugins/lanplus/lanplus_crypt_impl.c b/src/plugins/lanplus/lanplus_crypt_impl.c
-index d5fac37..9652a5e 100644
---- a/src/plugins/lanplus/lanplus_crypt_impl.c
-+++ b/src/plugins/lanplus/lanplus_crypt_impl.c
-@@ -164,11 +164,7 @@ lanplus_encrypt_aes_cbc_128(const uint8_t * iv,
- 							uint8_t       * output,
- 							uint32_t        * bytes_written)
- {
--	EVP_CIPHER_CTX ctx;
--	EVP_CIPHER_CTX_init(&ctx);
--	EVP_EncryptInit_ex(&ctx, EVP_aes_128_cbc(), NULL, key, iv);
--	EVP_CIPHER_CTX_set_padding(&ctx, 0);
--	
-+	EVP_CIPHER_CTX *ctx = NULL;
- 
- 	*bytes_written = 0;
- 
-@@ -182,6 +178,14 @@ lanplus_encrypt_aes_cbc_128(const uint8_t * iv,
- 		printbuf(input, input_length, "encrypting this data");
- 	}
- 
-+	ctx = EVP_CIPHER_CTX_new();
-+	if (ctx == NULL) {
-+		lprintf(LOG_DEBUG, "ERROR: EVP_CIPHER_CTX_new() failed");
-+		return;
-+	}
-+	EVP_CIPHER_CTX_init(ctx);
-+	EVP_EncryptInit_ex(ctx, EVP_aes_128_cbc(), NULL, key, iv);
-+	EVP_CIPHER_CTX_set_padding(ctx, 0);
- 
- 	/*
- 	 * The default implementation adds a whole block of padding if the input
-@@ -191,28 +195,28 @@ lanplus_encrypt_aes_cbc_128(const uint8_t * iv,
- 	assert((input_length % IPMI_CRYPT_AES_CBC_128_BLOCK_SIZE) == 0);
- 
- 
--	if(!EVP_EncryptUpdate(&ctx, output, (int *)bytes_written, input, input_length))
-+	if(!EVP_EncryptUpdate(ctx, output, (int *)bytes_written, input, input_length))
- 	{
- 		/* Error */
- 		*bytes_written = 0;
--		return;
- 	}
- 	else
- 	{
- 		uint32_t tmplen;
- 
--		if(!EVP_EncryptFinal_ex(&ctx, output + *bytes_written, (int *)&tmplen))
-+		if(!EVP_EncryptFinal_ex(ctx, output + *bytes_written, (int *)&tmplen))
- 		{
-+			/* Error */
- 			*bytes_written = 0;
--			return; /* Error */
- 		}
- 		else
- 		{
- 			/* Success */
- 			*bytes_written += tmplen;
--			EVP_CIPHER_CTX_cleanup(&ctx);
- 		}
- 	}
-+	/* performs cleanup and free */
-+	EVP_CIPHER_CTX_free(ctx);
- }
- 
- 
-@@ -239,11 +243,7 @@ lanplus_decrypt_aes_cbc_128(const uint8_t * iv,
- 							uint8_t       * output,
- 							uint32_t        * bytes_written)
- {
--	EVP_CIPHER_CTX ctx;
--	EVP_CIPHER_CTX_init(&ctx);
--	EVP_DecryptInit_ex(&ctx, EVP_aes_128_cbc(), NULL, key, iv);
--	EVP_CIPHER_CTX_set_padding(&ctx, 0);
--
-+	EVP_CIPHER_CTX *ctx = NULL;
- 
- 	if (verbose >= 5)
- 	{
-@@ -252,12 +252,20 @@ lanplus_decrypt_aes_cbc_128(const uint8_t * iv,
- 		printbuf(input, input_length, "decrypting this data");
- 	}
- 
--
- 	*bytes_written = 0;
- 
- 	if (input_length == 0)
- 		return;
- 
-+	ctx = EVP_CIPHER_CTX_new();
-+	if (ctx == NULL) {
-+		lprintf(LOG_DEBUG, "ERROR: EVP_CIPHER_CTX_new() failed");
-+		return;
-+	}
-+	EVP_CIPHER_CTX_init(ctx);
-+	EVP_DecryptInit_ex(ctx, EVP_aes_128_cbc(), NULL, key, iv);
-+	EVP_CIPHER_CTX_set_padding(ctx, 0);
-+
- 	/*
- 	 * The default implementation adds a whole block of padding if the input
- 	 * data is perfectly aligned.  We would like to keep that from happening.
-@@ -266,33 +274,33 @@ lanplus_decrypt_aes_cbc_128(const uint8_t * iv,
- 	assert((input_length % IPMI_CRYPT_AES_CBC_128_BLOCK_SIZE) == 0);
- 
- 
--	if (!EVP_DecryptUpdate(&ctx, output, (int *)bytes_written, input, input_length))
-+	if (!EVP_DecryptUpdate(ctx, output, (int *)bytes_written, input, input_length))
- 	{
- 		/* Error */
- 		lprintf(LOG_DEBUG, "ERROR: decrypt update failed");
- 		*bytes_written = 0;
--		return;
- 	}
- 	else
- 	{
- 		uint32_t tmplen;
- 
--		if (!EVP_DecryptFinal_ex(&ctx, output + *bytes_written, (int *)&tmplen))
-+		if (!EVP_DecryptFinal_ex(ctx, output + *bytes_written, (int *)&tmplen))
- 		{
-+			/* Error */
- 			char buffer[1000];
- 			ERR_error_string(ERR_get_error(), buffer);
- 			lprintf(LOG_DEBUG, "the ERR error %s", buffer);
- 			lprintf(LOG_DEBUG, "ERROR: decrypt final failed");
- 			*bytes_written = 0;
--			return; /* Error */
- 		}
- 		else
- 		{
- 			/* Success */
- 			*bytes_written += tmplen;
--			EVP_CIPHER_CTX_cleanup(&ctx);
- 		}
- 	}
-+	/* performs cleanup and free */
-+	EVP_CIPHER_CTX_free(ctx);
- 
- 	if (verbose >= 5)
- 	{
diff --git a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-configure-Remove-the-logic-to-download-IANA-PEN-data.patch b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-configure-Remove-the-logic-to-download-IANA-PEN-data.patch
new file mode 100644
index 0000000..442f132
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-configure-Remove-the-logic-to-download-IANA-PEN-data.patch
@@ -0,0 +1,41 @@
+From 63d72f97bd106dd2101cd7fdac6df4f7a053d67c Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 08:27:39 -0700
+Subject: [PATCH] configure: Remove the logic to download IANA PEN database
+ during configure
+
+OE performs all downloads before the configure step starts; therefore this
+step is moved out into the bitbake recipe, so that the build is immutable.
+
+Upstream-Status: Inappropriate [OE-Specific]
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.ac | 16 +---------------
+ 1 file changed, 1 insertion(+), 15 deletions(-)
+
+--- a/configure.ac
++++ b/configure.ac
+@@ -56,21 +56,7 @@ if test "x$exec_prefix" = "xNONE"; then
+ 	exec_prefix="$prefix"
+ fi
+ 
+-if test "x$WGET" = "x"; then
+-	if test "x$CURL" = "x"; then
+-		AC_MSG_WARN([** Neither wget nor curl could be found.])
+-		AC_MSG_WARN([** IANA PEN database will not be installed by `make install` !])
+-	else
+-		DOWNLOAD="$CURL --location --progress-bar"
+-		AM_CONDITIONAL([DOWNLOAD], [true])
+-	fi
+-else
+-	DOWNLOAD="$WGET -c -nd -O -"
+-	AM_CONDITIONAL([DOWNLOAD], [true])
+-fi
+-
+-AC_MSG_WARN([** Download is:])
+-AC_MSG_WARN($DOWNLOAD)
++AM_CONDITIONAL([DOWNLOAD], [false])
+ AC_SUBST(DOWNLOAD, $DOWNLOAD)
+ 
+ dnl
diff --git a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-fru-Fix-buffer-overflow-in-ipmi_spd_print_fru.patch b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-fru-Fix-buffer-overflow-in-ipmi_spd_print_fru.patch
deleted file mode 100644
index eadfb7e..0000000
--- a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-fru-Fix-buffer-overflow-in-ipmi_spd_print_fru.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 24aed93efb30a8f557aedc2f03b6ccec758ccbf4 Mon Sep 17 00:00:00 2001
-From: Chrostoper Ertl <chertl@microsoft.com>
-Date: Thu, 28 Nov 2019 16:44:18 +0000
-Subject: [PATCH 1/5] fru: Fix buffer overflow in ipmi_spd_print_fru
-
-Partial fix for CVE-2020-5208, see
-https://github.com/ipmitool/ipmitool/security/advisories/GHSA-g659-9qxw-p7cp
-
-The `ipmi_spd_print_fru` function has a similar issue as the one fixed
-by the previous commit in `read_fru_area_section`. An initial request is
-made to get the `fru.size`, which is used as the size for the allocation
-of `spd_data`. Inside a loop, further requests are performed to get the
-copy sizes which are not checked before being used as the size for a
-copy into the buffer.
-
-Upstream-Status: Backport[https://github.com/ipmitool/ipmitool/commit/840fb1cbb4fb365cb9797300e3374d4faefcdb10]
-CVE: CVE-2020-5208
-
-Signed-off-by: Wenlin Kang <wenlin.kang@windriver.com>
----
- lib/dimm_spd.c | 9 ++++++++-
- 1 file changed, 8 insertions(+), 1 deletion(-)
-
-diff --git a/lib/dimm_spd.c b/lib/dimm_spd.c
-index 91ae117..4c9c21d 100644
---- a/lib/dimm_spd.c
-+++ b/lib/dimm_spd.c
-@@ -1014,7 +1014,7 @@ ipmi_spd_print_fru(struct ipmi_intf * intf, uint8_t id)
- 	struct ipmi_rq req;
- 	struct fru_info fru;
- 	uint8_t *spd_data, msg_data[4];
--	int len, offset;
-+	uint32_t len, offset;
- 
- 	msg_data[0] = id;
- 
-@@ -1091,6 +1091,13 @@ ipmi_spd_print_fru(struct ipmi_intf * intf, uint8_t id)
- 		}
- 
- 		len = rsp->data[0];
-+		if(rsp->data_len < 1
-+		   || len > rsp->data_len - 1
-+		   || len > fru.size - offset)
-+		{
-+			printf(" Not enough buffer size");
-+			return -1;
-+		}
- 		memcpy(&spd_data[offset], rsp->data + 1, len);
- 		offset += len;
- 	} while (offset < fru.size);
--- 
-1.9.1
-
diff --git a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-fru-Fix-buffer-overflow-vulnerabilities.patch b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-fru-Fix-buffer-overflow-vulnerabilities.patch
deleted file mode 100644
index b65e3ef..0000000
--- a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-fru-Fix-buffer-overflow-vulnerabilities.patch
+++ /dev/null
@@ -1,133 +0,0 @@
-From e824c23316ae50beb7f7488f2055ac65e8b341f2 Mon Sep 17 00:00:00 2001
-From: Chrostoper Ertl <chertl@microsoft.com>
-Date: Thu, 28 Nov 2019 16:33:59 +0000
-Subject: [PATCH] fru: Fix buffer overflow vulnerabilities
-
-Partial fix for CVE-2020-5208, see
-https://github.com/ipmitool/ipmitool/security/advisories/GHSA-g659-9qxw-p7cp
-
-The `read_fru_area_section` function only performs size validation of
-requested read size, and falsely assumes that the IPMI message will not
-respond with more than the requested amount of data; it uses the
-unvalidated response size to copy into `frubuf`. If the response is
-larger than the request, this can result in overflowing the buffer.
-
-The same issue affects the `read_fru_area` function.
-
-Upstream-Status: Backport[https://github.com/ipmitool/ipmitool/commit/e824c23316ae50beb7f7488f2055ac65e8b341f2]
-CVE: CVE-2020-5208
-
-Signed-off-by: Wenlin Kang <wenlin.kang@windriver.com>
----
- lib/ipmi_fru.c | 33 +++++++++++++++++++++++++++++++--
- 1 file changed, 31 insertions(+), 2 deletions(-)
-
-diff --git a/lib/ipmi_fru.c b/lib/ipmi_fru.c
-index c2a139d..2e323ff 100644
---- a/lib/ipmi_fru.c
-+++ b/lib/ipmi_fru.c
-@@ -663,7 +663,10 @@ int
- read_fru_area(struct ipmi_intf * intf, struct fru_info *fru, uint8_t id,
- 			uint32_t offset, uint32_t length, uint8_t *frubuf)
- {
--	uint32_t off = offset, tmp, finish;
-+	uint32_t off = offset;
-+	uint32_t tmp;
-+	uint32_t finish;
-+	uint32_t size_left_in_buffer;
- 	struct ipmi_rs * rsp;
- 	struct ipmi_rq req;
- 	uint8_t msg_data[4];
-@@ -676,10 +679,12 @@ read_fru_area(struct ipmi_intf * intf, struct fru_info *fru, uint8_t id,
- 
- 	finish = offset + length;
- 	if (finish > fru->size) {
-+		memset(frubuf + fru->size, 0, length - fru->size);
- 		finish = fru->size;
- 		lprintf(LOG_NOTICE, "Read FRU Area length %d too large, "
- 			"Adjusting to %d",
- 			offset + length, finish - offset);
-+		length = finish - offset;
- 	}
- 
- 	memset(&req, 0, sizeof(req));
-@@ -715,6 +720,7 @@ read_fru_area(struct ipmi_intf * intf, struct fru_info *fru, uint8_t id,
- 		}
- 	}
- 
-+	size_left_in_buffer = length;
- 	do {
- 		tmp = fru->access ? off >> 1 : off;
- 		msg_data[0] = id;
-@@ -756,9 +762,18 @@ read_fru_area(struct ipmi_intf * intf, struct fru_info *fru, uint8_t id,
- 		}
- 
- 		tmp = fru->access ? rsp->data[0] << 1 : rsp->data[0];
-+		if(rsp->data_len < 1
-+		   || tmp > rsp->data_len - 1
-+		   || tmp > size_left_in_buffer)
-+		{
-+			printf(" Not enough buffer size");
-+			return -1;
-+		}
-+
- 		memcpy(frubuf, rsp->data + 1, tmp);
- 		off += tmp;
- 		frubuf += tmp;
-+		size_left_in_buffer -= tmp;
- 		/* sometimes the size returned in the Info command
- 		* is too large.  return 0 so higher level function
- 		* still attempts to parse what was returned */
-@@ -791,7 +806,9 @@ read_fru_area_section(struct ipmi_intf * intf, struct fru_info *fru, uint8_t id,
- 			uint32_t offset, uint32_t length, uint8_t *frubuf)
- {
- 	static uint32_t fru_data_rqst_size = 20;
--	uint32_t off = offset, tmp, finish;
-+	uint32_t off = offset;
-+	uint32_t tmp, finish;
-+	uint32_t size_left_in_buffer;
- 	struct ipmi_rs * rsp;
- 	struct ipmi_rq req;
- 	uint8_t msg_data[4];
-@@ -804,10 +821,12 @@ read_fru_area_section(struct ipmi_intf * intf, struct fru_info *fru, uint8_t id,
- 
- 	finish = offset + length;
- 	if (finish > fru->size) {
-+		memset(frubuf + fru->size, 0, length - fru->size);
- 		finish = fru->size;
- 		lprintf(LOG_NOTICE, "Read FRU Area length %d too large, "
- 			"Adjusting to %d",
- 			offset + length, finish - offset);
-+		length = finish - offset;
- 	}
- 
- 	memset(&req, 0, sizeof(req));
-@@ -822,6 +841,8 @@ read_fru_area_section(struct ipmi_intf * intf, struct fru_info *fru, uint8_t id,
- 	if (fru->access && fru_data_rqst_size > 16)
- #endif
- 		fru_data_rqst_size = 16;
-+
-+	size_left_in_buffer = length;
- 	do {
- 		tmp = fru->access ? off >> 1 : off;
- 		msg_data[0] = id;
-@@ -853,8 +874,16 @@ read_fru_area_section(struct ipmi_intf * intf, struct fru_info *fru, uint8_t id,
- 		}
- 
- 		tmp = fru->access ? rsp->data[0] << 1 : rsp->data[0];
-+		if(rsp->data_len < 1
-+		   || tmp > rsp->data_len - 1
-+		   || tmp > size_left_in_buffer)
-+		{
-+			printf(" Not enough buffer size");
-+			return -1;
-+		}
- 		memcpy((frubuf + off)-offset, rsp->data + 1, tmp);
- 		off += tmp;
-+		size_left_in_buffer -= tmp;
- 
- 		/* sometimes the size returned in the Info command
- 		* is too large.  return 0 so higher level function
--- 
-2.17.1
-
diff --git a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-hpmfwupg-move-variable-definition-to-.c-file.patch b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-hpmfwupg-move-variable-definition-to-.c-file.patch
deleted file mode 100644
index a765c3a..0000000
--- a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-hpmfwupg-move-variable-definition-to-.c-file.patch
+++ /dev/null
@@ -1,41 +0,0 @@
-From 3f7bb7218181745ca7762c1b4832cbb1c9e692f5 Mon Sep 17 00:00:00 2001
-From: Vaclav Dolezal <vdolezal@redhat.com>
-Date: Thu, 23 Jan 2020 11:26:32 +0100
-Subject: [PATCH] hpmfwupg: move variable definition to .c file
-
-Upstream-Status: Pending
-Signed-off-by: Vaclav Dolezal <vdolezal@redhat.com>
----
- include/ipmitool/ipmi_hpmfwupg.h | 2 +-
- lib/ipmi_hpmfwupg.c              | 2 ++
- 2 files changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/include/ipmitool/ipmi_hpmfwupg.h b/include/ipmitool/ipmi_hpmfwupg.h
-index de65292..07f597b 100644
---- a/include/ipmitool/ipmi_hpmfwupg.h
-+++ b/include/ipmitool/ipmi_hpmfwupg.h
-@@ -800,7 +800,7 @@ typedef struct _VERSIONINFO {
- 	char descString[HPMFWUPG_DESC_STRING_LENGTH + 1];
- }VERSIONINFO, *PVERSIONINFO;
- 
--VERSIONINFO gVersionInfo[HPMFWUPG_COMPONENT_ID_MAX];
-+extern VERSIONINFO gVersionInfo[HPMFWUPG_COMPONENT_ID_MAX];
- 
- #define TARGET_VER (0x01)
- #define ROLLBACK_VER (0x02)
-diff --git a/lib/ipmi_hpmfwupg.c b/lib/ipmi_hpmfwupg.c
-index bbcffc0..d7cdcd6 100644
---- a/lib/ipmi_hpmfwupg.c
-+++ b/lib/ipmi_hpmfwupg.c
-@@ -58,6 +58,8 @@ ipmi_intf_get_max_request_data_size(struct ipmi_intf * intf);
- 
- extern int verbose;
- 
-+VERSIONINFO gVersionInfo[HPMFWUPG_COMPONENT_ID_MAX];
-+
- int HpmfwupgUpgrade(struct ipmi_intf *intf, char *imageFilename,
- 		int activate, int, int);
- int HpmfwupgValidateImageIntegrity(struct HpmfwupgUpgradeCtx *pFwupgCtx);
--- 
-2.28.0
-
diff --git a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-ipmi_fru.c-Provide-missing-function-declarations.patch b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-ipmi_fru.c-Provide-missing-function-declarations.patch
new file mode 100644
index 0000000..704bbdb
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0001-ipmi_fru.c-Provide-missing-function-declarations.patch
@@ -0,0 +1,34 @@
+From e5bbf96edf776821f29ab67baed22a690bf8ab10 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 07:30:10 -0700
+Subject: [PATCH] ipmi_fru.c: Provide missing function declarations
+
+Fixes build with clang-15+
+
+Upstream-Status: Submitted [https://github.com/ipmitool/ipmitool/pull/360]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ lib/ipmi_fru.c | 7 +++++++
+ 1 file changed, 7 insertions(+)
+
+diff --git a/lib/ipmi_fru.c b/lib/ipmi_fru.c
+index 3d1d8a1..5c5661c 100644
+--- a/lib/ipmi_fru.c
++++ b/lib/ipmi_fru.c
+@@ -60,6 +60,13 @@ static const char *section_id[4] = {
+ 	"Board Section",
+ 	"Product Section"
+ };
++/* From lib/ipmi_hpmfwupg.c: */
++uint16_t
++ipmi_intf_get_max_request_data_size(struct ipmi_intf * intf);
++
++/* From src/plugins/ipmi_intf.c: */
++uint16_t
++ipmi_intf_get_max_response_data_size(struct ipmi_intf * intf);
+ 
+ static const char * combined_voltage_desc[] = {
+ 	"12 V",
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0002-session-Fix-buffer-overflow-in-ipmi_get_session_info.patch b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0002-session-Fix-buffer-overflow-in-ipmi_get_session_info.patch
deleted file mode 100644
index b8742b1..0000000
--- a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0002-session-Fix-buffer-overflow-in-ipmi_get_session_info.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From 81144cfba131b4ddbfcf9c530274b23bfc7e0ea8 Mon Sep 17 00:00:00 2001
-From: Chrostoper Ertl <chertl@microsoft.com>
-Date: Thu, 28 Nov 2019 16:51:49 +0000
-Subject: [PATCH 2/5] session: Fix buffer overflow in ipmi_get_session_info
-
-Partial fix for CVE-2020-5208, see
-https://github.com/ipmitool/ipmitool/security/advisories/GHSA-g659-9qxw-p7cp
-
-The `ipmi_get_session_info` function does not properly check the
-response `data_len`, which is used as a copy size, allowing stack buffer
-overflow.
-
-Upstream-Status: Backport[https://github.com/ipmitool/ipmitool/commit/41d7026946fafbd4d1ec0bcaca3ea30a6e8eed22]
-CVE: CVE-2020-5208
-
-Signed-off-by: Wenlin Kang <wenlin.kang@windriver.com>
----
- lib/ipmi_session.c | 12 ++++++++----
- 1 file changed, 8 insertions(+), 4 deletions(-)
-
-diff --git a/lib/ipmi_session.c b/lib/ipmi_session.c
-index 4855bc4..71bef4c 100644
---- a/lib/ipmi_session.c
-+++ b/lib/ipmi_session.c
-@@ -319,8 +319,10 @@ ipmi_get_session_info(struct ipmi_intf         * intf,
- 		}
- 		else
- 		{
--			memcpy(&session_info,  rsp->data, rsp->data_len);
--			print_session_info(&session_info, rsp->data_len);
-+			memcpy(&session_info,  rsp->data,
-+			       __min(rsp->data_len, sizeof(session_info)));
-+			print_session_info(&session_info,
-+			                   __min(rsp->data_len, sizeof(session_info)));
- 		}
- 		break;
- 		
-@@ -351,8 +353,10 @@ ipmi_get_session_info(struct ipmi_intf         * intf,
- 				break;
- 			}
- 
--			memcpy(&session_info,  rsp->data, rsp->data_len);
--			print_session_info(&session_info, rsp->data_len);
-+			memcpy(&session_info,  rsp->data,
-+			       __min(rsp->data_len, sizeof(session_info)));
-+			print_session_info(&session_info,
-+			                   __min(rsp->data_len, sizeof(session_info)));
- 			
- 		} while (i <= session_info.session_slot_count);
- 		break;
--- 
-1.9.1
-
diff --git a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0003-channel-Fix-buffer-overflow.patch b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0003-channel-Fix-buffer-overflow.patch
deleted file mode 100644
index deebd35..0000000
--- a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0003-channel-Fix-buffer-overflow.patch
+++ /dev/null
@@ -1,69 +0,0 @@
-From 5057761e30e3a7682edab60f98f631616392ddc6 Mon Sep 17 00:00:00 2001
-From: Chrostoper Ertl <chertl@microsoft.com>
-Date: Thu, 28 Nov 2019 16:56:38 +0000
-Subject: [PATCH 3/3] channel: Fix buffer overflow
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Partial fix for CVE-2020-5208, see
-https://github.com/ipmitool/ipmitool/security/advisories/GHSA-g659-9qxw-p7cp
-
-The `ipmi_get_channel_cipher_suites` function does not properly check
-the final response’s `data_len`, which can lead to stack buffer overflow
-on the final copy.
-
-Upstream-Status: Backport[https://github.com/ipmitool/ipmitool/commit/9452be87181a6e83cfcc768b3ed8321763db50e4]
-CVE: CVE-2020-5208
-
-[Make some changes to apply it]
-Signed-off-by: Wenlin Kang <wenlin.kang@windriver.com>
----
- include/ipmitool/ipmi_channel.h |  2 ++
- lib/ipmi_channel.c              | 10 ++++++++--
- 2 files changed, 10 insertions(+), 2 deletions(-)
-
-diff --git a/include/ipmitool/ipmi_channel.h b/include/ipmitool/ipmi_channel.h
-index b138c26..d7cce5e 100644
---- a/include/ipmitool/ipmi_channel.h
-+++ b/include/ipmitool/ipmi_channel.h
-@@ -77,6 +77,8 @@ struct channel_access_t {
- 	uint8_t user_level_auth;
- };
- 
-+#define MAX_CIPHER_SUITE_DATA_LEN 0x10
-+
- /*
-  * The Get Authentication Capabilities response structure
-  * From table 22-15 of the IPMI v2.0 spec
-diff --git a/lib/ipmi_channel.c b/lib/ipmi_channel.c
-index fab2e54..76ecdcd 100644
---- a/lib/ipmi_channel.c
-+++ b/lib/ipmi_channel.c
-@@ -378,7 +378,10 @@ ipmi_get_channel_cipher_suites(struct ipmi_intf *intf, const char *payload_type,
- 		lprintf(LOG_ERR, "Unable to Get Channel Cipher Suites");
- 		return -1;
- 	}
--	if (rsp->ccode > 0) {
-+	if (rsp->ccode
-+	    || rsp->data_len < 1
-+	    || rsp->data_len > sizeof(uint8_t) + MAX_CIPHER_SUITE_DATA_LEN)
-+	{
- 		lprintf(LOG_ERR, "Get Channel Cipher Suites failed: %s",
- 			val2str(rsp->ccode, completion_code_vals));
- 		return -1;
-@@ -413,7 +416,10 @@ ipmi_get_channel_cipher_suites(struct ipmi_intf *intf, const char *payload_type,
- 			lprintf(LOG_ERR, "Unable to Get Channel Cipher Suites");
- 			return -1;
- 		}
--		if (rsp->ccode > 0) {
-+		if (rsp->ccode
-+		    || rsp->data_len < 1
-+		    || rsp->data_len > sizeof(uint8_t) + MAX_CIPHER_SUITE_DATA_LEN)
-+		{
- 			lprintf(LOG_ERR, "Get Channel Cipher Suites failed: %s",
- 					val2str(rsp->ccode, completion_code_vals));
- 			return -1;
--- 
-2.18.1
-
diff --git a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0004-lanp-Fix-buffer-overflows-in-get_lan_param_select.patch b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0004-lanp-Fix-buffer-overflows-in-get_lan_param_select.patch
deleted file mode 100644
index b5ce9e9..0000000
--- a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0004-lanp-Fix-buffer-overflows-in-get_lan_param_select.patch
+++ /dev/null
@@ -1,94 +0,0 @@
-From e6aa6076f65e71544bd6450d20d943d7baaccb9f Mon Sep 17 00:00:00 2001
-From: Chrostoper Ertl <chertl@microsoft.com>
-Date: Thu, 28 Nov 2019 17:06:39 +0000
-Subject: [PATCH 4/5] lanp: Fix buffer overflows in get_lan_param_select
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Partial fix for CVE-2020-5208, see
-https://github.com/ipmitool/ipmitool/security/advisories/GHSA-g659-9qxw-p7cp
-
-The `get_lan_param_select` function is missing a validation check on the
-response’s `data_len`, which it then returns to caller functions, where
-stack buffer overflow can occur.
-
-Upstream-Status: Backport[https://github.com/ipmitool/ipmitool/commit/d45572d71e70840e0d4c50bf48218492b79c1a10]
-CVE: CVE-2020-5208
-
-[Make some changes to apply it]
-Signed-off-by: Wenlin Kang <wenlin.kang@windriver.com>
----
- lib/ipmi_lanp.c | 14 +++++++-------
- 1 file changed, 7 insertions(+), 7 deletions(-)
-
-diff --git a/lib/ipmi_lanp.c b/lib/ipmi_lanp.c
-index 060e753..dee21ee 100644
---- a/lib/ipmi_lanp.c
-+++ b/lib/ipmi_lanp.c
-@@ -1917,7 +1917,7 @@ ipmi_lan_alert_set(struct ipmi_intf * intf, uint8_t chan, uint8_t alert,
- 		if (p == NULL) {
- 			return (-1);
- 		}
--		memcpy(data, p->data, p->data_len);
-+		memcpy(data, p->data, __min(p->data_len, sizeof(data)));
- 		/* set new ipaddr */
- 		memcpy(data+3, temp, 4);
- 		printf("Setting LAN Alert %d IP Address to %d.%d.%d.%d\n", alert,
-@@ -1932,7 +1932,7 @@ ipmi_lan_alert_set(struct ipmi_intf * intf, uint8_t chan, uint8_t alert,
- 		if (p == NULL) {
- 			return (-1);
- 		}
--		memcpy(data, p->data, p->data_len);
-+		memcpy(data, p->data, __min(p->data_len, sizeof(data)));
- 		/* set new macaddr */
- 		memcpy(data+7, temp, 6);
- 		printf("Setting LAN Alert %d MAC Address to "
-@@ -1947,7 +1947,7 @@ ipmi_lan_alert_set(struct ipmi_intf * intf, uint8_t chan, uint8_t alert,
- 		if (p == NULL) {
- 			return (-1);
- 		}
--		memcpy(data, p->data, p->data_len);
-+		memcpy(data, p->data, __min(p->data_len, sizeof(data)));
- 
- 		if (strncasecmp(argv[1], "def", 3) == 0 ||
- 		    strncasecmp(argv[1], "default", 7) == 0) {
-@@ -1973,7 +1973,7 @@ ipmi_lan_alert_set(struct ipmi_intf * intf, uint8_t chan, uint8_t alert,
- 		if (p == NULL) {
- 			return (-1);
- 		}
--		memcpy(data, p->data, p->data_len);
-+		memcpy(data, p->data, __min(p->data_len, sizeof(data)));
- 
- 		if (strncasecmp(argv[1], "on", 2) == 0 ||
- 		    strncasecmp(argv[1], "yes", 3) == 0) {
-@@ -1998,7 +1998,7 @@ ipmi_lan_alert_set(struct ipmi_intf * intf, uint8_t chan, uint8_t alert,
- 		if (p == NULL) {
- 			return (-1);
- 		}
--		memcpy(data, p->data, p->data_len);
-+		memcpy(data, p->data, __min(p->data_len, sizeof(data)));
- 
- 		if (strncasecmp(argv[1], "pet", 3) == 0) {
- 			printf("Setting LAN Alert %d destination to PET Trap\n", alert);
-@@ -2026,7 +2026,7 @@ ipmi_lan_alert_set(struct ipmi_intf * intf, uint8_t chan, uint8_t alert,
- 		if (p == NULL) {
- 			return (-1);
- 		}
--		memcpy(data, p->data, p->data_len);
-+		memcpy(data, p->data, __min(p->data_len, sizeof(data)));
- 
- 		if (str2uchar(argv[1], &data[2]) != 0) {
- 			lprintf(LOG_ERR, "Invalid time: %s", argv[1]);
-@@ -2042,7 +2042,7 @@ ipmi_lan_alert_set(struct ipmi_intf * intf, uint8_t chan, uint8_t alert,
- 		if (p == NULL) {
- 			return (-1);
- 		}
--		memcpy(data, p->data, p->data_len);
-+		memcpy(data, p->data, __min(p->data_len, sizeof(data)));
- 
- 		if (str2uchar(argv[1], &data[3]) != 0) {
- 			lprintf(LOG_ERR, "Invalid retry: %s", argv[1]);
--- 
-1.9.1
-
diff --git a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0005-fru-sdr-Fix-id_string-buffer-overflows.patch b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0005-fru-sdr-Fix-id_string-buffer-overflows.patch
deleted file mode 100644
index cf8b925..0000000
--- a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool/0005-fru-sdr-Fix-id_string-buffer-overflows.patch
+++ /dev/null
@@ -1,142 +0,0 @@
-From 26e64ca78ae844c5ceedde89531e2924d7d4594c Mon Sep 17 00:00:00 2001
-From: Chrostoper Ertl <chertl@microsoft.com>
-Date: Thu, 28 Nov 2019 17:13:45 +0000
-Subject: [PATCH 5/5] fru, sdr: Fix id_string buffer overflows
-
-Final part of the fixes for CVE-2020-5208, see
-https://github.com/ipmitool/ipmitool/security/advisories/GHSA-g659-9qxw-p7cp
-
-9 variants of stack buffer overflow when parsing `id_string` field of
-SDR records returned from `CMD_GET_SDR` command.
-
-SDR record structs have an `id_code` field, and an `id_string` `char`
-array.
-
-The length of `id_string` is calculated as `(id_code & 0x1f) + 1`,
-which can be larger than expected 16 characters (if `id_code = 0xff`,
-then length will be `(0xff & 0x1f) + 1 = 32`).
-
-In numerous places, this can cause stack buffer overflow when copying
-into fixed buffer of size `17` bytes from this calculated length.
-
-Upstream-Status: Backport[https://github.com/ipmitool/ipmitool/commit/7ccea283dd62a05a320c1921e3d8d71a87772637]
-CVE: CVE-2020-5208
-
-Signed-off-by: Wenlin Kang <wenlin.kang@windriver.com>
----
- lib/ipmi_fru.c |  2 +-
- lib/ipmi_sdr.c | 40 ++++++++++++++++++++++++----------------
- 2 files changed, 25 insertions(+), 17 deletions(-)
-
-diff --git a/lib/ipmi_fru.c b/lib/ipmi_fru.c
-index b71ea23..1decea2 100644
---- a/lib/ipmi_fru.c
-+++ b/lib/ipmi_fru.c
-@@ -3038,7 +3038,7 @@ ipmi_fru_print(struct ipmi_intf * intf, struct sdr_record_fru_locator * fru)
- 		return 0;
- 
- 	memset(desc, 0, sizeof(desc));
--	memcpy(desc, fru->id_string, fru->id_code & 0x01f);
-+	memcpy(desc, fru->id_string, __min(fru->id_code & 0x01f, sizeof(desc)));
- 	desc[fru->id_code & 0x01f] = 0;
- 	printf("FRU Device Description : %s (ID %d)\n", desc, fru->device_id);
- 
-diff --git a/lib/ipmi_sdr.c b/lib/ipmi_sdr.c
-index fa7b082..175a86f 100644
---- a/lib/ipmi_sdr.c
-+++ b/lib/ipmi_sdr.c
-@@ -2113,7 +2113,7 @@ ipmi_sdr_print_sensor_eventonly(struct ipmi_intf *intf,
- 		return -1;
- 
- 	memset(desc, 0, sizeof (desc));
--	snprintf(desc, (sensor->id_code & 0x1f) + 1, "%s", sensor->id_string);
-+	snprintf(desc, sizeof(desc), "%.*s", (sensor->id_code & 0x1f) + 1, sensor->id_string);
- 
- 	if (verbose) {
- 		printf("Sensor ID              : %s (0x%x)\n",
-@@ -2164,7 +2164,7 @@ ipmi_sdr_print_sensor_mc_locator(struct ipmi_intf *intf,
- 		return -1;
- 
- 	memset(desc, 0, sizeof (desc));
--	snprintf(desc, (mc->id_code & 0x1f) + 1, "%s", mc->id_string);
-+	snprintf(desc, sizeof(desc), "%.*s", (mc->id_code & 0x1f) + 1, mc->id_string);
- 
- 	if (verbose == 0) {
- 		if (csv_output)
-@@ -2257,7 +2257,7 @@ ipmi_sdr_print_sensor_generic_locator(struct ipmi_intf *intf,
- 	char desc[17];
- 
- 	memset(desc, 0, sizeof (desc));
--	snprintf(desc, (dev->id_code & 0x1f) + 1, "%s", dev->id_string);
-+	snprintf(desc, sizeof(desc), "%.*s", (dev->id_code & 0x1f) + 1, dev->id_string);
- 
- 	if (!verbose) {
- 		if (csv_output)
-@@ -2314,7 +2314,7 @@ ipmi_sdr_print_sensor_fru_locator(struct ipmi_intf *intf,
- 	char desc[17];
- 
- 	memset(desc, 0, sizeof (desc));
--	snprintf(desc, (fru->id_code & 0x1f) + 1, "%s", fru->id_string);
-+	snprintf(desc, sizeof(desc), "%.*s", (fru->id_code & 0x1f) + 1, fru->id_string);
- 
- 	if (!verbose) {
- 		if (csv_output)
-@@ -2518,35 +2518,43 @@ ipmi_sdr_print_name_from_rawentry(struct ipmi_intf *intf,uint16_t id,
- 
-    int rc =0;
-    char desc[17];
-+   const char *id_string;
-+   uint8_t id_code;
-    memset(desc, ' ', sizeof (desc));
- 
-    switch ( type) {
-       case SDR_RECORD_TYPE_FULL_SENSOR:
-       record.full = (struct sdr_record_full_sensor *) raw;
--      snprintf(desc, (record.full->id_code & 0x1f) +1, "%s",
--               (const char *)record.full->id_string);
-+      id_code = record.full->id_code;
-+      id_string = record.full->id_string;
-       break;
-+
-       case SDR_RECORD_TYPE_COMPACT_SENSOR:
-       record.compact = (struct sdr_record_compact_sensor *) raw	;
--      snprintf(desc, (record.compact->id_code & 0x1f)  +1, "%s",
--               (const char *)record.compact->id_string);
-+      id_code = record.compact->id_code;
-+      id_string = record.compact->id_string;
-       break;
-+
-       case SDR_RECORD_TYPE_EVENTONLY_SENSOR:
-       record.eventonly  = (struct sdr_record_eventonly_sensor *) raw ;
--      snprintf(desc, (record.eventonly->id_code & 0x1f)  +1, "%s",
--               (const char *)record.eventonly->id_string);
--      break;            
-+      id_code = record.eventonly->id_code;
-+      id_string = record.eventonly->id_string;
-+      break;
-+
-       case SDR_RECORD_TYPE_MC_DEVICE_LOCATOR:
-       record.mcloc  = (struct sdr_record_mc_locator *) raw ;
--      snprintf(desc, (record.mcloc->id_code & 0x1f)  +1, "%s",
--               (const char *)record.mcloc->id_string);		
-+      id_code = record.mcloc->id_code;
-+      id_string = record.mcloc->id_string;
-       break;
-+
-       default:
-       rc = -1;
--      break;
--   }   
-+   }
-+   if (!rc) {
-+       snprintf(desc, sizeof(desc), "%.*s", (id_code & 0x1f) + 1, id_string);
-+   }
- 
--      lprintf(LOG_INFO, "ID: 0x%04x , NAME: %-16s", id, desc);
-+   lprintf(LOG_INFO, "ID: 0x%04x , NAME: %-16s", id, desc);
-    return rc;
- }
- 
--- 
-1.9.1
-
diff --git a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool_1.8.18.bb b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool_1.8.19.bb
similarity index 60%
rename from meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool_1.8.18.bb
rename to meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool_1.8.19.bb
index 3337fe5..0a600e2 100644
--- a/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool_1.8.18.bb
+++ b/meta-openembedded/meta-oe/recipes-kernel/ipmitool/ipmitool_1.8.19.bb
@@ -21,24 +21,31 @@
 LIC_FILES_CHKSUM = "file://COPYING;md5=9aa91e13d644326bf281924212862184"
 
 DEPENDS = "openssl readline ncurses"
-
-SRC_URI = "${SOURCEFORGE_MIRROR}/ipmitool/ipmitool-${PV}.tar.bz2 \
-           file://0001-Migrate-to-openssl-1.1.patch \
-           file://0001-fru-Fix-buffer-overflow-vulnerabilities.patch \
-           file://0001-fru-Fix-buffer-overflow-in-ipmi_spd_print_fru.patch \
-           file://0002-session-Fix-buffer-overflow-in-ipmi_get_session_info.patch \
-           file://0003-channel-Fix-buffer-overflow.patch \
-           file://0004-lanp-Fix-buffer-overflows-in-get_lan_param_select.patch \
-           file://0005-fru-sdr-Fix-id_string-buffer-overflows.patch \
-           file://0001-hpmfwupg-move-variable-definition-to-.c-file.patch \
+SRCREV = "19d78782d795d0cf4ceefe655f616210c9143e62"
+SRC_URI = "git://github.com/ipmitool/ipmitool;protocol=https;branch=master \
+           ${IANA_ENTERPRISE_NUMBERS} \
+           file://0001-ipmi_fru.c-Provide-missing-function-declarations.patch \
+           file://0001-configure-Remove-the-logic-to-download-IANA-PEN-data.patch \
            "
-SRC_URI[md5sum] = "bab7ea104c7b85529c3ef65c54427aa3"
-SRC_URI[sha256sum] = "0c1ba3b1555edefb7c32ae8cd6a3e04322056bc087918f07189eeedfc8b81e01"
+IANA_ENTERPRISE_NUMBERS ?= ""
+
+# Add these via bbappend if this database is needed by the system
+#IANA_ENTERPRISE_NUMBERS ?= "http://www.iana.org/assignments/enterprise-numbers;name=iana-enterprise-numbers;downloadfilename=iana-enterprise-numbers"
+#SRC_URI[iana-enterprise-numbers.sha256sum] = "cdd97fc08325667434b805eb589104ae63f7a9eb720ecea73cb55110b383934c"
+
+S = "${WORKDIR}/git"
 
 inherit autotools
 
+do_install:append() {
+        if [ -e ${WORKDIR}/iana-enterprise-numbers ]; then  # optional: fetched only when a bbappend sets IANA_ENTERPRISE_NUMBERS
+                install -Dm 0644 ${WORKDIR}/iana-enterprise-numbers ${D}${datadir}/misc/enterprise-numbers  # data file, not a program: no exec bit
+        fi
+}
+
 PACKAGES =+ "${PN}-ipmievd"
 FILES:${PN}-ipmievd += "${sbindir}/ipmievd"
+FILES:${PN} += "${datadir}/misc"
 
 # --disable-dependency-tracking speeds up the build
 # --enable-file-security adds some security checks
diff --git a/meta-openembedded/meta-oe/recipes-kernel/ipmiutil/ipmiutil/0001-ihpm-Include-stdlib.h-for-malloc-free-atoi-functions.patch b/meta-openembedded/meta-oe/recipes-kernel/ipmiutil/ipmiutil/0001-ihpm-Include-stdlib.h-for-malloc-free-atoi-functions.patch
new file mode 100644
index 0000000..1ed6472
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-kernel/ipmiutil/ipmiutil/0001-ihpm-Include-stdlib.h-for-malloc-free-atoi-functions.patch
@@ -0,0 +1,26 @@
+From 097e108b81f2571c4c51871044adf409b6954649 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Sat, 3 Sep 2022 00:44:52 -0700
+Subject: [PATCH] ihpm: Include stdlib.h for malloc/free/atoi functions
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ util/ihpm.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/util/ihpm.c b/util/ihpm.c
+index 740ab74..5ad9ef6 100644
+--- a/util/ihpm.c
++++ b/util/ihpm.c
+@@ -68,6 +68,7 @@ typedef uint32_t    socklen_t;
+ #endif
+ #include <stdio.h>
+ #include <stdarg.h>
++#include <stdlib.h> /* malloc/free/atoi */
+ #include <string.h>
+ #include <time.h>
+ 
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-kernel/ipmiutil/ipmiutil_3.1.5.bb b/meta-openembedded/meta-oe/recipes-kernel/ipmiutil/ipmiutil_3.1.5.bb
index d0f3688..64bcf5f 100644
--- a/meta-openembedded/meta-oe/recipes-kernel/ipmiutil/ipmiutil_3.1.5.bb
+++ b/meta-openembedded/meta-oe/recipes-kernel/ipmiutil/ipmiutil_3.1.5.bb
@@ -22,6 +22,7 @@
 SRC_URI = "${SOURCEFORGE_MIRROR}/ipmiutil/ipmiutil-${PV}.tar.gz \
            file://fix_systemd_path.patch \
            file://0001-Prevent-access-times-from-changing-resulting-gzip-md.patch \
+           file://0001-ihpm-Include-stdlib.h-for-malloc-free-atoi-functions.patch \
           "
 SRC_URI[md5sum] = "292d6df25cad678bb27e5c8cdc6748f9"
 SRC_URI[sha256sum] = "58ccdbd5755d7dd72478756715af09e9c73330dfad2b91dbf03d2ac504b301a3"
diff --git a/meta-openembedded/meta-oe/recipes-kernel/libpfm/libpfm4_4.11.0.bb b/meta-openembedded/meta-oe/recipes-kernel/libpfm/libpfm4_4.12.0.bb
similarity index 93%
rename from meta-openembedded/meta-oe/recipes-kernel/libpfm/libpfm4_4.11.0.bb
rename to meta-openembedded/meta-oe/recipes-kernel/libpfm/libpfm4_4.12.0.bb
index d2d2512..4baed62 100644
--- a/meta-openembedded/meta-oe/recipes-kernel/libpfm/libpfm4_4.11.0.bb
+++ b/meta-openembedded/meta-oe/recipes-kernel/libpfm/libpfm4_4.12.0.bb
@@ -16,7 +16,7 @@
            file://0001-Include-poll.h-instead-of-sys-poll.h.patch \
            file://0002-perf_examples-Remove-unused-sum-variable.patch \
            "
-SRC_URI[sha256sum] = "5da5f8872bde14b3634c9688d980f68bda28b510268723cc12973eedbab9fecc"
+SRC_URI[sha256sum] = "4b0c1f53f39a61525b69bebf532c68040c1b984d7544a8ae0844b13cd91e1ee4"
 
 UPSTREAM_CHECK_URI = "http://sourceforge.net/projects/perfmon2/files/libpfm4/"
 
diff --git a/meta-openembedded/meta-oe/recipes-kernel/oprofile/oprofile/0001-configure-Include-unistd.h-for-getpid-API.patch b/meta-openembedded/meta-oe/recipes-kernel/oprofile/oprofile/0001-configure-Include-unistd.h-for-getpid-API.patch
new file mode 100644
index 0000000..0ac03bd
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-kernel/oprofile/oprofile/0001-configure-Include-unistd.h-for-getpid-API.patch
@@ -0,0 +1,28 @@
+From 46f0aadf80d5e28f587149b6e90c3ba005971f6e Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 19:22:17 -0700
+Subject: [PATCH] configure: Include unistd.h for getpid API
+
+This fixes the check for perf events support in configure
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.ac | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/configure.ac b/configure.ac
+index e4f4024..3384628 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -119,6 +119,7 @@ if test "$kernel_may_have_perf_events_support" = "yes"; then
+ 				#include <asm/unistd.h>
+ 				#include <sys/types.h>
+ 				#include <string.h>
++				#include <unistd.h>
+ 			]],
+ 			[[struct perf_event_attr attr;
+ 				pid_t pid;
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-kernel/oprofile/oprofile_1.4.0.bb b/meta-openembedded/meta-oe/recipes-kernel/oprofile/oprofile_1.4.0.bb
index 23e7d3d..e6eec0a 100644
--- a/meta-openembedded/meta-oe/recipes-kernel/oprofile/oprofile_1.4.0.bb
+++ b/meta-openembedded/meta-oe/recipes-kernel/oprofile/oprofile_1.4.0.bb
@@ -30,6 +30,7 @@
            file://0008-include-linux-limits.h-for-MAX_INPUT.patch \
            file://0009-Prevent-running-check-tests-on-host-if-cross-compili.patch \
            file://0010-oprofile-Determine-the-root-home-directory-dynamical.patch \
+           file://0001-configure-Include-unistd.h-for-getpid-API.patch \
 "
 SRC_URI[sha256sum] = "7ba06f99d7c188389d20d1d5e53ee690c7733f87aa9af62bd664fa0ca235a412"
 
diff --git a/meta-openembedded/meta-oe/recipes-multimedia/cdrkit/cdrkit_1.1.11.bb b/meta-openembedded/meta-oe/recipes-multimedia/cdrkit/cdrkit_1.1.11.bb
index 757f99d..9fd9111 100644
--- a/meta-openembedded/meta-oe/recipes-multimedia/cdrkit/cdrkit_1.1.11.bb
+++ b/meta-openembedded/meta-oe/recipes-multimedia/cdrkit/cdrkit_1.1.11.bb
@@ -12,6 +12,7 @@
            file://0002-Do-not-use-rcmd-on-build-with-musl.patch \
            file://0001-genisoimage-Add-missing-extern-definition.patch \
            file://0001-add-new-option-eltorito-platform.patch \
+           file://0001-genisoimage-Add-checksum.h-and-md5.h-for-function-pr.patch \
            "
 SRC_URI:append:class-nativesdk = " \
            file://0001-install-netscsid-to-bin-for-nativesdk.patch \
diff --git a/meta-openembedded/meta-oe/recipes-multimedia/cdrkit/files/0001-genisoimage-Add-checksum.h-and-md5.h-for-function-pr.patch b/meta-openembedded/meta-oe/recipes-multimedia/cdrkit/files/0001-genisoimage-Add-checksum.h-and-md5.h-for-function-pr.patch
new file mode 100644
index 0000000..b999577
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-multimedia/cdrkit/files/0001-genisoimage-Add-checksum.h-and-md5.h-for-function-pr.patch
@@ -0,0 +1,44 @@
+From f28b8ec20c3485068f1617ff93b497bafe5264e1 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Sat, 3 Sep 2022 00:50:17 -0700
+Subject: [PATCH] genisoimage: Add checksum.h and md5.h for function prototypes
+
+Needed for parse_checksum_algo and calculate_md5sum
+
+Upstream-Status: Pending
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ genisoimage/genisoimage.c | 2 ++
+ genisoimage/jte.c         | 2 ++
+ 2 files changed, 4 insertions(+)
+
+diff --git a/genisoimage/genisoimage.c b/genisoimage/genisoimage.c
+index 84ac3c2..5c9f7f3 100644
+--- a/genisoimage/genisoimage.c
++++ b/genisoimage/genisoimage.c
+@@ -59,6 +59,8 @@
+ #include "udf.h"
+ #endif
+ 
++#include "checksum.h"
++
+ #ifdef	NEED_O_BINARY
+ #include <io.h>					/* for setmode() prototype */
+ #endif
+diff --git a/genisoimage/jte.c b/genisoimage/jte.c
+index 0dff289..1f03ad3 100644
+--- a/genisoimage/jte.c
++++ b/genisoimage/jte.c
+@@ -36,6 +36,8 @@
+ #include "vms.h"
+ #endif
+ 
++#include "md5.h"
++
+ /* Different types used in building our state list below */
+ #define JTET_FILE_MATCH 1
+ #define JTET_NOMATCH    2
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-multimedia/jack/jack_1.19.20.bb b/meta-openembedded/meta-oe/recipes-multimedia/jack/jack_1.19.21.bb
similarity index 96%
rename from meta-openembedded/meta-oe/recipes-multimedia/jack/jack_1.19.20.bb
rename to meta-openembedded/meta-oe/recipes-multimedia/jack/jack_1.19.21.bb
index 90e5d56..c949623 100644
--- a/meta-openembedded/meta-oe/recipes-multimedia/jack/jack_1.19.20.bb
+++ b/meta-openembedded/meta-oe/recipes-multimedia/jack/jack_1.19.21.bb
@@ -15,7 +15,7 @@
 DEPENDS = "libsamplerate0 libsndfile1 readline"
 
 SRC_URI = "git://github.com/jackaudio/jack2.git;branch=master;protocol=https"
-SRCREV = "a2fe7ec2fdbd315f112c8035282d94a429451178"
+SRCREV = "6b3c96d8aeef8aeb4b20f3aa647e6d01b4000ff8"
 
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-oe/recipes-multimedia/libcdio/libcdio_2.1.0.bb b/meta-openembedded/meta-oe/recipes-multimedia/libcdio/libcdio_2.1.0.bb
index 1facd47..62004ac 100644
--- a/meta-openembedded/meta-oe/recipes-multimedia/libcdio/libcdio_2.1.0.bb
+++ b/meta-openembedded/meta-oe/recipes-multimedia/libcdio/libcdio_2.1.0.bb
@@ -18,6 +18,9 @@
 PACKAGECONFIG[cddb] = "--enable-cddb,--disable-cddb,libcddb"
 PACKAGECONFIG[vcd-info] = "--enable-vcd-info,--disable-vcd-info,vcdimager"
 
+# add -D_LARGEFILE64_SOURCE for 32bit targets
+CFLAGS += "${@['-D_LARGEFILE64_SOURCE',''][d.getVar('SITEINFO_BITS') != '32']}"
+
 PACKAGES += "${PN}-utils"
 
 FILES:${PN} = "${libdir}/${BPN}${SOLIB}"
diff --git a/meta-openembedded/meta-oe/recipes-networking/cyrus-sasl/cyrus-sasl/0001-Fix-time.h-check.patch b/meta-openembedded/meta-oe/recipes-networking/cyrus-sasl/cyrus-sasl/0001-Fix-time.h-check.patch
new file mode 100644
index 0000000..a6df213
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-networking/cyrus-sasl/cyrus-sasl/0001-Fix-time.h-check.patch
@@ -0,0 +1,61 @@
+From 266f0acf7f5e029afbb3e263437039e50cd6c262 Mon Sep 17 00:00:00 2001
+From: Sam James <sam@gentoo.org>
+Date: Wed, 23 Feb 2022 00:45:15 +0000
+Subject: [PATCH] Fix <time.h> check
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+We're conditionally including based on HAVE_TIME_H in a bunch of places,
+but we're not actually checking for time.h, so that's never going to be defined.
+
+While at it, add in a missing include in the cram plugin.
+
+This fixes a bunch of implicit declaration warnings:
+```
+ * cyrus-sasl-2.1.28/lib/saslutil.c:280:3: warning: implicit declaration of function ‘time’ [-Wimplicit-function-declaration]
+ * cyrus-sasl-2.1.28/lib/saslutil.c:364:41: warning: implicit declaration of function ‘clock’ [-Wimplicit-function-declaration]
+ * cyrus-sasl-2.1.28/plugins/cram.c:132:7: warning: implicit declaration of function ‘time’ [-Wimplicit-function-declaration]
+ * cyrus-sasl-2.1.28/lib/saslutil.c:280:3: warning: implicit declaration of function ‘time’ [-Wimplicit-function-declaration]
+ * cyrus-sasl-2.1.28/lib/saslutil.c:364:41: warning: implicit declaration of function ‘clock’ [-Wimplicit-function-declaration]
+ * cyrus-sasl-2.1.28/plugins/cram.c:132:7: warning: implicit declaration of function ‘time’ [-Wimplicit-function-declaration]
+```
+
+Upstream-Status: Backport [https://github.com/cyrusimap/cyrus-sasl/commit/266f0acf7f5e029afbb3e263437039e50cd6c262]
+Signed-off-by: Sam James <sam@gentoo.org>
+---
+ configure.ac   | 2 +-
+ plugins/cram.c | 4 ++++
+ 2 files changed, 5 insertions(+), 1 deletion(-)
+
+diff --git a/configure.ac b/configure.ac
+index e1bf53b6..ad781830 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -1290,7 +1290,7 @@ AC_CHECK_HEADERS_ONCE([sys/time.h])
+ 
+ AC_HEADER_DIRENT
+ AC_HEADER_SYS_WAIT
+-AC_CHECK_HEADERS(crypt.h des.h dlfcn.h fcntl.h limits.h malloc.h paths.h strings.h sys/file.h sys/time.h syslog.h unistd.h inttypes.h sys/uio.h sys/param.h sysexits.h stdarg.h varargs.h krb5.h)
++AC_CHECK_HEADERS(crypt.h des.h dlfcn.h fcntl.h limits.h malloc.h paths.h strings.h sys/file.h sys/time.h syslog.h time.h unistd.h inttypes.h sys/uio.h sys/param.h sysexits.h stdarg.h varargs.h krb5.h)
+ 
+ IPv6_CHECK_SS_FAMILY()
+ IPv6_CHECK_SA_LEN()
+diff --git a/plugins/cram.c b/plugins/cram.c
+index d02e9baa..695aaa91 100644
+--- a/plugins/cram.c
++++ b/plugins/cram.c
+@@ -53,6 +53,10 @@
+ #endif
+ #include <fcntl.h>
+ 
++#ifdef HAVE_TIME_H
++#include <time.h>
++#endif
++
+ #include <sasl.h>
+ #include <saslplug.h>
+ #include <saslutil.h>
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-networking/cyrus-sasl/cyrus-sasl_2.1.28.bb b/meta-openembedded/meta-oe/recipes-networking/cyrus-sasl/cyrus-sasl_2.1.28.bb
index e344733..3277fb1 100644
--- a/meta-openembedded/meta-oe/recipes-networking/cyrus-sasl/cyrus-sasl_2.1.28.bb
+++ b/meta-openembedded/meta-oe/recipes-networking/cyrus-sasl/cyrus-sasl_2.1.28.bb
@@ -15,6 +15,7 @@
            file://saslauthd.conf \
            file://CVE-2019-19906.patch \
 	   file://CVE-2022-24407.patch \
+           file://0001-Fix-time.h-check.patch \
            "
 
 UPSTREAM_CHECK_URI = "https://github.com/cyrusimap/cyrus-sasl/archives"
diff --git a/meta-openembedded/meta-oe/recipes-security/passwdqc/passwdqc/makefile-add-ldflags.patch b/meta-openembedded/meta-oe/recipes-security/passwdqc/passwdqc/makefile-add-ldflags.patch
index e902349..6e4803f 100644
--- a/meta-openembedded/meta-oe/recipes-security/passwdqc/passwdqc/makefile-add-ldflags.patch
+++ b/meta-openembedded/meta-oe/recipes-security/passwdqc/passwdqc/makefile-add-ldflags.patch
@@ -4,19 +4,17 @@
 
 Signed-off-by: Yi Zhao <yi.zhao@windriver.com>
 
-diff --git a/Makefile b/Makefile
-index 49d622d..cd17334 100644
 --- a/Makefile
 +++ b/Makefile
-@@ -48,18 +48,17 @@ CFLAGS = -Wall -W -O2
- CFLAGS_lib = $(CFLAGS) -fPIC
- CFLAGS_bin = $(CFLAGS) -fomit-frame-pointer
+@@ -68,18 +68,17 @@ XGETTEXT = xgettext
+ XGETTEXT_OPTS = --keyword=_ --keyword=P2_:1,1 --keyword=P3_:1,2 --language=C --add-comments
+ MSGMERGE = msgmerge
  
 -LDFLAGS =
- LDFLAGS_shared = --shared
- LDFLAGS_shared_LINUX = --shared
- LDFLAGS_shared_SUN = -G
- LDFLAGS_shared_HP = -b
+ LDFLAGS_shared = $(LDFLAGS) --shared
+ LDFLAGS_shared_LINUX = $(LDFLAGS) --shared
+ LDFLAGS_shared_SUN = $(LDFLAGS) -G
+ LDFLAGS_shared_HP = $(LDFLAGS) -b
  LDFLAGS_lib = $(LDFLAGS_shared)
 -LDFLAGS_lib_LINUX = $(LDFLAGS_shared_LINUX) \
 +LDFLAGS_lib_LINUX = $(LDFLAGS) $(LDFLAGS_shared_LINUX) \
diff --git a/meta-openembedded/meta-oe/recipes-security/passwdqc/passwdqc_1.3.1.bb b/meta-openembedded/meta-oe/recipes-security/passwdqc/passwdqc_2.0.2.bb
similarity index 88%
rename from meta-openembedded/meta-oe/recipes-security/passwdqc/passwdqc_1.3.1.bb
rename to meta-openembedded/meta-oe/recipes-security/passwdqc/passwdqc_2.0.2.bb
index b148fdc..8462701 100644
--- a/meta-openembedded/meta-oe/recipes-security/passwdqc/passwdqc_1.3.1.bb
+++ b/meta-openembedded/meta-oe/recipes-security/passwdqc/passwdqc_2.0.2.bb
@@ -25,13 +25,12 @@
 REQUIRED_DISTRO_FEATURES = "pam"
 
 LICENSE = "BSD-1-Clause"
-LIC_FILES_CHKSUM = "file://LICENSE;md5=1b4af6f3d4ee079a38107366e93b334d"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=ac99c8678577a1c2f9f04cccee411d5d"
 
 SRC_URI = "http://www.openwall.com/${BPN}/${BP}.tar.gz \
            file://makefile-add-ldflags.patch \
           "
-SRC_URI[md5sum] = "3878b57bcd3fdbcf3d4b362dbc6228b9"
-SRC_URI[sha256sum] = "d1fedeaf759e8a0f32d28b5811ef11b5a5365154849190f4b7fab670a70ffb14"
+SRC_URI[sha256sum] = "ff1f505764c020f6a4484b1e0cc4fdbf2e3f71b522926d90b4709104ca0604ab"
 
 # explicitly define LINUX_PAM in case DISTRO_FEATURES no pam
 # this package's pam_passwdqc.so needs pam
@@ -58,7 +57,7 @@
 PROVIDES += "pam-${BPN}"
 PACKAGES =+ "lib${BPN} pam-${BPN}"
 
-FILES:lib${BPN} = "${base_libdir}/libpasswdqc.so.0"
+FILES:lib${BPN} = "${base_libdir}/libpasswdqc.so.1"
 FILES:pam-${BPN} = "${base_libdir}/security/pam_passwdqc.so"
 FILES:${PN}-dbg += "${base_libdir}/security/.debug"
 
diff --git a/meta-openembedded/meta-oe/recipes-support/augeas/augeas.inc b/meta-openembedded/meta-oe/recipes-support/augeas/augeas.inc
deleted file mode 100644
index d46f388..0000000
--- a/meta-openembedded/meta-oe/recipes-support/augeas/augeas.inc
+++ /dev/null
@@ -1,33 +0,0 @@
-SUMMARY = "Augeas configuration API"
-HOMEPAGE = "http://augeas.net/"
-BUGTRACKER = "https://fedorahosted.org/augeas/report/1"
-
-LICENSE = "LGPL-2.1-or-later"
-LIC_FILES_CHKSUM = "file://COPYING;md5=bbb461211a33b134d42ed5ee802b37ff"
-
-SRC_URI = "http://download.augeas.net/${BP}.tar.gz \
-           file://sepbuildfix.patch \
-          "
-
-DEPENDS = "readline libxml2"
-
-inherit autotools pkgconfig
-
-PACKAGES =+ "${PN}-lenses lib${BPN}"
-
-FILES:${PN}-lenses = "${datadir}/augeas/lenses"
-FILES:lib${BPN} = "${libdir}/lib*${SOLIBS}"
-
-RDEPENDS:lib${BPN} += "${PN}-lenses"
-RRECOMMENDS:lib${BPN} += "${PN}"
-
-LEAD_SONAME = "libaugeas.so"
-
-do_install:append() {
-    rm -fr ${D}${datadir}/vim
-}
-
-PACKAGECONFIG ??= "${@bb.utils.filter('DISTRO_FEATURES', 'selinux', d)}"
-PACKAGECONFIG[selinux] = "--with-selinux,--without-selinux,libselinux"
-
-EXTRA_AUTORECONF += "-I ${S}/gnulib/m4"
diff --git a/meta-openembedded/meta-oe/recipes-support/augeas/augeas/0001-src-internal-Use-__GLIBC__-to-check-for-GNU-extentio.patch b/meta-openembedded/meta-oe/recipes-support/augeas/augeas/0001-src-internal-Use-__GLIBC__-to-check-for-GNU-extentio.patch
new file mode 100644
index 0000000..9424be2
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/augeas/augeas/0001-src-internal-Use-__GLIBC__-to-check-for-GNU-extentio.patch
@@ -0,0 +1,34 @@
+From e5ccf769c2dc7283b56a597fffdb5dc1558e3ce8 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Sat, 3 Sep 2022 01:23:28 -0700
+Subject: [PATCH] src/internal: Use __GLIBC__ to check for GNU extension
+ implementation
+
+__USE_GNU is defined by _GNU_SOURCE and configure explicitly sets this
+macro and it does have meaning on musl too, where it may have some level
+of GNU compatibility but strerror_r is not one of them. Therefore we
+have to check for libc implementation for this to be sure.
+
+Upstream-Status: Pending
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/internal.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/internal.c b/src/internal.c
+index ef83b71..fb326fa 100644
+--- a/src/internal.c
++++ b/src/internal.c
+@@ -431,7 +431,7 @@ char *cleanpath(char *path) {
+ 
+ const char *xstrerror(int errnum, char *buf, size_t len) {
+ #ifdef HAVE_STRERROR_R
+-# ifdef __USE_GNU
++# ifdef __GLIBC__
+     /* Annoying linux specific API contract */
+     return strerror_r(errnum, buf, len);
+ # else
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/augeas/augeas_1.12.0.bb b/meta-openembedded/meta-oe/recipes-support/augeas/augeas_1.12.0.bb
index d32c469..2c6d4cd 100644
--- a/meta-openembedded/meta-oe/recipes-support/augeas/augeas_1.12.0.bb
+++ b/meta-openembedded/meta-oe/recipes-support/augeas/augeas_1.12.0.bb
@@ -1,4 +1,36 @@
-require augeas.inc
+SUMMARY = "Augeas configuration API"
+HOMEPAGE = "http://augeas.net/"
+BUGTRACKER = "https://fedorahosted.org/augeas/report/1"
 
+LICENSE = "LGPL-2.1-or-later"
+LIC_FILES_CHKSUM = "file://COPYING;md5=bbb461211a33b134d42ed5ee802b37ff"
+
+SRC_URI = "http://download.augeas.net/${BP}.tar.gz \
+           file://sepbuildfix.patch \
+           file://0001-src-internal-Use-__GLIBC__-to-check-for-GNU-extentio.patch \
+          "
 SRC_URI[md5sum] = "74f1c7b8550f4e728486091f6b907175"
 SRC_URI[sha256sum] = "321942c9cc32185e2e9cb72d0a70eea106635b50269075aca6714e3ec282cb87"
+
+DEPENDS = "readline libxml2"
+
+inherit autotools pkgconfig
+
+PACKAGES =+ "${PN}-lenses lib${BPN}"
+
+FILES:${PN}-lenses = "${datadir}/augeas/lenses"
+FILES:lib${BPN} = "${libdir}/lib*${SOLIBS}"
+
+RDEPENDS:lib${BPN} += "${PN}-lenses"
+RRECOMMENDS:lib${BPN} += "${PN}"
+
+LEAD_SONAME = "libaugeas.so"
+
+do_install:append() {
+    rm -fr ${D}${datadir}/vim  # delete any vim/ directory under datadir in the staging tree (presumably editor support files - TODO confirm)
+}
+
+PACKAGECONFIG ??= "${@bb.utils.filter('DISTRO_FEATURES', 'selinux', d)}"
+PACKAGECONFIG[selinux] = "--with-selinux,--without-selinux,libselinux"
+
+EXTRA_AUTORECONF += "-I ${S}/gnulib/m4"
diff --git a/meta-openembedded/meta-oe/recipes-support/bdwgc/bdwgc_8.2.2.bb b/meta-openembedded/meta-oe/recipes-support/bdwgc/bdwgc_8.2.2.bb
index 622402a..d980ecc 100644
--- a/meta-openembedded/meta-oe/recipes-support/bdwgc/bdwgc_8.2.2.bb
+++ b/meta-openembedded/meta-oe/recipes-support/bdwgc/bdwgc_8.2.2.bb
@@ -30,9 +30,7 @@
 
 inherit autotools pkgconfig
 
-EXTRA_OECONF += "--enable-cpluscplus"
-
-CFLAGS:append:libc-musl = " -D_GNU_SOURCE -DNO_GETCONTEXT -DSEARCH_FOR_DATA_START -DUSE_MMAP -DHAVE_DL_ITERATE_PHDR"
+EXTRA_OECONF += "--enable-cplusplus"
 
 FILES:${PN}-doc = "${datadir}"
 
diff --git a/meta-openembedded/meta-oe/recipes-support/boost-sml/boost-sml_git.bb b/meta-openembedded/meta-oe/recipes-support/boost-sml/boost-sml_git.bb
index e22eddb..c83181c 100644
--- a/meta-openembedded/meta-oe/recipes-support/boost-sml/boost-sml_git.bb
+++ b/meta-openembedded/meta-oe/recipes-support/boost-sml/boost-sml_git.bb
@@ -9,10 +9,10 @@
 
 DEPENDS += "boost qemu-native"
 
-PV = "1.1.5"
+PV = "1.1.6"
 
 SRC_URI = "git://github.com/boost-ext/sml.git;protocol=https;branch=master"
-SRCREV = "7ed8f85fbe5b5af87bfb818e9e0347aaf7f7056d"
+SRCREV = "23e9a24e434cac9922039cbb43ca54ab70bef72c"
 
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-oe/recipes-support/canonical-multitouch/geis/0001-provide-function-declaration-with-prototypes.patch b/meta-openembedded/meta-oe/recipes-support/canonical-multitouch/geis/0001-provide-function-declaration-with-prototypes.patch
new file mode 100644
index 0000000..ddad140
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/canonical-multitouch/geis/0001-provide-function-declaration-with-prototypes.patch
@@ -0,0 +1,690 @@
+From 05fe9751dfb370b0fb726e95bc36674468116db4 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 2 Sep 2022 16:57:51 -0700
+Subject: [PATCH] provide function declaration with prototypes
+
+Fixes build with clang-15+
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ libgeis/backend/grail/geis_grail_window_grab.c           | 2 +-
+ libgeis/backend/grail/geis_ugsubscription_store.c        | 2 +-
+ libgeis/backend/grail/geis_ugsubscription_store.h        | 2 +-
+ libgeis/backend/test_fixture/geis_backend_test_fixture.c | 4 ++--
+ libgeis/geis.c                                           | 2 +-
+ libgeis/geis_backend_multiplexor.c                       | 4 ++--
+ libgeis/geis_backend_multiplexor.h                       | 2 +-
+ libgeis/geis_class.c                                     | 2 +-
+ libgeis/geis_class.h                                     | 2 +-
+ libgeis/geis_device.c                                    | 2 +-
+ libgeis/geis_device.h                                    | 2 +-
+ libgeis/geis_event_queue.c                               | 2 +-
+ libgeis/geis_event_queue.h                               | 2 +-
+ libgeis/geis_filter.c                                    | 2 +-
+ libgeis/geis_filter.h                                    | 2 +-
+ libgeis/geis_filterable.c                                | 2 +-
+ libgeis/geis_filterable.h                                | 2 +-
+ libgeis/geis_frame.c                                     | 2 +-
+ libgeis/geis_frame.h                                     | 2 +-
+ libgeis/geis_group.c                                     | 2 +-
+ libgeis/geis_group.h                                     | 2 +-
+ libgeis/geis_region.c                                    | 2 +-
+ libgeis/geis_region.h                                    | 2 +-
+ libgeis/geis_touch.c                                     | 2 +-
+ libgeis/geis_touch.h                                     | 2 +-
+ libgeis/server/geis_dbus_client_proxy.c                  | 2 +-
+ libgeis/server/geis_dbus_proxy_box.c                     | 2 +-
+ libgeis/server/geis_dbus_proxy_box.h                     | 2 +-
+ libs/geis-dbus/geis_dbus_dispatcher.c                    | 2 +-
+ libs/geis-util/geis_logging.c                            | 2 +-
+ testsuite/geis1/check_gesture_attrs.c                    | 2 +-
+ testsuite/geis1/check_gesture_types.c                    | 2 +-
+ testsuite/geis1/check_instance.c                         | 2 +-
+ testsuite/geis1/check_subscription.c                     | 2 +-
+ testsuite/geis2/check_attr.c                             | 2 +-
+ testsuite/geis2/check_class.c                            | 2 +-
+ testsuite/geis2/check_config.c                           | 2 +-
+ testsuite/geis2/check_device.c                           | 2 +-
+ testsuite/geis2/check_error_codes.c                      | 2 +-
+ testsuite/geis2/check_event.c                            | 2 +-
+ testsuite/geis2/check_filter.c                           | 2 +-
+ testsuite/geis2/check_frame.c                            | 2 +-
+ testsuite/geis2/check_geis_new.c                         | 2 +-
+ testsuite/geis2/check_general_types.c                    | 2 +-
+ testsuite/geis2/check_region.c                           | 2 +-
+ testsuite/geis2/check_subscription.c                     | 2 +-
+ tools/geis-server/geis-server.c                          | 2 +-
+ 47 files changed, 49 insertions(+), 49 deletions(-)
+
+diff --git a/libgeis/backend/grail/geis_grail_window_grab.c b/libgeis/backend/grail/geis_grail_window_grab.c
+index 003adee..f8480ac 100644
+--- a/libgeis/backend/grail/geis_grail_window_grab.c
++++ b/libgeis/backend/grail/geis_grail_window_grab.c
+@@ -49,7 +49,7 @@ static const GeisFloat _geis_grail_window_grab_store_growth_factor = 1.7;
+ 
+ 
+ static GeisGrailWindowGrabStore
+-_window_grab_allocate()
++_window_grab_allocate(void)
+ {
+   GeisGrailWindowGrabStore wgs = malloc(sizeof(struct GeisGrailWindowGrabStore));
+   if (!wgs)
+diff --git a/libgeis/backend/grail/geis_ugsubscription_store.c b/libgeis/backend/grail/geis_ugsubscription_store.c
+index bb1f0bb..cb7e620 100644
+--- a/libgeis/backend/grail/geis_ugsubscription_store.c
++++ b/libgeis/backend/grail/geis_ugsubscription_store.c
+@@ -42,7 +42,7 @@ static const GeisFloat _geis_grail_ugsubscription_store_growth_factor = 1.7;
+  * Creates a new, empty grail subscription store.
+  */
+ GeisUGSubscriptionStore
+-geis_ugsubscription_store_new()
++geis_ugsubscription_store_new(void)
+ {
+   GeisUGSubscriptionStore store = geis_bag_new(sizeof(struct GeisUGSubscription),
+                                   _geis_grail_ugsubscription_store_default_size,
+diff --git a/libgeis/backend/grail/geis_ugsubscription_store.h b/libgeis/backend/grail/geis_ugsubscription_store.h
+index a93e1b6..06273ef 100644
+--- a/libgeis/backend/grail/geis_ugsubscription_store.h
++++ b/libgeis/backend/grail/geis_ugsubscription_store.h
+@@ -41,7 +41,7 @@ typedef GeisBag GeisUGSubscriptionStore;
+  * @returns a pointer to a valid store, or NULL to indicate failure.
+  */
+ GeisUGSubscriptionStore
+-geis_ugsubscription_store_new();
++geis_ugsubscription_store_new(void);
+ 
+ /**
+  * Destroys a grail subscription store.
+diff --git a/libgeis/backend/test_fixture/geis_backend_test_fixture.c b/libgeis/backend/test_fixture/geis_backend_test_fixture.c
+index 5912acb..099cde4 100644
+--- a/libgeis/backend/test_fixture/geis_backend_test_fixture.c
++++ b/libgeis/backend/test_fixture/geis_backend_test_fixture.c
+@@ -434,7 +434,7 @@ _token_free_subscription_pdata(GeisBackendToken gbtoken GEIS_UNUSED,
+ }
+ 
+ __attribute__((constructor))
+-static void _register_test_fixture()
++static void _register_test_fixture(void)
+ {
+   geis_register_backend(GEIS_INIT_MOCK_BACKEND,
+                         sizeof(struct GeisBackendTestFixture),
+@@ -444,6 +444,6 @@ static void _register_test_fixture()
+ 
+ /* A dummy routine to force linkage of this module without dlopening it */
+ void
+-geis_include_backend_test_fixture()
++geis_include_backend_test_fixture(void)
+ {
+ }
+diff --git a/libgeis/geis.c b/libgeis/geis.c
+index 4290d19..3f7bd12 100644
+--- a/libgeis/geis.c
++++ b/libgeis/geis.c
+@@ -402,7 +402,7 @@ geis_filterable_attribute_foreach(Geis geis,
+  * Creates a new empty Geis API instance.
+  */
+ static Geis
+-geis_new_empty()
++geis_new_empty(void)
+ {
+   geis_error_clear(NULL);
+   Geis geis = calloc(1, sizeof(struct _Geis));
+diff --git a/libgeis/geis_backend_multiplexor.c b/libgeis/geis_backend_multiplexor.c
+index 3f80080..0f76198 100644
+--- a/libgeis/geis_backend_multiplexor.c
++++ b/libgeis/geis_backend_multiplexor.c
+@@ -60,7 +60,7 @@ struct _GeisBackendMultiplexor
+  * Creates a new container for callback info.
+  */
+ static CallbackInfoBag
+-_callback_info_bag_new()
++_callback_info_bag_new(void)
+ {
+   CallbackInfoBag cbib = calloc(1, sizeof(struct CallbackInfoBag));
+   if (!cbib)
+@@ -209,7 +209,7 @@ _callback_info_bag_release(CallbackInfoBag cbib, int fd)
+  * Creates a new backend multiplexor.
+  */
+ GeisBackendMultiplexor
+-geis_backend_multiplexor_new()
++geis_backend_multiplexor_new(void)
+ {
+   GeisBackendMultiplexor mx = calloc(1, sizeof(struct _GeisBackendMultiplexor));
+   if (!mx)
+diff --git a/libgeis/geis_backend_multiplexor.h b/libgeis/geis_backend_multiplexor.h
+index ff4c318..8e7102e 100644
+--- a/libgeis/geis_backend_multiplexor.h
++++ b/libgeis/geis_backend_multiplexor.h
+@@ -74,7 +74,7 @@ typedef void (*GeisBackendFdEventCallback)(int                             fd,
+ /**
+  * Constructs a new back end multiplexor.
+  */
+-GeisBackendMultiplexor geis_backend_multiplexor_new();
++GeisBackendMultiplexor geis_backend_multiplexor_new(void);
+ 
+ /**
+  * A reasonable default value for the max_events_per_pump parameter to
+diff --git a/libgeis/geis_class.c b/libgeis/geis_class.c
+index e889e4b..5c26a87 100644
+--- a/libgeis/geis_class.c
++++ b/libgeis/geis_class.c
+@@ -46,7 +46,7 @@ static const int gesture_class_bag_growth_constant = 2;
+  * Creates a new class bag,
+  */
+ GeisGestureClassBag
+-geis_gesture_class_bag_new()
++geis_gesture_class_bag_new(void)
+ {
+   GeisGestureClassBag bag = calloc(1, sizeof(struct _GeisGestureClassBag));
+   if (!bag)
+diff --git a/libgeis/geis_class.h b/libgeis/geis_class.h
+index 1d60998..724c353 100644
+--- a/libgeis/geis_class.h
++++ b/libgeis/geis_class.h
+@@ -35,7 +35,7 @@ typedef struct _GeisGestureClassBag *GeisGestureClassBag;
+ /**
+  * Creates a new class bag,
+  */
+-GeisGestureClassBag geis_gesture_class_bag_new();
++GeisGestureClassBag geis_gesture_class_bag_new(void);
+ 
+ /**
+  * Destroys a gesture class bag.
+diff --git a/libgeis/geis_device.c b/libgeis/geis_device.c
+index 1f61fc1..ff3216b 100644
+--- a/libgeis/geis_device.c
++++ b/libgeis/geis_device.c
+@@ -44,7 +44,7 @@ static const int device_bag_growth_constant = 2;
+ 
+ 
+ GeisDeviceBag
+-geis_device_bag_new()
++geis_device_bag_new(void)
+ {
+   GeisDeviceBag bag = calloc(1, sizeof(struct _GeisDeviceBag));
+   if (!bag)
+diff --git a/libgeis/geis_device.h b/libgeis/geis_device.h
+index c411e54..707943b 100644
+--- a/libgeis/geis_device.h
++++ b/libgeis/geis_device.h
+@@ -35,7 +35,7 @@ typedef struct _GeisDeviceBag *GeisDeviceBag;
+ /**
+  * Creates a new device bag,
+  */
+-GeisDeviceBag geis_device_bag_new();
++GeisDeviceBag geis_device_bag_new(void);
+ 
+ /**
+  * Destroys a device bag.
+diff --git a/libgeis/geis_event_queue.c b/libgeis/geis_event_queue.c
+index 0b7d0fb..f357683 100644
+--- a/libgeis/geis_event_queue.c
++++ b/libgeis/geis_event_queue.c
+@@ -50,7 +50,7 @@ struct _GeisEventQueue
+  * Creates a new Geis Event queue.
+  */
+ GeisEventQueue
+-geis_event_queue_new()
++geis_event_queue_new(void)
+ {
+   GeisEventQueue queue = calloc(1, sizeof(struct _GeisEventQueue));
+   if (!queue)
+diff --git a/libgeis/geis_event_queue.h b/libgeis/geis_event_queue.h
+index e4c186d..5f3da1d 100644
+--- a/libgeis/geis_event_queue.h
++++ b/libgeis/geis_event_queue.h
+@@ -41,7 +41,7 @@ typedef struct _GeisEventQueue *GeisEventQueue;
+ /**
+  * Creates a new Geis Event queue.
+  */
+-GeisEventQueue geis_event_queue_new();
++GeisEventQueue geis_event_queue_new(void);
+ 
+ /**
+  * Destroys a Geis Event queue.
+diff --git a/libgeis/geis_filter.c b/libgeis/geis_filter.c
+index 4fe5da6..83d65a8 100644
+--- a/libgeis/geis_filter.c
++++ b/libgeis/geis_filter.c
+@@ -57,7 +57,7 @@ static GeisSize s_filter_oid = 0;
+  * Creates a new filter bag,
+  */
+ GeisFilterBag
+-geis_filter_bag_new()
++geis_filter_bag_new(void)
+ {
+   GeisFilterBag bag = calloc(1, sizeof(struct _GeisFilterBag));
+   if (!bag)
+diff --git a/libgeis/geis_filter.h b/libgeis/geis_filter.h
+index 7b613bf..f825bb7 100644
+--- a/libgeis/geis_filter.h
++++ b/libgeis/geis_filter.h
+@@ -42,7 +42,7 @@ typedef GeisFilter *GeisFilterIterator;
+ /**
+  * Creates a new filter bag,
+  */
+-GeisFilterBag geis_filter_bag_new();
++GeisFilterBag geis_filter_bag_new(void);
+ 
+ /**
+  * Destroys a filter bag.
+diff --git a/libgeis/geis_filterable.c b/libgeis/geis_filterable.c
+index 57bd4ac..5fb4b0c 100644
+--- a/libgeis/geis_filterable.c
++++ b/libgeis/geis_filterable.c
+@@ -40,7 +40,7 @@ struct FilterableAttributeBag
+  * Constructs a new filterable attribute bag.
+  */
+ FilterableAttributeBag
+-geis_filterable_attribute_bag_new()
++geis_filterable_attribute_bag_new(void)
+ {
+   FilterableAttributeBag bag = calloc(1, sizeof(struct FilterableAttributeBag));
+   if (!bag)
+diff --git a/libgeis/geis_filterable.h b/libgeis/geis_filterable.h
+index 6d400fc..63c563d 100644
+--- a/libgeis/geis_filterable.h
++++ b/libgeis/geis_filterable.h
+@@ -86,7 +86,7 @@ geis_filterable_attribute_init(GeisFilterableAttribute fa,
+  * Constructs a new filterable attribute bag.
+  */
+ FilterableAttributeBag
+-geis_filterable_attribute_bag_new();
++geis_filterable_attribute_bag_new(void);
+ 
+ /**
+  * Destroys a filterable attribute bag.
+diff --git a/libgeis/geis_frame.c b/libgeis/geis_frame.c
+index ebe11a1..068d9aa 100644
+--- a/libgeis/geis_frame.c
++++ b/libgeis/geis_frame.c
+@@ -57,7 +57,7 @@ struct _GeisFrameSet
+  * Creates a new, empty frame set.
+  */
+ GeisFrameSet
+-geis_frameset_new()
++geis_frameset_new(void)
+ {
+   GeisFrameSet frameset = calloc(1, sizeof(struct _GeisFrameSet));
+   if (!frameset)
+diff --git a/libgeis/geis_frame.h b/libgeis/geis_frame.h
+index e0d1eb6..4a4b0ba 100644
+--- a/libgeis/geis_frame.h
++++ b/libgeis/geis_frame.h
+@@ -32,7 +32,7 @@ typedef struct _GeisFrameSet *GeisFrameSet;
+ /**
+  * Creates a new, empty frame set.
+  */
+-GeisFrameSet geis_frameset_new();
++GeisFrameSet geis_frameset_new(void);
+ 
+ /**
+  * Destroys a frame set and all framees contained in it.
+diff --git a/libgeis/geis_group.c b/libgeis/geis_group.c
+index 0829bd4..4c51a39 100644
+--- a/libgeis/geis_group.c
++++ b/libgeis/geis_group.c
+@@ -42,7 +42,7 @@ struct _GeisGroupSet
+  * Creates a new, empty group set.
+  */
+ GeisGroupSet
+-geis_groupset_new()
++geis_groupset_new(void)
+ {
+   GeisGroupSet groupset = calloc(1, sizeof(struct _GeisGroupSet));
+   if (!groupset)
+diff --git a/libgeis/geis_group.h b/libgeis/geis_group.h
+index 76e0efc..c44ce62 100644
+--- a/libgeis/geis_group.h
++++ b/libgeis/geis_group.h
+@@ -31,7 +31,7 @@
+ /**
+  * Creates a new, empty group set.
+  */
+-GeisGroupSet geis_groupset_new();
++GeisGroupSet geis_groupset_new(void);
+ 
+ /**
+  * Destroys a group set and all groups contained in it.
+diff --git a/libgeis/geis_region.c b/libgeis/geis_region.c
+index fc8cc1e..18181cb 100644
+--- a/libgeis/geis_region.c
++++ b/libgeis/geis_region.c
+@@ -52,7 +52,7 @@ static const int region_bag_growth_constant = 2;
+  * Constructs a region bag.
+  */
+ GeisRegionBag
+-geis_region_bag_new()
++geis_region_bag_new(void)
+ {
+   GeisRegionBag bag = calloc(1, sizeof(struct _GeisRegionBag));
+   if (!bag)
+diff --git a/libgeis/geis_region.h b/libgeis/geis_region.h
+index 53a52b8..18d908f 100644
+--- a/libgeis/geis_region.h
++++ b/libgeis/geis_region.h
+@@ -49,7 +49,7 @@ typedef struct _GeisRegionBag *GeisRegionBag;
+ /**
+  * Creates a new region bag.
+  */
+-GeisRegionBag geis_region_bag_new();
++GeisRegionBag geis_region_bag_new(void);
+ 
+ /**
+  * Destroys a region bag.
+diff --git a/libgeis/geis_touch.c b/libgeis/geis_touch.c
+index 74d1b3e..be0f1a7 100644
+--- a/libgeis/geis_touch.c
++++ b/libgeis/geis_touch.c
+@@ -43,7 +43,7 @@ struct _GeisTouchSet
+  * Creates a new, empty touch set.
+  */
+ GeisTouchSet
+-geis_touchset_new()
++geis_touchset_new(void)
+ {
+   GeisTouchSet touchset = calloc(1, sizeof(struct _GeisTouchSet));
+   if (!touchset)
+diff --git a/libgeis/geis_touch.h b/libgeis/geis_touch.h
+index 62af610..b64c755 100644
+--- a/libgeis/geis_touch.h
++++ b/libgeis/geis_touch.h
+@@ -30,7 +30,7 @@
+ /**
+  * Creates a new, empty touch set.
+  */
+-GeisTouchSet geis_touchset_new();
++GeisTouchSet geis_touchset_new(void);
+ 
+ /**
+  * Destroys a touch set and all touches contained in it.
+diff --git a/libgeis/server/geis_dbus_client_proxy.c b/libgeis/server/geis_dbus_client_proxy.c
+index 3f1af47..65f229b 100644
+--- a/libgeis/server/geis_dbus_client_proxy.c
++++ b/libgeis/server/geis_dbus_client_proxy.c
+@@ -52,7 +52,7 @@ struct GeisDBusClientProxy
+  * failure.
+  */
+ GeisDBusClientProxy
+-_client_proxy_allocate()
++_client_proxy_allocate(void)
+ {
+   GeisDBusClientProxy proxy = calloc(1, sizeof(struct GeisDBusClientProxy));
+   return proxy;
+diff --git a/libgeis/server/geis_dbus_proxy_box.c b/libgeis/server/geis_dbus_proxy_box.c
+index 4c7752a..f129bc3 100644
+--- a/libgeis/server/geis_dbus_proxy_box.c
++++ b/libgeis/server/geis_dbus_proxy_box.c
+@@ -49,7 +49,7 @@ struct GeisDBusProxyBox
+  * Constructs a %GeisDBusProxyBox.
+  */
+ GeisDBusProxyBox
+-geis_dbus_proxy_box_new()
++geis_dbus_proxy_box_new(void)
+ {
+   GeisDBusProxyBox box = calloc(1, sizeof(struct GeisDBusProxyBox));
+   if (!box)
+diff --git a/libgeis/server/geis_dbus_proxy_box.h b/libgeis/server/geis_dbus_proxy_box.h
+index d8a70f5..47e84b7 100644
+--- a/libgeis/server/geis_dbus_proxy_box.h
++++ b/libgeis/server/geis_dbus_proxy_box.h
+@@ -47,7 +47,7 @@ typedef struct GeisDBusProxyBoxNode *GeisDBusProxyBoxIterator;
+  * @returns a valid %GeisDBusProxyBox or NULL on failure.
+  */
+ GeisDBusProxyBox
+-geis_dbus_proxy_box_new();
++geis_dbus_proxy_box_new(void);
+ 
+ /**
+  * Destroys a %GeisDBusProxyBox.
+diff --git a/libs/geis-dbus/geis_dbus_dispatcher.c b/libs/geis-dbus/geis_dbus_dispatcher.c
+index 10c4134..d8a3f9d 100644
+--- a/libs/geis-dbus/geis_dbus_dispatcher.c
++++ b/libs/geis-dbus/geis_dbus_dispatcher.c
+@@ -82,7 +82,7 @@ struct GeisDBusDispatcher
+  * assumption that if you're creating a bag you're going to use it.
+  */
+ static GeisDBusWatchBag 
+-_geis_dbus_watch_bag_new()
++_geis_dbus_watch_bag_new(void)
+ {
+   GeisDBusWatchBag bag = calloc(1, sizeof(struct GeisDBusWatchBag));
+   if (!bag)
+diff --git a/libs/geis-util/geis_logging.c b/libs/geis-util/geis_logging.c
+index a02077b..030a591 100644
+--- a/libs/geis-util/geis_logging.c
++++ b/libs/geis-util/geis_logging.c
+@@ -31,7 +31,7 @@ static const char *error_marker   = "error";
+ 
+ 
+ static int
+-reporting_level()
++reporting_level(void)
+ {
+   char *level = getenv("GEIS_DEBUG");
+   if (level)
+diff --git a/testsuite/geis1/check_gesture_attrs.c b/testsuite/geis1/check_gesture_attrs.c
+index 2228d2f..d990c7c 100644
+--- a/testsuite/geis1/check_gesture_attrs.c
++++ b/testsuite/geis1/check_gesture_attrs.c
+@@ -169,7 +169,7 @@ END_TEST
+ 
+ 
+ Suite *
+-geis1_gesture_attrs_new()
++geis1_gesture_attrs_new(void)
+ {
+   Suite *s = suite_create("geis1_gesture_attrs");
+   TCase *test;
+diff --git a/testsuite/geis1/check_gesture_types.c b/testsuite/geis1/check_gesture_types.c
+index 3492062..980f8fb 100644
+--- a/testsuite/geis1/check_gesture_types.c
++++ b/testsuite/geis1/check_gesture_types.c
+@@ -62,7 +62,7 @@ START_TEST(gesture_types)
+ END_TEST
+ 
+ Suite *
+-geis1_gesture_types_new()
++geis1_gesture_types_new(void)
+ {
+   Suite *s = suite_create("geis1_gesture_types");
+   TCase *test;
+diff --git a/testsuite/geis1/check_instance.c b/testsuite/geis1/check_instance.c
+index ac897ab..584abfd 100644
+--- a/testsuite/geis1/check_instance.c
++++ b/testsuite/geis1/check_instance.c
+@@ -44,7 +44,7 @@ END_TEST
+ 
+ 
+ Suite *
+-geis1_instance_suite_new()
++geis1_instance_suite_new(void)
+ {
+   TCase *create;
+   Suite *s = suite_create("geis1_instance_suite");
+diff --git a/testsuite/geis1/check_subscription.c b/testsuite/geis1/check_subscription.c
+index 0797337..e69c332 100644
+--- a/testsuite/geis1/check_subscription.c
++++ b/testsuite/geis1/check_subscription.c
+@@ -146,7 +146,7 @@ END_TEST
+ 
+ 
+ Suite *
+-geis1_subscription_new()
++geis1_subscription_new(void)
+ {
+   Suite *s = suite_create("geis1_subscription");
+   TCase *test;
+diff --git a/testsuite/geis2/check_attr.c b/testsuite/geis2/check_attr.c
+index 9e01219..58c8ebc 100644
+--- a/testsuite/geis2/check_attr.c
++++ b/testsuite/geis2/check_attr.c
+@@ -42,7 +42,7 @@ END_TEST
+ 
+ /* boilerplate */
+ Suite *
+-geis2_attr_suite_new()
++geis2_attr_suite_new(void)
+ {
+   Suite *s = suite_create("geis2_attr");
+ 
+diff --git a/testsuite/geis2/check_class.c b/testsuite/geis2/check_class.c
+index ca7accd..b60d827 100644
+--- a/testsuite/geis2/check_class.c
++++ b/testsuite/geis2/check_class.c
+@@ -92,7 +92,7 @@ END_TEST
+ 
+ /* boilerplate */
+ Suite *
+-geis2_gesture_class_suite_new()
++geis2_gesture_class_suite_new(void)
+ {
+   TCase *gesture_class;
+   TCase *usage;
+diff --git a/testsuite/geis2/check_config.c b/testsuite/geis2/check_config.c
+index a9fc9c1..03b3137 100644
+--- a/testsuite/geis2/check_config.c
++++ b/testsuite/geis2/check_config.c
+@@ -48,7 +48,7 @@ END_TEST
+ 
+ /* boilerplate */
+ Suite *
+-geis2_config_suite_new()
++geis2_config_suite_new(void)
+ {
+   Suite *s = suite_create("geis2_configuration");
+ 
+diff --git a/testsuite/geis2/check_device.c b/testsuite/geis2/check_device.c
+index 8b3b7be..9583cb5 100644
+--- a/testsuite/geis2/check_device.c
++++ b/testsuite/geis2/check_device.c
+@@ -99,7 +99,7 @@ END_TEST
+ 
+ /* boilerplate */
+ Suite *
+-geis2_device_suite_new()
++geis2_device_suite_new(void)
+ {
+   TCase *device;
+   TCase *usage;
+diff --git a/testsuite/geis2/check_error_codes.c b/testsuite/geis2/check_error_codes.c
+index 9c7508d..57e7fc7 100644
+--- a/testsuite/geis2/check_error_codes.c
++++ b/testsuite/geis2/check_error_codes.c
+@@ -20,7 +20,7 @@ END_TEST
+ 
+ /* boilerplate */
+ Suite *
+-geis2_error_codes_suite_new()
++geis2_error_codes_suite_new(void)
+ {
+   Suite *s = suite_create("geis2_error_codes");
+ 
+diff --git a/testsuite/geis2/check_event.c b/testsuite/geis2/check_event.c
+index 8ecda1d..c752a30 100644
+--- a/testsuite/geis2/check_event.c
++++ b/testsuite/geis2/check_event.c
+@@ -43,7 +43,7 @@ END_TEST
+ 
+ /* boilerplate */
+ Suite *
+-geis2_event_suite_new()
++geis2_event_suite_new(void)
+ {
+   Suite *s = suite_create("geis2_event");
+ 
+diff --git a/testsuite/geis2/check_filter.c b/testsuite/geis2/check_filter.c
+index 3272a2a..afa23e5 100644
+--- a/testsuite/geis2/check_filter.c
++++ b/testsuite/geis2/check_filter.c
+@@ -67,7 +67,7 @@ END_TEST
+ 
+ /* boilerplate */
+ Suite *
+-geis2_filter_suite_new()
++geis2_filter_suite_new(void)
+ {
+   TCase *filter;
+   TCase *usage;
+diff --git a/testsuite/geis2/check_frame.c b/testsuite/geis2/check_frame.c
+index dce6faf..3ea3db0 100644
+--- a/testsuite/geis2/check_frame.c
++++ b/testsuite/geis2/check_frame.c
+@@ -172,7 +172,7 @@ END_TEST
+ 
+ /* boilerplate */
+ Suite *
+-geis2_gesture_frame_suite_new()
++geis2_gesture_frame_suite_new(void)
+ {
+   Suite *s = suite_create("geis2_gesture_frame");
+   TCase *gesture_frame;
+diff --git a/testsuite/geis2/check_geis_new.c b/testsuite/geis2/check_geis_new.c
+index 1fb8de2..36cd3e9 100644
+--- a/testsuite/geis2/check_geis_new.c
++++ b/testsuite/geis2/check_geis_new.c
+@@ -32,7 +32,7 @@ END_TEST
+ 
+ /* boilerplate */
+ Suite *
+-geis2_geis_new_suite_new()
++geis2_geis_new_suite_new(void)
+ {
+   Suite *s = suite_create("geis2_geis_init");
+ 
+diff --git a/testsuite/geis2/check_general_types.c b/testsuite/geis2/check_general_types.c
+index 79e29bb..0f463d4 100644
+--- a/testsuite/geis2/check_general_types.c
++++ b/testsuite/geis2/check_general_types.c
+@@ -20,7 +20,7 @@ END_TEST
+ 
+ /* boilerplate */
+ Suite *
+-geis2_general_types_suite_new()
++geis2_general_types_suite_new(void)
+ {
+   Suite *s = suite_create("geis2_general_types");
+   TCase *create;
+diff --git a/testsuite/geis2/check_region.c b/testsuite/geis2/check_region.c
+index 3e64e0c..606350a 100644
+--- a/testsuite/geis2/check_region.c
++++ b/testsuite/geis2/check_region.c
+@@ -48,7 +48,7 @@ END_TEST
+ 
+ /* boilerplate */
+ Suite *
+-geis2_region_suite_new()
++geis2_region_suite_new(void)
+ {
+   Suite *s = suite_create("geis2_region");
+   TCase *creation;
+diff --git a/testsuite/geis2/check_subscription.c b/testsuite/geis2/check_subscription.c
+index a45c4ad..17a6779 100644
+--- a/testsuite/geis2/check_subscription.c
++++ b/testsuite/geis2/check_subscription.c
+@@ -161,7 +161,7 @@ END_TEST
+ 
+ /* boilerplate */
+ Suite *
+-geis2_subscription_suite_new()
++geis2_subscription_suite_new(void)
+ {
+   TCase *create;
+   TCase *usage;
+diff --git a/tools/geis-server/geis-server.c b/tools/geis-server/geis-server.c
+index 97c8b99..6600d8f 100644
+--- a/tools/geis-server/geis-server.c
++++ b/tools/geis-server/geis-server.c
+@@ -23,7 +23,7 @@
+ 
+ 
+ int
+-main()
++main(void)
+ {
+   GeisStatus status;
+ 
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/canonical-multitouch/geis_2.2.17.bb b/meta-openembedded/meta-oe/recipes-support/canonical-multitouch/geis_2.2.17.bb
index a1a28a8..e7f3b51 100644
--- a/meta-openembedded/meta-oe/recipes-support/canonical-multitouch/geis_2.2.17.bb
+++ b/meta-openembedded/meta-oe/recipes-support/canonical-multitouch/geis_2.2.17.bb
@@ -22,6 +22,7 @@
 SRC_URI = "https://launchpad.net/${BPN}/trunk/${PV}/+download/${BPN}-${PV}.tar.xz \
            file://fix-indentation-for-gcc6.patch \
            file://0001-libgeis-Compare-the-first-character-of-string-to-nul.patch \
+           file://0001-provide-function-declaration-with-prototypes.patch \
            "
 
 UPSTREAM_CHECK_URI = "https://launchpad.net/geis/trunk"
diff --git a/meta-openembedded/meta-oe/recipes-support/ckermit/ckermit/0001-Fix-function-prototype-errors.patch b/meta-openembedded/meta-oe/recipes-support/ckermit/ckermit/0001-Fix-function-prototype-errors.patch
new file mode 100644
index 0000000..306a2ee
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/ckermit/ckermit/0001-Fix-function-prototype-errors.patch
@@ -0,0 +1,106 @@
+From f73eade85b7a1b93f7b9ef6ca9ead9d2441f8f84 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 31 Aug 2022 19:01:38 -0700
+Subject: [PATCH] Fix function prototype errors
+
+Clang 15 is strict, therefore ensure that right headers are pulled in
+and also right function prototypes are used.
+
+Upstream-Status: Pending
+
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ ckcmai.c | 4 ++++
+ ckucmd.c | 2 +-
+ ckucmd.h | 2 --
+ ckucon.c | 2 +-
+ ckufio.c | 2 +-
+ ckuusx.c | 2 +-
+ 6 files changed, 8 insertions(+), 6 deletions(-)
+
+diff --git a/ckcmai.c b/ckcmai.c
+index a5640e5..a4e322d 100644
+--- a/ckcmai.c
++++ b/ckcmai.c
+@@ -561,6 +561,10 @@ ACKNOWLEDGMENTS:
+ #include "ckntap.h"
+ #endif /* NT */
+ 
++#ifndef VMS
++#include <time.h> /* time() */
++#endif
++
+ #ifndef NOSERVER
+ /* Text message definitions.. each should be 256 chars long, or less. */
+ #ifdef MINIX
+diff --git a/ckucmd.c b/ckucmd.c
+index 274dc2d..730f20d 100644
+--- a/ckucmd.c
++++ b/ckucmd.c
+@@ -7356,7 +7356,7 @@ cmdgetc(timelimit) int timelimit; {	/* Get a character from the tty. */
+   Returns 0 or greater always.
+ */
+ int
+-cmdconchk() {
++cmdconchk(void) {
+     int x = 0, y;
+     y = pushc ? 1 : 0;			/* Have command character pushed? */
+ #ifdef OS2
+diff --git a/ckucmd.h b/ckucmd.h
+index 7cd4ced..5c43cc4 100644
+--- a/ckucmd.h
++++ b/ckucmd.h
+@@ -280,9 +280,7 @@ _PROTOTYP( int cmdsquo, (int) );
+ _PROTOTYP( int cmdgquo, (void) );
+ _PROTOTYP( char * ckcvtdate, (char *, int) );
+ _PROTOTYP( int cmdgetc, (int));
+-#ifndef NOARROWKEYS
+ _PROTOTYP( int cmdconchk, (void) );
+-#endif /* NOARROWKEYS */
+ 
+ #ifdef CK_RECALL
+ _PROTOTYP( char * cmgetcmd, (char *) );
+diff --git a/ckucon.c b/ckucon.c
+index 50ceb7e..954719e 100644
+--- a/ckucon.c
++++ b/ckucon.c
+@@ -39,8 +39,8 @@ _PROTOTYP( static VOID concld, (void) );
+ 
+ #ifdef NEXT
+ #undef NSIG
+-#include <sys/wait.h>			/* For wait() */
+ #endif /* NEXT */
++#include <sys/wait.h>			/* For wait() */
+ 
+ #include <signal.h>			/* Signals */
+ 
+diff --git a/ckufio.c b/ckufio.c
+index b5bfaae..2a8d4e5 100644
+--- a/ckufio.c
++++ b/ckufio.c
+@@ -142,8 +142,8 @@ _PROTOTYP( int parser, ( int ) );
+ 
+ #ifdef UNIX                             /* Pointer arg to wait() allowed */
+ #define CK_CHILD                        /* Assume this is safe in all UNIX */
++#include <sys/wait.h>			/* wait() API */
+ #endif /* UNIX */
+-
+ extern int binary, recursive, stathack;
+ #ifdef CK_CTRLZ
+ extern int eofmethod;
+diff --git a/ckuusx.c b/ckuusx.c
+index d332bed..253f992 100644
+--- a/ckuusx.c
++++ b/ckuusx.c
+@@ -9144,7 +9144,7 @@ char *s;        /* a string */
+ #ifndef CK_CURPOS
+ /* Dummies for when cursor control is not supported */
+ int
+-ck_curpos(row, col) {
++ck_curpos(int row, int col) {
+     return(-1);
+ }
+ 
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/ckermit/ckermit_302.bb b/meta-openembedded/meta-oe/recipes-support/ckermit/ckermit_302.bb
index abeeecc..53f2b9d 100644
--- a/meta-openembedded/meta-oe/recipes-support/ckermit/ckermit_302.bb
+++ b/meta-openembedded/meta-oe/recipes-support/ckermit/ckermit_302.bb
@@ -8,7 +8,9 @@
 LICENSE = "BSD-3-Clause"
 LIC_FILES_CHKSUM = "file://COPYING.TXT;md5=932ca542d6c6cb8a59a0bcd76ab67cc3"
 
-SRC_URI = "http://www.kermitproject.org/ftp/kermit/archives/cku${PV}.tar.gz;subdir=${BPN}-${PV}"
+SRC_URI = "http://www.kermitproject.org/ftp/kermit/archives/cku${PV}.tar.gz;subdir=${BPN}-${PV} \
+           file://0001-Fix-function-prototype-errors.patch \
+           "
 SRC_URI[md5sum] = "eac4dbf18b45775e4cdee5a7c74762b0"
 SRC_URI[sha256sum] = "0d5f2cd12bdab9401b4c836854ebbf241675051875557783c332a6a40dac0711"
 
diff --git a/meta-openembedded/meta-oe/recipes-support/cli11/cli11/0001-Do-not-download-the-catch-framework-during-configure.patch b/meta-openembedded/meta-oe/recipes-support/cli11/cli11/0001-Do-not-download-the-catch-framework-during-configure.patch
new file mode 100644
index 0000000..7e6611b
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/cli11/cli11/0001-Do-not-download-the-catch-framework-during-configure.patch
@@ -0,0 +1,41 @@
+From 93557d5c5acf9ebb2c0fa708658e1d36621dd23a Mon Sep 17 00:00:00 2001
+From: Wang Mingyu <wangmy@fujitsu.com>
+Date: Wed, 7 Sep 2022 10:05:15 +0900
+Subject: [PATCH] Do not download the catch framework during configure.
+
+Upstream-Status: Inappropriate [configuration]
+
+Signed-off-by: Wang Mingyu <wangmy@fujitsu.com>
+---
+ tests/CMakeLists.txt | 15 ---------------
+ 1 file changed, 15 deletions(-)
+
+diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
+index 80c4f6a..ae210db 100644
+--- a/tests/CMakeLists.txt
++++ b/tests/CMakeLists.txt
+@@ -74,21 +74,6 @@ if(Catch2_FOUND)
+   endif()
+   message(STATUS "Found Catch2")
+   target_link_libraries(catch_main PUBLIC Catch2::Catch2)
+-else()
+-  message(STATUS "Downloading Catch2")
+-
+-  # FetchContent would be better, but requires newer CMake.
+-  file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/catch2")
+-  set(url https://github.com/philsquared/Catch/releases/download/v2.13.7/catch.hpp)
+-  file(
+-    DOWNLOAD ${url} "${CMAKE_CURRENT_BINARY_DIR}/catch2/catch.hpp"
+-    STATUS status
+-    EXPECTED_HASH SHA256=ea379c4a3cb5799027b1eb451163dff065a3d641aaba23bf4e24ee6b536bd9bc)
+-  list(GET status 0 error)
+-  if(error)
+-    message(FATAL_ERROR "Could not download ${url}, and Catch2 not found on your system.")
+-  endif()
+-  target_include_directories(catch_main PUBLIC "${CMAKE_CURRENT_BINARY_DIR}")
+ endif()
+ 
+ # Target must already exist
+-- 
+2.25.1
+
diff --git a/meta-openembedded/meta-oe/recipes-support/cli11/cli11_1.9.1.bb b/meta-openembedded/meta-oe/recipes-support/cli11/cli11_1.9.1.bb
deleted file mode 100644
index 7de3335..0000000
--- a/meta-openembedded/meta-oe/recipes-support/cli11/cli11_1.9.1.bb
+++ /dev/null
@@ -1,17 +0,0 @@
-SUMMARY = "C++11 command line parser"
-DESCRIPTION = "A command line parser for C++11 and beyond that provides a rich feature set with a simple and intuitive interface."
-HOMEPAGE = "https://github.com/CLIUtils/CLI11"
-LICENSE = "BSD-3-Clause"
-LIC_FILES_CHKSUM = "file://LICENSE;md5=b73927b18d5c6cd8d2ed28a6ad539733"
-SRCREV = "5cb3efabce007c3a0230e4cc2e27da491c646b6c"
-PV .= "+git${SRCPV}"
-
-SRC_URI += "gitsm://github.com/CLIUtils/CLI11;branch=v1;protocol=https"
-
-S = "${WORKDIR}/git"
-
-inherit cmake
-inherit ptest
-
-# cli11 is a header only C++ library, so the main package will be empty.
-RDEPENDS:${PN}-dev = ""
diff --git a/meta-openembedded/meta-oe/recipes-support/cli11/cli11_2.2.0.bb b/meta-openembedded/meta-oe/recipes-support/cli11/cli11_2.2.0.bb
new file mode 100644
index 0000000..53a5748
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/cli11/cli11_2.2.0.bb
@@ -0,0 +1,27 @@
+SUMMARY = "C++11 command line parser"
+DESCRIPTION = "A command line parser for C++11 and beyond that provides a rich feature set with a simple and intuitive interface."
+HOMEPAGE = "https://github.com/CLIUtils/CLI11"
+LICENSE = "BSD-3-Clause"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=9ad746b5f49c0fd53c08ca1faff1922c"
+SRCREV = "b9be5b9444772324459989177108a6a65b8b2769"
+PV .= "+git${SRCPV}"
+
+SRC_URI += "gitsm://github.com/CLIUtils/CLI11;branch=main;protocol=https \
+            https://github.com/philsquared/Catch/releases/download/v2.13.7/catch.hpp \
+            file://0001-Do-not-download-the-catch-framework-during-configure.patch"
+
+SRC_URI[sha256sum] = "ea379c4a3cb5799027b1eb451163dff065a3d641aaba23bf4e24ee6b536bd9bc"
+S = "${WORKDIR}/git"
+
+# The configure-time download of catch.hpp is patched out, so stage the copy
+# that do_unpack placed in ${WORKDIR} rather than reading ${DL_DIR} directly.
+do_configure:prepend() {
+    mkdir -p ${S}/tests/catch2
+    cp ${WORKDIR}/catch.hpp ${S}/tests/catch2/catch.hpp
+}
+
+inherit cmake
+inherit ptest
+
+# cli11 is a header only C++ library, so the main package will be empty.
+RDEPENDS:${PN}-dev = ""
diff --git a/meta-openembedded/meta-oe/recipes-support/daemontools/daemontools/0001-Fix-signature-of-main-function.patch b/meta-openembedded/meta-oe/recipes-support/daemontools/daemontools/0001-Fix-signature-of-main-function.patch
new file mode 100644
index 0000000..19f0ae7
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/daemontools/daemontools/0001-Fix-signature-of-main-function.patch
@@ -0,0 +1,209 @@
+From 269f18d5e6698fdd34fec2798f10c6fe072f3cd5 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 23:36:52 -0700
+Subject: [PATCH] Fix signature of main function
+
+Clang-15 errors out otherwise.
+Include needed headers for missing functions
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/chkshsgr.c    | 2 +-
+ src/tai64n.c      | 2 +-
+ src/tai64nlocal.c | 2 +-
+ src/trycpp.c      | 5 +++--
+ src/tryflock.c    | 4 ++--
+ src/trymkffo.c    | 2 +-
+ src/trypoll.c     | 3 ++-
+ src/trysgact.c    | 4 ++--
+ src/trysgprm.c    | 4 ++--
+ src/tryshsgr.c    | 3 ++-
+ src/tryulong64.c  | 4 ++--
+ src/trywaitp.c    | 4 ++--
+ src/x86cpuid.c    | 2 +-
+ 13 files changed, 22 insertions(+), 19 deletions(-)
+
+diff --git a/src/chkshsgr.c b/src/chkshsgr.c
+index 038afe9..9547a4c 100644
+--- a/src/chkshsgr.c
++++ b/src/chkshsgr.c
+@@ -5,7 +5,7 @@
+ #include <grp.h>
+ #include <unistd.h>
+ 
+-int main()
++int main(int argc, char *argv[])
+ {
+   gid_t x[4];
+ 
+diff --git a/src/tai64n.c b/src/tai64n.c
+index 17bdb82..7096ab3 100644
+--- a/src/tai64n.c
++++ b/src/tai64n.c
+@@ -27,7 +27,7 @@ buffer in = BUFFER_INIT(myread,0,inbuf,sizeof inbuf);
+ 
+ char stamp[TIMESTAMP + 1];
+ 
+-int main()
++int main(int argc, char *argv[])
+ {
+   char ch;
+ 
+diff --git a/src/tai64nlocal.c b/src/tai64nlocal.c
+index ce16ad8..2435737 100644
+--- a/src/tai64nlocal.c
++++ b/src/tai64nlocal.c
+@@ -28,7 +28,7 @@ unsigned long nanosecs;
+ unsigned long u;
+ struct tm *t;
+ 
+-int main()
++int main(int argc, char *argv[])
+ {
+   char ch;
+ 
+diff --git a/src/trycpp.c b/src/trycpp.c
+index e4503d4..d96c955 100644
+--- a/src/trycpp.c
++++ b/src/trycpp.c
+@@ -1,6 +1,7 @@
+ /* Public domain. */
+-
+-int main()
++#include <stdio.h>
++#include <stdlib.h>
++int main(int argc, char *argv[])
+ {
+ #ifdef NeXT
+   printf("nextstep\n"); exit(0);
+diff --git a/src/tryflock.c b/src/tryflock.c
+index a82ffc2..5ca97d3 100644
+--- a/src/tryflock.c
++++ b/src/tryflock.c
+@@ -3,8 +3,8 @@
+ #include <sys/types.h>
+ #include <sys/file.h>
+ #include <fcntl.h>
+-
+-main()
++void
++main(int argc, char *argv[])
+ {
+   flock(0,LOCK_EX | LOCK_UN | LOCK_NB);
+ }
+diff --git a/src/trymkffo.c b/src/trymkffo.c
+index 9356342..f92414a 100644
+--- a/src/trymkffo.c
++++ b/src/trymkffo.c
+@@ -3,7 +3,7 @@
+ #include <sys/types.h>
+ #include <sys/stat.h>
+ 
+-void main()
++void main(int argc, char *argv[])
+ {
+   mkfifo("temp-trymkffo",0);
+ }
+diff --git a/src/trypoll.c b/src/trypoll.c
+index 6506617..cb888cc 100644
+--- a/src/trypoll.c
++++ b/src/trypoll.c
+@@ -3,8 +3,9 @@
+ #include <sys/types.h>
+ #include <fcntl.h>
+ #include <poll.h>
++#include <unistd.h>
+ 
+-int main()
++int main(int argc, char *argv[])
+ {
+   struct pollfd x;
+ 
+diff --git a/src/trysgact.c b/src/trysgact.c
+index e264ef2..25da013 100644
+--- a/src/trysgact.c
++++ b/src/trysgact.c
+@@ -1,8 +1,8 @@
+ /* Public domain. */
+ 
+ #include <signal.h>
+-
+-main()
++void
++main(int argc, char *argv[])
+ {
+   struct sigaction sa;
+   sa.sa_handler = 0;
+diff --git a/src/trysgprm.c b/src/trysgprm.c
+index a46c82c..5a9491c 100644
+--- a/src/trysgprm.c
++++ b/src/trysgprm.c
+@@ -1,8 +1,8 @@
+ /* Public domain. */
+ 
+ #include <signal.h>
+-
+-main()
++void
++main(int argc, char *argv[])
+ {
+   sigset_t ss;
+  
+diff --git a/src/tryshsgr.c b/src/tryshsgr.c
+index c5ed6d6..d111e40 100644
+--- a/src/tryshsgr.c
++++ b/src/tryshsgr.c
+@@ -1,6 +1,7 @@
+ /* Public domain. */
+ 
+-int main()
++#include <unistd.h>
++int main(int argc, char *argv[])
+ {
+   short x[4];
+  
+diff --git a/src/tryulong64.c b/src/tryulong64.c
+index 003548a..20a3a40 100644
+--- a/src/tryulong64.c
++++ b/src/tryulong64.c
+@@ -1,6 +1,6 @@
+ /* Public domain. */
+-
+-int main()
++#include <unistd.h>
++int main(int argc, char *argv[])
+ {
+   unsigned long u;
+   u = 1;
+diff --git a/src/trywaitp.c b/src/trywaitp.c
+index 319b81f..90bc5aa 100644
+--- a/src/trywaitp.c
++++ b/src/trywaitp.c
+@@ -2,8 +2,8 @@
+ 
+ #include <sys/types.h>
+ #include <sys/wait.h>
+-
+-main()
++void
++main(int argc, char *argv[])
+ {
+   waitpid(0,0,0);
+ }
+diff --git a/src/x86cpuid.c b/src/x86cpuid.c
+index f81c593..1cb1ea6 100644
+--- a/src/x86cpuid.c
++++ b/src/x86cpuid.c
+@@ -7,7 +7,7 @@ void nope()
+   exit(1);
+ }
+ 
+-int main()
++int main(int argc, char *argv[])
+ {
+   unsigned long x[4];
+   unsigned long y[4];
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/daemontools/daemontools/warnings.patch b/meta-openembedded/meta-oe/recipes-support/daemontools/daemontools/warnings.patch
new file mode 100644
index 0000000..2fd18f6
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/daemontools/daemontools/warnings.patch
@@ -0,0 +1,74 @@
+Fixup misc warnings
+
+Patch by RiverRat
+
+http://bugs.gentoo.org/124487
+
+--- a/src/chkshsgr.c
++++ b/src/chkshsgr.c
+@@ -1,10 +1,13 @@
+ /* Public domain. */
+ 
++#include <sys/types.h>
++#include <stdlib.h>
++#include <grp.h>
+ #include <unistd.h>
+ 
+ int main()
+ {
+-  short x[4];
++  gid_t x[4];
+ 
+   x[0] = x[1] = 0;
+   if (getgroups(1,x) == 0) if (setgroups(1,x) == -1) _exit(1);
+--- a/src/matchtest.c
++++ b/src/matchtest.c
+@@ -1,3 +1,4 @@
++#include <unistd.h>
+ #include "match.h"
+ #include "buffer.h"
+ #include "str.h"
+--- a/src/multilog.c
++++ b/src/multilog.c
+@@ -1,3 +1,4 @@
++#include <stdio.h>
+ #include <unistd.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+--- a/src/prot.c
++++ b/src/prot.c
+@@ -1,5 +1,8 @@
+ /* Public domain. */
+ 
++#include <sys/types.h>
++#include <unistd.h>
++#include <grp.h>
+ #include "hasshsgr.h"
+ #include "prot.h"
+ 
+--- a/src/seek_set.c
++++ b/src/seek_set.c
+@@ -1,6 +1,7 @@
+ /* Public domain. */
+ 
+ #include <sys/types.h>
++#include <unistd.h>
+ #include "seek.h"
+ 
+ #define SET 0 /* sigh */
+--- a/src/supervise.c
++++ b/src/supervise.c
+@@ -1,3 +1,4 @@
++#include <stdio.h>
+ #include <unistd.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+--- a/src/pathexec_run.c
++++ b/src/pathexec_run.c
+@@ -1,5 +1,6 @@
+ /* Public domain. */
+ 
++#include <unistd.h>
+ #include "error.h"
+ #include "stralloc.h"
+ #include "str.h"
diff --git a/meta-openembedded/meta-oe/recipes-support/daemontools/daemontools_0.76.bb b/meta-openembedded/meta-oe/recipes-support/daemontools/daemontools_0.76.bb
index ed73408..a35b54e 100644
--- a/meta-openembedded/meta-oe/recipes-support/daemontools/daemontools_0.76.bb
+++ b/meta-openembedded/meta-oe/recipes-support/daemontools/daemontools_0.76.bb
@@ -15,11 +15,15 @@
 LIC_FILES_CHKSUM = "file://src/prot.c;beginline=1;endline=1;md5=96964cadf07e8f8c1e2ffb3b507dd647"
 LICENSE = "PD"
 
+DEPENDS += "coreutils"
+
 SRC_URI = "http://cr.yp.to/daemontools/${BPN}-${PV}.tar.gz \
     file://0001-error.h-include-errno.h-instead-of-extern-int.diff \
     file://0002-supervise.c-.-supervise-may-be-a-symlink-if-it-s-da.diff \
     file://cross-compile.patch \
     file://0001-daemontools-Fix-QA-Issue.patch \
+    file://warnings.patch \
+    file://0001-Fix-signature-of-main-function.patch \
 "
 
 SRC_URI[md5sum] = "1871af2453d6e464034968a0fbcb2bfc"
diff --git a/meta-openembedded/meta-oe/recipes-support/emacs/emacs_27.2.bb b/meta-openembedded/meta-oe/recipes-support/emacs/emacs_28.2.bb
similarity index 92%
rename from meta-openembedded/meta-oe/recipes-support/emacs/emacs_27.2.bb
rename to meta-openembedded/meta-oe/recipes-support/emacs/emacs_28.2.bb
index 4a7e7ab..7b0a422 100644
--- a/meta-openembedded/meta-oe/recipes-support/emacs/emacs_27.2.bb
+++ b/meta-openembedded/meta-oe/recipes-support/emacs/emacs_28.2.bb
@@ -5,11 +5,10 @@
 LIC_FILES_CHKSUM = "file://COPYING;md5=1ebbd3e34237af26da5dc08a4e440464"
 
 SRC_URI = "https://ftp.gnu.org/pub/gnu/emacs/emacs-${PV}.tar.xz \
-           file://emacs-glibc-2.34.patch \
           "
 SRC_URI:append:class-target = " file://usemake-docfile-native.patch"
 
-SRC_URI[sha256sum] = "b4a7cc4e78e63f378624e0919215b910af5bb2a0afc819fad298272e9f40c1b9"
+SRC_URI[sha256sum] = "ee21182233ef3232dc97b486af2d86e14042dbb65bbc535df562c3a858232488"
 
 CVE_CHECK_IGNORE = "\
     CVE-2007-6109 \
@@ -23,13 +22,23 @@
 
 # We could use --without-all but its better to
 # split it into several packages (size of minimal doesnt change)
-EXTRA_OECONF = " --with-x=no --with-dumping=none"
+EXTRA_OECONF = " --with-x=no --with-dumping=none --disable-build-details"
+
+# Disable seccomp, as it's a default dependency for gnutls but it doesn't work when cross-compiling emacs
+EXTRA_OECONF:append = " ${@bb.utils.contains('PACKAGECONFIG', 'gnutls', 'ac_cv_have_decl_SECCOMP_FILTER_FLAG_TSYNC=no ac_cv_have_decl_SECCOMP_SET_MODE_FILTER=no', '', d)}"
+
 
 DEPENDS = "ncurses"
 DEPENDS:append:class-target = " emacs-native"
 
 inherit autotools mime-xdg pkgconfig
 
+# Remove build host references to avoid target pollution
+do_compile:prepend () {
+    sed -i -e 's|${TMPDIR}||g' ${B}/src/config.h
+    sed -i -e 's|${B}||g' ${B}/src/epaths.h
+}
+
 do_compile:class-native (){
     cd ${B}/lib-src
     oe_runmake make-docfile
@@ -53,7 +62,6 @@
     chown -R root:root ${D}${datadir}
 }
 
-
 # Use a similar strategy to how we build python:
 # Create three packages
 # minimal - A working lisp based text editor
@@ -68,191 +76,194 @@
 
 
 # A minimal version of emacs that works
+# These are kept sorted in alphabetical order
 FILES:${PN}-minimal = " \
-    ${datadir}/${BPN}/${PV}/lisp/loadup.el \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/byte-run.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/backquote.elc \
-    ${datadir}/${BPN}/${PV}/lisp/subr.elc \
-    ${datadir}/${BPN}/${PV}/lisp/version.elc \
-    ${datadir}/${BPN}/${PV}/lisp/widget.elc \
-    ${datadir}/${BPN}/${PV}/lisp/custom.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/map-ynp.elc \
-    ${datadir}/${BPN}/${PV}/lisp/international/mule.elc \
-    ${datadir}/${BPN}/${PV}/lisp/international/mule-conf.elc \
-    ${datadir}/${BPN}/${PV}/lisp/env.elc \
-    ${datadir}/${BPN}/${PV}/lisp/format.elc \
-    ${datadir}/${BPN}/${PV}/lisp/bindings.elc \
-    ${datadir}/${BPN}/${PV}/lisp/window.elc \
-    ${datadir}/${BPN}/${PV}/lisp/files.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/macroexp.elc \
-    ${datadir}/${BPN}/${PV}/lisp/cus-face.elc \
-    ${datadir}/${BPN}/${PV}/lisp/faces.elc \
-    ${datadir}/${BPN}/${PV}/lisp/button.elc \
-    ${datadir}/${BPN}/${PV}/lisp/loaddefs.el \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/nadvice.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/cl-preloaded.elc \
-    ${datadir}/${BPN}/${PV}/lisp/obarray.elc \
+    ${bindir}/emacs* \
+    ${datadir}/${BPN}/${PV}/etc/charsets/ \
     ${datadir}/${BPN}/${PV}/lisp/abbrev.elc \
-    ${datadir}/${BPN}/${PV}/lisp/simple.elc \
-    ${datadir}/${BPN}/${PV}/lisp/jka-cmpr-hook.elc \
-    ${datadir}/${BPN}/${PV}/lisp/epa-hook.elc \
-    ${datadir}/${BPN}/${PV}/lisp/international/mule-cmds.elc \
+    ${datadir}/${BPN}/${PV}/lisp/bindings.elc \
+    ${datadir}/${BPN}/${PV}/lisp/buff-menu.elc \
+    ${datadir}/${BPN}/${PV}/lisp/button.elc \
     ${datadir}/${BPN}/${PV}/lisp/case-table.elc \
-    ${datadir}/${BPN}/${PV}/lisp/international/charprop.el \
-    ${datadir}/${BPN}/${PV}/lisp/international/characters.elc \
-    ${datadir}/${BPN}/${PV}/lisp/international/charscript.elc \
     ${datadir}/${BPN}/${PV}/lisp/composite.elc \
+    ${datadir}/${BPN}/${PV}/lisp/cus-face.elc \
+    ${datadir}/${BPN}/${PV}/lisp/cus-start.elc \
+    ${datadir}/${BPN}/${PV}/lisp/custom.elc \
+    ${datadir}/${BPN}/${PV}/lisp/disp-table.elc \
+    ${datadir}/${BPN}/${PV}/lisp/electric.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/backquote.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/byte-opt.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/byte-run.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/bytecomp.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/cconv.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/cl-generic.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/cl-preloaded.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/easymenu.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/eldoc.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/float-sup.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/gv.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/lisp-mode.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/lisp.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/macroexp.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/map-ynp.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/map.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/nadvice.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/regexp-opt.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/seq.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/shorthands.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/subr-x.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/syntax.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/tabulated-list.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/timer.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/vc/warnings.elc \
+    ${datadir}/${BPN}/${PV}/lisp/env.elc \
+    ${datadir}/${BPN}/${PV}/lisp/epa-hook.elc \
+    ${datadir}/${BPN}/${PV}/lisp/facemenu.elc \
+    ${datadir}/${BPN}/${PV}/lisp/faces.elc \
+    ${datadir}/${BPN}/${PV}/lisp/files.elc \
+    ${datadir}/${BPN}/${PV}/lisp/font-core.elc \
+    ${datadir}/${BPN}/${PV}/lisp/font-lock.elc \
+    ${datadir}/${BPN}/${PV}/lisp/format.elc \
+    ${datadir}/${BPN}/${PV}/lisp/frame.elc \
+    ${datadir}/${BPN}/${PV}/lisp/help.elc \
+    ${datadir}/${BPN}/${PV}/lisp/image.elc \
+    ${datadir}/${BPN}/${PV}/lisp/indent.elc \
+    ${datadir}/${BPN}/${PV}/lisp/international/characters.elc \
+    ${datadir}/${BPN}/${PV}/lisp/international/charprop.el \
+    ${datadir}/${BPN}/${PV}/lisp/international/charscript.elc \
+    ${datadir}/${BPN}/${PV}/lisp/international/cp51932.elc \
+    ${datadir}/${BPN}/${PV}/lisp/international/emoji-zwj.elc \
+    ${datadir}/${BPN}/${PV}/lisp/international/eucjp-ms.elc \
+    ${datadir}/${BPN}/${PV}/lisp/international/iso-transl.elc \
+    ${datadir}/${BPN}/${PV}/lisp/international/mule-cmds.elc \
+    ${datadir}/${BPN}/${PV}/lisp/international/mule-conf.elc \
+    ${datadir}/${BPN}/${PV}/lisp/international/mule.elc \
+    ${datadir}/${BPN}/${PV}/lisp/international/uni*.el \
+    ${datadir}/${BPN}/${PV}/lisp/isearch.elc \
+    ${datadir}/${BPN}/${PV}/lisp/jit-lock.elc \
+    ${datadir}/${BPN}/${PV}/lisp/jka-cmpr-hook.elc \
+    ${datadir}/${BPN}/${PV}/lisp/language/burmese.elc \
+    ${datadir}/${BPN}/${PV}/lisp/language/cham.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/chinese.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/cyrillic.elc \
-    ${datadir}/${BPN}/${PV}/lisp/language/indian.elc \
-    ${datadir}/${BPN}/${PV}/lisp/language/sinhala.elc \
+    ${datadir}/${BPN}/${PV}/lisp/language/czech.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/english.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/ethiopic.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/european.elc \
-    ${datadir}/${BPN}/${PV}/lisp/language/czech.elc \
-    ${datadir}/${BPN}/${PV}/lisp/language/slovak.elc \
-    ${datadir}/${BPN}/${PV}/lisp/language/romanian.elc \
+    ${datadir}/${BPN}/${PV}/lisp/language/georgian.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/greek.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/hebrew.elc \
-    ${datadir}/${BPN}/${PV}/lisp/international/cp51932.elc \
-    ${datadir}/${BPN}/${PV}/lisp/international/eucjp-ms.elc \
+    ${datadir}/${BPN}/${PV}/lisp/language/indian.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/japanese.elc \
+    ${datadir}/${BPN}/${PV}/lisp/language/khmer.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/korean.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/lao.elc \
+    ${datadir}/${BPN}/${PV}/lisp/language/misc-lang.elc \
+    ${datadir}/${BPN}/${PV}/lisp/language/romanian.elc \
+    ${datadir}/${BPN}/${PV}/lisp/language/sinhala.elc \
+    ${datadir}/${BPN}/${PV}/lisp/language/slovak.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/tai-viet.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/thai.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/tibetan.elc \
-    ${datadir}/${BPN}/${PV}/lisp/language/vietnamese.elc \
-    ${datadir}/${BPN}/${PV}/lisp/language/misc-lang.elc \
     ${datadir}/${BPN}/${PV}/lisp/language/utf-8-lang.elc \
-    ${datadir}/${BPN}/${PV}/lisp/language/georgian.elc \
-    ${datadir}/${BPN}/${PV}/lisp/language/khmer.elc \
-    ${datadir}/${BPN}/${PV}/lisp/language/burmese.elc \
-    ${datadir}/${BPN}/${PV}/lisp/language/cham.elc \
-    ${datadir}/${BPN}/${PV}/lisp/indent.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/cl-generic.elc \
-    ${datadir}/${BPN}/${PV}/lisp/minibuffer.elc \
-    ${datadir}/${BPN}/${PV}/lisp/frame.elc \
-    ${datadir}/${BPN}/${PV}/lisp/startup.elc \
-    ${datadir}/${BPN}/${PV}/lisp/term/tty-colors.elc \
-    ${datadir}/${BPN}/${PV}/lisp/font-core.elc \
-    ${datadir}/${BPN}/${PV}/lisp/facemenu.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/syntax.elc \
-    ${datadir}/${BPN}/${PV}/lisp/font-lock.elc \
-    ${datadir}/${BPN}/${PV}/lisp/jit-lock.elc \
-    ${datadir}/${BPN}/${PV}/lisp/mouse.elc \
-    ${datadir}/${BPN}/${PV}/lisp/select.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/timer.elc \
-    ${datadir}/${BPN}/${PV}/lisp/isearch.elc \
-    ${datadir}/${BPN}/${PV}/lisp/rfn-eshadow.elc \
-    ${datadir}/${BPN}/${PV}/lisp/menu-bar.elc \
-    ${datadir}/${BPN}/${PV}/lisp/tab-bar.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/lisp.elc \
-    ${datadir}/${BPN}/${PV}/lisp/textmodes/page.elc \
-    ${datadir}/${BPN}/${PV}/lisp/register.elc \
-    ${datadir}/${BPN}/${PV}/lisp/textmodes/paragraphs.elc \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/prog-mode.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/lisp-mode.elc \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/elisp-mode.elc \
-    ${datadir}/${BPN}/${PV}/lisp/textmodes/text-mode.elc \
-    ${datadir}/${BPN}/${PV}/lisp/textmodes/fill.elc \
-    ${datadir}/${BPN}/${PV}/lisp/newcomment.elc \
-    ${datadir}/${BPN}/${PV}/lisp/replace.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/tabulated-list.elc \
-    ${datadir}/${BPN}/${PV}/lisp/buff-menu.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/float-sup.elc \
-    ${datadir}/${BPN}/${PV}/lisp/vc/vc-hooks.elc \
-    ${datadir}/${BPN}/${PV}/lisp/vc/ediff-hook.elc \
-    ${datadir}/${BPN}/${PV}/lisp/uniquify.elc \
-    ${datadir}/${BPN}/${PV}/lisp/electric.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/eldoc.elc \
-    ${datadir}/${BPN}/${PV}/lisp/cus-start.elc \
-    ${datadir}/${BPN}/${PV}/lisp/tooltip.elc \
-    ${datadir}/${BPN}/${PV}/lisp/simple.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/regexp-opt.elc \
-    ${datadir}/${BPN}/${PV}/lisp/term/xterm.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/bytecomp.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/cconv.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/gv.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/byte-opt.elc \
-    ${datadir}/${BPN}/${PV}/lisp/image.elc \
+    ${datadir}/${BPN}/${PV}/lisp/language/vietnamese.elc \
     ${datadir}/${BPN}/${PV}/lisp/ldefs-boot.el \
-    ${datadir}/${BPN}/${PV}/lisp/help.elc \
-    ${datadir}/${BPN}/${PV}/lisp/international/uni*.el \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/vc/warnings.elc \
-    ${datadir}/${BPN}/${PV}/etc/charsets/ \
-    ${datadir}/${BPN}/${PV}/lisp/disp-table.elc \
-    ${bindir}/emacs* \
+    ${datadir}/${BPN}/${PV}/lisp/loaddefs.el \
+    ${datadir}/${BPN}/${PV}/lisp/loadup.el \
+    ${datadir}/${BPN}/${PV}/lisp/menu-bar.elc \
+    ${datadir}/${BPN}/${PV}/lisp/minibuffer.elc \
+    ${datadir}/${BPN}/${PV}/lisp/mouse.elc \
+    ${datadir}/${BPN}/${PV}/lisp/newcomment.elc \
+    ${datadir}/${BPN}/${PV}/lisp/obarray.elc \
+    ${datadir}/${BPN}/${PV}/lisp/paren.elc \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/elisp-mode.elc \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/prog-mode.elc \
+    ${datadir}/${BPN}/${PV}/lisp/register.elc \
+    ${datadir}/${BPN}/${PV}/lisp/replace.elc \
+    ${datadir}/${BPN}/${PV}/lisp/rfn-eshadow.elc \
+    ${datadir}/${BPN}/${PV}/lisp/select.elc \
+    ${datadir}/${BPN}/${PV}/lisp/simple.elc \
+    ${datadir}/${BPN}/${PV}/lisp/simple.elc \
+    ${datadir}/${BPN}/${PV}/lisp/startup.elc \
+    ${datadir}/${BPN}/${PV}/lisp/subr.elc \
+    ${datadir}/${BPN}/${PV}/lisp/tab-bar.elc \
+    ${datadir}/${BPN}/${PV}/lisp/term/tty-colors.elc \
+    ${datadir}/${BPN}/${PV}/lisp/term/xterm.elc \
+    ${datadir}/${BPN}/${PV}/lisp/textmodes/fill.elc \
+    ${datadir}/${BPN}/${PV}/lisp/textmodes/page.elc \
+    ${datadir}/${BPN}/${PV}/lisp/textmodes/paragraphs.elc \
+    ${datadir}/${BPN}/${PV}/lisp/textmodes/text-mode.elc \
+    ${datadir}/${BPN}/${PV}/lisp/tooltip.elc \
+    ${datadir}/${BPN}/${PV}/lisp/uniquify.elc \
+    ${datadir}/${BPN}/${PV}/lisp/vc/ediff-hook.elc \
+    ${datadir}/${BPN}/${PV}/lisp/vc/vc-hooks.elc \
+    ${datadir}/${BPN}/${PV}/lisp/version.elc \
+    ${datadir}/${BPN}/${PV}/lisp/widget.elc \
+    ${datadir}/${BPN}/${PV}/lisp/window.elc \
     ${prefix}/libexec \
 "
 
-
 # What works for "most" is relative, but this can be easily extended if needed
 FILES:${PN}-base = " \
-    ${datadir}/${BPN}/${PV}/etc/srecode \
     ${datadir}/${BPN}/${PV}/etc/e \
     ${datadir}/${BPN}/${PV}/etc/forms \
-    ${datadir}/${BPN}/${PV}/lisp/cedet \
-    ${datadir}/${BPN}/${PV}/site-lisp/ \
-    ${datadir}/${BPN}/${PV}/lisp/subdirs.el \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-mode.elc \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-defs.elc \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-vars.elc \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-engine.elc \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-styles.elc \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-fonts.elc \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-cmds.elc \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-align.elc \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-menus.elc \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-guess.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/cl-lib.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/cl-macs.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/pcase.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/inline.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/cl-seq.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/easymenu.elc \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/python* \
-    ${datadir}/${BPN}/${PV}/lisp/ansi-color.elc \
-    ${datadir}/${BPN}/${PV}/lisp/comint.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/ring.elc \
-    ${datadir}/${BPN}/${PV}/lisp/json.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/map.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/seq.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/subr-x.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/seq.elc \
-    ${datadir}/${BPN}/${PV}/lisp/net/tramp-sh.elc \
-    ${datadir}/${BPN}/${PV}/lisp/net/tramp.elc \
-    ${datadir}/${BPN}/${PV}/lisp/net/tramp-compat.elc \
-    ${datadir}/${BPN}/${PV}/lisp/auth-source.elc \
-    ${datadir}/${BPN}/${PV}/lisp/password-cache.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/eieio.elc \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/eieio-core.elc \
-    ${datadir}/${BPN}/${PV}/lisp/format-spec.elc \
-    ${datadir}/${BPN}/${PV}/lisp/ls-lisp.elc \
-    ${datadir}/${BPN}/${PV}/lisp/calendar/parse-time.elc \
-    ${datadir}/${BPN}/${PV}/lisp/calendar/iso8601.elc \
-    ${datadir}/${BPN}/${PV}/lisp/calendar/time-date.elc \
-    ${datadir}/${BPN}/${PV}/lisp/shell.elc \
-    ${datadir}/${BPN}/${PV}/lisp/pcomplete.elc \
-    ${datadir}/${BPN}/${PV}/lisp/net/tramp-integration.elc \
-    ${datadir}/${BPN}/${PV}/lisp/files-x.elc \
-    ${datadir}/${BPN}/${PV}/lisp/net/trampver.elc \
-    ${datadir}/${BPN}/${PV}/lisp/net/tramp-loaddefs.el \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/*perl* \
-    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/smie.elc \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/*asm* \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/cpp* \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/make* \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/sh-script* \
+    ${datadir}/${BPN}/${PV}/etc/srecode \
     ${datadir}/${BPN}/${PV}/etc/themes/adwaita-theme.el \
-    ${datadir}/${BPN}/${PV}/etc/themes/wheatgrass-theme.el \
     ${datadir}/${BPN}/${PV}/etc/themes/deeper-blue-theme.el \
     ${datadir}/${BPN}/${PV}/etc/themes/light-blue-theme.el \
     ${datadir}/${BPN}/${PV}/etc/themes/misterioso-theme.el \
     ${datadir}/${BPN}/${PV}/etc/themes/tango-theme.el \
+    ${datadir}/${BPN}/${PV}/etc/themes/wheatgrass-theme.el \
     ${datadir}/${BPN}/${PV}/etc/themes/wombat-theme.el \
-    ${datadir}/${BPN}/${PV}/lisp/progmodes/prog* \
+    ${datadir}/${BPN}/${PV}/lisp/ansi-color.elc \
+    ${datadir}/${BPN}/${PV}/lisp/auth-source.elc \
+    ${datadir}/${BPN}/${PV}/lisp/calendar/iso8601.elc \
+    ${datadir}/${BPN}/${PV}/lisp/calendar/parse-time.elc \
+    ${datadir}/${BPN}/${PV}/lisp/calendar/time-date.elc \
+    ${datadir}/${BPN}/${PV}/lisp/cedet \
+    ${datadir}/${BPN}/${PV}/lisp/comint.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/cl-lib.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/cl-macs.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/cl-seq.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/eieio-core.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/eieio.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/inline.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/pcase.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/ring.elc \
+    ${datadir}/${BPN}/${PV}/lisp/emacs-lisp/smie.elc \
+    ${datadir}/${BPN}/${PV}/lisp/files-x.elc \
+    ${datadir}/${BPN}/${PV}/lisp/format-spec.elc \
+    ${datadir}/${BPN}/${PV}/lisp/json.elc \
+    ${datadir}/${BPN}/${PV}/lisp/ls-lisp.elc \
+    ${datadir}/${BPN}/${PV}/lisp/net/tramp-compat.elc \
+    ${datadir}/${BPN}/${PV}/lisp/net/tramp-integration.elc \
+    ${datadir}/${BPN}/${PV}/lisp/net/tramp-loaddefs.el \
+    ${datadir}/${BPN}/${PV}/lisp/net/tramp-sh.elc \
+    ${datadir}/${BPN}/${PV}/lisp/net/tramp.elc \
+    ${datadir}/${BPN}/${PV}/lisp/net/trampver.elc \
+    ${datadir}/${BPN}/${PV}/lisp/password-cache.elc \
+    ${datadir}/${BPN}/${PV}/lisp/pcomplete.elc \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/*asm* \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/*perl* \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-align.elc \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-cmds.elc \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-defs.elc \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-engine.elc \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-fonts.elc \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-guess.elc \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-menus.elc \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-mode.elc \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-styles.elc \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/cc-vars.elc \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/cpp* \
     ${datadir}/${BPN}/${PV}/lisp/progmodes/executable* \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/make* \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/prog* \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/python* \
+    ${datadir}/${BPN}/${PV}/lisp/progmodes/sh-script* \
+    ${datadir}/${BPN}/${PV}/lisp/shell.elc \
+    ${datadir}/${BPN}/${PV}/lisp/subdirs.el \
+    ${datadir}/${BPN}/${PV}/site-lisp/ \
 "
 
 # Restore FILES for the full package to catch everything left
diff --git a/meta-openembedded/meta-oe/recipes-support/emacs/files/emacs-glibc-2.34.patch b/meta-openembedded/meta-oe/recipes-support/emacs/files/emacs-glibc-2.34.patch
deleted file mode 100644
index 6d764aa..0000000
--- a/meta-openembedded/meta-oe/recipes-support/emacs/files/emacs-glibc-2.34.patch
+++ /dev/null
@@ -1,40 +0,0 @@
-From f97e07ea807cc6d38774a3888a15091b20645ac6 Mon Sep 17 00:00:00 2001
-From: Paul Eggert <eggert@cs.ucla.edu>
-Date: Tue, 9 Mar 2021 11:22:59 -0800
-Subject: [PATCH] Port alternate signal stack to upcoming glibc 2.34
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-* src/sysdep.c (sigsegv_stack): Increase size to 64 KiB and align
-it to max_align_t.  This copies from Gnulib’s c-stack.c, and works
-around a portability bug in draft glibc 2.34, which no longer
-defines SIGSTKSZ when _GNU_SOURCE is defined.
----
- src/sysdep.c | 10 +++++++++-
- 1 file changed, 9 insertions(+), 1 deletion(-)
-
-diff --git a/src/sysdep.c b/src/sysdep.c
-index 941b4e2fa2..24d8832b2f 100644
---- a/src/sysdep.c
-+++ b/src/sysdep.c
-@@ -1785,7 +1785,15 @@ handle_arith_signal (int sig)
- 
- /* Alternate stack used by SIGSEGV handler below.  */
- 
--static unsigned char sigsegv_stack[SIGSTKSZ];
-+/* Storage for the alternate signal stack.
-+   64 KiB is not too large for Emacs, and is large enough
-+   for all known platforms.  Smaller sizes may run into trouble.
-+   For example, libsigsegv 2.6 through 2.8 have a bug where some
-+   architectures use more than the Linux default of an 8 KiB alternate
-+   stack when deciding if a fault was caused by stack overflow.  */
-+static max_align_t sigsegv_stack[(64 * 1024
-+				  + sizeof (max_align_t) - 1)
-+				 / sizeof (max_align_t)];
- 
- 
- /* Return true if SIGINFO indicates a stack overflow.  */
--- 
-2.29.2
-
diff --git a/meta-openembedded/meta-oe/recipes-support/emacs/files/usemake-docfile-native.patch b/meta-openembedded/meta-oe/recipes-support/emacs/files/usemake-docfile-native.patch
index cc0b05c..95b4281 100644
--- a/meta-openembedded/meta-oe/recipes-support/emacs/files/usemake-docfile-native.patch
+++ b/meta-openembedded/meta-oe/recipes-support/emacs/files/usemake-docfile-native.patch
@@ -11,11 +11,11 @@
 
 Signed-off-by: Alejandro Enedino Hernandez Samaniego <alejandro@enedino.org>
 
-Index: emacs-27.1/src/Makefile.in
+Index: emacs-28.2/src/Makefile.in
 ===================================================================
---- emacs-27.1.orig/src/Makefile.in
-+++ emacs-27.1/src/Makefile.in
-@@ -472,7 +472,7 @@ ifeq ($(CHECK_STRUCTS),true)
+--- emacs-28.2.orig/src/Makefile.in
++++ emacs-28.2/src/Makefile.in
+@@ -462,7 +462,7 @@ ifeq ($(CHECK_STRUCTS),true)
  pdumper.o: dmpstruct.h
  endif
  dmpstruct.h: $(srcdir)/dmpstruct.awk
@@ -24,7 +24,7 @@
  	$(AM_V_GEN)POSIXLY_CORRECT=1 awk -f $(srcdir)/dmpstruct.awk \
  		$(dmpstruct_headers) > $@
  
-@@ -566,8 +566,7 @@ ${lispintdir}/characters.elc: ${charscri
+@@ -563,8 +563,7 @@ SYSTEM_TYPE = @SYSTEM_TYPE@
  ## Strictly speaking, emacs does not depend directly on all of $lisp,
  ## since not all pieces are used on all platforms.  But DOC depends
  ## on all of $lisp, and emacs depends on DOC, so it is ok to use $lisp here.
@@ -32,9 +32,9 @@
 -                lisp.mk $(etc)/DOC $(lisp) \
 +emacs$(EXEEXT): lisp.mk $(etc)/DOC $(lisp) \
                  $(lispsource)/international/charprop.el ${charsets}
- ifeq ($(DUMPING),unexec)
- 	LC_ALL=C $(RUN_TEMACS) -batch $(BUILD_DETAILS) -l loadup --temacs=dump
-@@ -596,15 +595,15 @@ endif
+ ifeq ($(SYSTEM_TYPE),cygwin)
+ 	find ${top_builddir} -name '*.eln' | rebase -v -O -T -
+@@ -597,15 +596,15 @@ endif
  ## for the first time, this prevents any variation between configurations
  ## in the contents of the DOC file.
  ##
@@ -54,9 +54,9 @@
    $(lib)/libgnu.a
  	$(MAKE) -C $(dir $@) $(notdir $@)
  
-@@ -622,8 +621,8 @@ am__v_GLOBALS_ = $(am__v_GLOBALS_@AM_DEF
- am__v_GLOBALS_0 = @echo "  GEN     " globals.h;
- am__v_GLOBALS_1 =
+@@ -618,8 +617,8 @@ buildobj.h: Makefile
+ 
+ GLOBAL_SOURCES = $(base_obj:.o=.c) $(NS_OBJC_OBJ:.o=.m)
  
 -gl-stamp: $(libsrc)/make-docfile$(EXEEXT) $(GLOBAL_SOURCES)
 -	$(AM_V_GLOBALS)$(libsrc)/make-docfile -d $(srcdir) -g $(obj) > globals.tmp
@@ -65,7 +65,7 @@
  	$(AM_V_at)$(top_srcdir)/build-aux/move-if-change globals.tmp globals.h
  	$(AM_V_at)echo timestamp > $@
  
-@@ -637,7 +636,7 @@ $(LIBEGNU_ARCHIVE): $(config_h)
+@@ -633,7 +632,7 @@ $(LIBEGNU_ARCHIVE): $(config_h)
  	$(MAKE) -C $(dir $@) all
  
  ifeq ($(HAVE_PDUMPER),yes)
@@ -74,12 +74,12 @@
  else
    MAKE_PDUMPER_FINGERPRINT =
  endif
-@@ -647,7 +646,7 @@ endif
+@@ -643,7 +642,7 @@ endif
  ## This goes on to affect various things, and the emacs binary fails
  ## to start if Vinstallation_directory has the wrong value.
  temacs$(EXEEXT): $(LIBXMENU) $(ALLOBJS) $(LIBEGNU_ARCHIVE) $(EMACSRES) \
--  $(charsets) $(charscript) $(MAKE_PDUMPER_FINGERPRINT)
-+  $(charsets) $(charscript)
+-  $(charsets) $(charscript) ${emoji-zwj} $(MAKE_PDUMPER_FINGERPRINT)
++  $(charsets) $(charscript) ${emoji-zwj}
  	$(AM_V_CCLD)$(CC) -o $@.tmp \
  	  $(ALL_CFLAGS) $(TEMACS_LDFLAGS) $(LDFLAGS) \
  	  $(ALLOBJS) $(LIBEGNU_ARCHIVE) $(W32_RES_LINK) $(LIBES)
diff --git a/meta-openembedded/meta-oe/recipes-support/epeg/epeg/0001-configure-Fix-checks-for-libjpeg-and-libexif.patch b/meta-openembedded/meta-oe/recipes-support/epeg/epeg/0001-configure-Fix-checks-for-libjpeg-and-libexif.patch
new file mode 100644
index 0000000..1568fe9
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/epeg/epeg/0001-configure-Fix-checks-for-libjpeg-and-libexif.patch
@@ -0,0 +1,32 @@
+From 4e0c62634b0db6dbad0d41377bb71a8012abd75b Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 12:56:54 -0700
+Subject: [PATCH] configure: Fix checks for libjpeg and libexif
+
+Use the functions from the respective library instead of using main
+function which is not part of library
+
+Upstream-Status: Submitted [https://github.com/mattes/epeg/pull/27]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.ac | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/configure.ac b/configure.ac
+index a4f6ebd..e474b1d 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -31,8 +31,8 @@ AC_SUBST(version_info)
+ dnl AC_CHECK_FUNCS(fmemopen)
+ dnl AC_CHECK_FUNCS(open_memstream)
+ 
+-AC_CHECK_LIB([jpeg], [main], [], [echo "libjpeg library not found. Please install it before proceeding"; exit -1])
+-AC_CHECK_LIB([exif], [main], [], [echo "libexif library not found. Please install it before proceeding"; exit -1])
++AC_CHECK_LIB([jpeg], [jpeg_simple_progression], [], [echo "libjpeg library not found. Please install it before proceeding"; exit -1])
++AC_CHECK_LIB([exif], [exif_data_new_from_file], [], [echo "libexif library not found. Please install it before proceeding"; exit -1])
+ 
+ my_includes=""
+ my_libs="-ljpeg -lexif"
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/epeg/epeg_git.bb b/meta-openembedded/meta-oe/recipes-support/epeg/epeg_git.bb
index bdffe4b..e5277b1 100644
--- a/meta-openembedded/meta-oe/recipes-support/epeg/epeg_git.bb
+++ b/meta-openembedded/meta-oe/recipes-support/epeg/epeg_git.bb
@@ -5,7 +5,9 @@
                     file://COPYING-PLAIN;md5=f59cacc08235a546b0c34a5422133035"
 DEPENDS = "jpeg libexif"
 
-SRC_URI = "git://github.com/mattes/epeg.git;branch=master;protocol=https"
+SRC_URI = "git://github.com/mattes/epeg.git;branch=master;protocol=https \
+           file://0001-configure-Fix-checks-for-libjpeg-and-libexif.patch \
+           "
 SRCREV = "9a175cd67eaa61fe45413d8da82da72936567047"
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-oe/recipes-support/exiv2/exiv2_0.27.3.bb b/meta-openembedded/meta-oe/recipes-support/exiv2/exiv2_0.27.3.bb
index 31afe78..1380638 100644
--- a/meta-openembedded/meta-oe/recipes-support/exiv2/exiv2_0.27.3.bb
+++ b/meta-openembedded/meta-oe/recipes-support/exiv2/exiv2_0.27.3.bb
@@ -21,3 +21,10 @@
 S = "${WORKDIR}/${BPN}-${PV}-Source"
 
 inherit cmake gettext
+
+do_install:append:class-target() {
+    # reproducibility: remove build host path
+    sed -i ${D}${libdir}/cmake/exiv2/exiv2Config.cmake \
+        -e 's:${STAGING_DIR_HOST}::g'
+}
+
diff --git a/meta-openembedded/meta-oe/recipes-support/fltk/fltk.bb b/meta-openembedded/meta-oe/recipes-support/fltk/fltk.bb
index 3c9ee5e..82007c4 100644
--- a/meta-openembedded/meta-oe/recipes-support/fltk/fltk.bb
+++ b/meta-openembedded/meta-oe/recipes-support/fltk/fltk.bb
@@ -25,7 +25,9 @@
 PACKAGECONFIG[xcursor] = "-DOPTION_USE_XCURSOR=ON,-DOPTION_USE_XCURSOR=OFF,libxcursor"
 
 do_install:append() {
-    sed -i -e 's,${STAGING_DIR_HOST},,g' ${D}${bindir}/fltk-config
+    sed -i -e 's,${TMPDIR},,g' ${D}${bindir}/fltk-config
+    sed -i -e 's,${TMPDIR},,g' ${D}${datadir}/fltk/UseFLTK.cmake
+    sed -i -e 's,${TMPDIR},,g' ${D}${datadir}/fltk/FLTK-Targets.cmake
 }
 
 python populate_packages:prepend () {
diff --git a/meta-openembedded/meta-oe/recipes-support/hddtemp/hddtemp/0001-sata.c-Declare-ata_get_powermode-prototype.patch b/meta-openembedded/meta-oe/recipes-support/hddtemp/hddtemp/0001-sata.c-Declare-ata_get_powermode-prototype.patch
new file mode 100644
index 0000000..5703d3c
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/hddtemp/hddtemp/0001-sata.c-Declare-ata_get_powermode-prototype.patch
@@ -0,0 +1,33 @@
+From c5ca31940d1d1889ef2cc6974c18ff24ab406748 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 31 Aug 2022 16:42:23 -0700
+Subject: [PATCH] sata.c: Declare ata_get_powermode prototype
+
+Fixes build warnings/errors
+
+sata.c:127:10: error: call to undeclared function 'ata_get_powermode'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
+|   switch(ata_get_powermode(dsk->fd))
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/sata.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/src/sata.c b/src/sata.c
+index d67621f..4172245 100644
+--- a/src/sata.c
++++ b/src/sata.c
+@@ -52,7 +52,8 @@
+                 (((u16)(__x) & (u16)0xff00U) >> 8) )); \
+ })
+ 
+- 
++extern enum e_powermode ata_get_powermode(int device);
++
+ static int sata_probe(int device) {
+   int bus_num;
+   unsigned char cmd[4] = { WIN_IDENTIFY, 0, 0, 1 };
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/hddtemp/hddtemp_0.3-beta15.bb b/meta-openembedded/meta-oe/recipes-support/hddtemp/hddtemp_0.3-beta15.bb
index b4be39f..e9d3097 100644
--- a/meta-openembedded/meta-oe/recipes-support/hddtemp/hddtemp_0.3-beta15.bb
+++ b/meta-openembedded/meta-oe/recipes-support/hddtemp/hddtemp_0.3-beta15.bb
@@ -10,6 +10,7 @@
            file://hddtemp-0.3-beta15-autodetect-717479.patch \
            file://0001-backtrace-Replace-struct-ucontext-with-ucontext_t.patch \
            file://0001-configure.ac-Rename-to-configure.ac-and-use-external.patch \
+           file://0001-sata.c-Declare-ata_get_powermode-prototype.patch \
            file://hddtemp.db \
            file://init \
 "
diff --git a/meta-openembedded/meta-oe/recipes-support/hdf5/files/0001-cmake-remove-build-flags.patch b/meta-openembedded/meta-oe/recipes-support/hdf5/files/0001-cmake-remove-build-flags.patch
index 288e642..b896770 100644
--- a/meta-openembedded/meta-oe/recipes-support/hdf5/files/0001-cmake-remove-build-flags.patch
+++ b/meta-openembedded/meta-oe/recipes-support/hdf5/files/0001-cmake-remove-build-flags.patch
@@ -13,53 +13,46 @@
  config/cmake/libhdf5.settings.cmake.in | 12 ++++++------
  2 files changed, 7 insertions(+), 7 deletions(-)
 
-diff --git a/config/cmake/libh5cc.in b/config/cmake/libh5cc.in
-index f462d56..93c7137 100644
 --- a/config/cmake/libh5cc.in
 +++ b/config/cmake/libh5cc.in
-@@ -29,4 +29,4 @@ printf 'dir is %s\n' "$dir"
- 
- export PKG_CONFIG_PATH=$dir/lib/pkgconfig
- 
--@_PKG_CONFIG_COMPILER@ `pkg-config --define-variable=prefix=$dir --cflags --libs @_PKG_CONFIG_LIBNAME@-@_PKG_CONFIG_VERSION@` $@
-+`pkg-config --define-variable=prefix=$dir --cflags --libs @_PKG_CONFIG_LIBNAME@-@_PKG_CONFIG_VERSION@` $@
-diff --git a/config/cmake/libhdf5.settings.cmake.in b/config/cmake/libhdf5.settings.cmake.in
-index c337dea..0ab9066 100644
+@@ -45,7 +45,7 @@ for arg in $@ ; do
+       exit $status
+       ;;
+     *)
+-      @_PKG_CONFIG_COMPILER@ $@ `pkg-config --define-variable=prefix=$dir --cflags --libs @_PKG_CONFIG_LIBNAME@`
++      `pkg-config --define-variable=prefix=$dir --cflags --libs @_PKG_CONFIG_LIBNAME@-@_PKG_CONFIG_VERSION@` $@
+       status=$?
+       exit $status
+       ;;
 --- a/config/cmake/libhdf5.settings.cmake.in
 +++ b/config/cmake/libhdf5.settings.cmake.in
-@@ -16,8 +16,8 @@ General Information:
- Compiling Options:
- ------------------
-                Compilation Mode: @CMAKE_BUILD_TYPE@ @CMAKE_C_COMPILER_VERSION@
--                     C Compiler: @CMAKE_C_COMPILER@
--                         CFLAGS: @CMAKE_C_FLAGS@
-+                     C Compiler:
-+                         CFLAGS:
-                       H5_CFLAGS: @H5_CFLAGS@
-                       AM_CFLAGS: @AM_CFLAGS@
-                        CPPFLAGS: @CPPFLAGS@
-@@ -29,8 +29,8 @@ Compiling Options:
-                         LDFLAGS: @CMAKE_SHARED_LINKER_FLAGS@
+@@ -27,14 +27,14 @@ Linking Options:
+                      H5_LDFLAGS: @H5_LDFLAGS@
                       AM_LDFLAGS: @AM_LDFLAGS@
                  Extra libraries: @LINK_LIBS@
 -                       Archiver: @CMAKE_AR@
 -                         Ranlib: @CMAKE_RANLIB@
 +                       Archiver:
 +                         Ranlib:
-               Debugged Packages: @DEBUG_PKG@
-                     API Tracing: @HDF5_ENABLE_TRACE@
  
-@@ -46,8 +46,8 @@ Languages:
- @BUILD_FORTRAN_CONDITIONAL_TRUE@         Static Fortran Library: YES
+ Languages:
+ ----------
+                               C: YES
+-                     C Compiler: @CMAKE_C_COMPILER@ @CMAKE_C_COMPILER_VERSION@
+-                       CPPFLAGS: @CPPFLAGS@
++                     C Compiler:
++                       CPPFLAGS:
+                     H5_CPPFLAGS: @H5_CPPFLAGS@
+                     AM_CPPFLAGS: @AM_CPPFLAGS@
+                          CFLAGS: @CMAKE_C_FLAGS@
+@@ -52,8 +52,8 @@ Languages:
+ @BUILD_FORTRAN_CONDITIONAL_TRUE@         Static Fortran Library: @H5_ENABLE_STATIC_LIB@
  
                              C++: @HDF5_BUILD_CPP_LIB@
 -@BUILD_CXX_CONDITIONAL_TRUE@                   C++ Compiler: @CMAKE_CXX_COMPILER@ @CMAKE_CXX_COMPILER_VERSION@
 -@BUILD_CXX_CONDITIONAL_TRUE@                      C++ Flags: @CMAKE_CXX_FLAGS@
 +@BUILD_CXX_CONDITIONAL_TRUE@                   C++ Compiler:
 +@BUILD_CXX_CONDITIONAL_TRUE@                      C++ Flags:
- @BUILD_CXX_CONDITIONAL_TRUE@                   H5 C++ Flags: @H5_CXXFLAGS@
+ @BUILD_CXX_CONDITIONAL_TRUE@                   H5 C++ Flags: @HDF5_CMAKE_CXX_FLAGS@
  @BUILD_CXX_CONDITIONAL_TRUE@                   AM C++ Flags: @AM_CXXFLAGS@
  @BUILD_CXX_CONDITIONAL_TRUE@             Shared C++ Library: @H5_ENABLE_SHARED_LIB@
--- 
-2.17.1
-
diff --git a/meta-openembedded/meta-oe/recipes-support/hdf5/files/0001-cross-compiling-support.patch b/meta-openembedded/meta-oe/recipes-support/hdf5/files/0001-cross-compiling-support.patch
deleted file mode 100644
index bdc1fa5..0000000
--- a/meta-openembedded/meta-oe/recipes-support/hdf5/files/0001-cross-compiling-support.patch
+++ /dev/null
@@ -1,68 +0,0 @@
-From 1b4fadf550587b46e405d7ccb777dcb00aa791a2 Mon Sep 17 00:00:00 2001
-From: Hongxu Jia <hongxu.jia@windriver.com>
-Date: Tue, 29 Jan 2019 15:39:15 +0800
-Subject: [PATCH 1/2] cross compiling support
-
-Do not generate config files at build time, use pre-generated
-files to instead.
-
-Upstream-Status: Inappropriate [oe specific]
-
-Signed-off-by: Hongxu Jia <hongxu.jia@windriver.com>
----
- CMakeLists.txt     |  6 ++++++
- src/CMakeLists.txt | 23 -----------------------
- 2 files changed, 6 insertions(+), 23 deletions(-)
-
---- a/CMakeLists.txt
-+++ b/CMakeLists.txt
-@@ -770,4 +770,10 @@ endif ()
- #-----------------------------------------------------------------------------
- configure_file (${HDF_RESOURCES_DIR}/H5pubconf.h.in ${HDF5_BINARY_DIR}/H5pubconf.h @ONLY)
- 
-+#-----------------------------------------------------------------------------
-+# Copy the pre-generated files in place
-+#-----------------------------------------------------------------------------
-+configure_file(H5Tinit.c H5Tinit.c COPYONLY)
-+configure_file(H5lib_settings.c H5lib_settings.c COPYONLY)
-+
- include (CMakeInstallation.cmake)
---- a/src/CMakeLists.txt
-+++ b/src/CMakeLists.txt
-@@ -613,36 +613,9 @@ set (H5_PRIVATE_HEADERS
- #-----------------------------------------------------------------------------
- add_executable (H5detect ${HDF5_SRC_DIR}/H5detect.c)
- TARGET_C_PROPERTIES (H5detect STATIC " " " ")
--if (MSVC OR MINGW)
--  target_link_libraries (H5detect "ws2_32.lib")
--endif ()
--if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
--  set_property(TARGET H5detect PROPERTY LINK_FLAGS "-O0")
--endif ()
--
--add_custom_command (
--    OUTPUT ${HDF5_BINARY_DIR}/H5Tinit.c
--    COMMAND $<TARGET_FILE:H5detect>
--    ARGS > ${HDF5_BINARY_DIR}/H5Tinit.c
--    DEPENDS H5detect
--)
- 
- add_executable (H5make_libsettings ${HDF5_SRC_DIR}/H5make_libsettings.c)
- TARGET_C_PROPERTIES (H5make_libsettings STATIC " " " ")
--if (MSVC OR MINGW)
--  target_link_libraries (H5make_libsettings "ws2_32.lib")
--endif ()
--if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
--  set_property(TARGET H5make_libsettings PROPERTY LINK_FLAGS "-O0")
--endif ()
--
--add_custom_command (
--    OUTPUT ${HDF5_BINARY_DIR}/H5lib_settings.c
--    COMMAND $<TARGET_FILE:H5make_libsettings>
--    ARGS > ${HDF5_BINARY_DIR}/H5lib_settings.c
--    DEPENDS H5make_libsettings
--    WORKING_DIRECTORY ${HDF5_BINARY_DIR}
--)
- 
- if (GENERATE_ERROR_HEADERS)
-   find_package (Perl)
diff --git a/meta-openembedded/meta-oe/recipes-support/hdf5/files/0002-Remove-suffix-shared-from-shared-library-name.patch b/meta-openembedded/meta-oe/recipes-support/hdf5/files/0002-Remove-suffix-shared-from-shared-library-name.patch
index 6f89378..de05245 100644
--- a/meta-openembedded/meta-oe/recipes-support/hdf5/files/0002-Remove-suffix-shared-from-shared-library-name.patch
+++ b/meta-openembedded/meta-oe/recipes-support/hdf5/files/0002-Remove-suffix-shared-from-shared-library-name.patch
@@ -10,20 +10,19 @@
  CMakeLists.txt | 24 ++++++++++++------------
  1 file changed, 12 insertions(+), 12 deletions(-)
 
-diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 9af14d5..05100e3 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
-@@ -153,18 +153,18 @@ set (HDF5_F90_TEST_LIB_TARGET     "${HDF5_F90_TEST_LIB_CORENAME}-static")
- set (HDF5_F90_C_TEST_LIB_TARGET   "${HDF5_F90_C_TEST_LIB_CORENAME}-static")
- set (HDF5_HL_F90_LIB_TARGET       "${HDF5_HL_F90_LIB_CORENAME}-static")
- set (HDF5_HL_F90_C_LIB_TARGET     "${HDF5_HL_F90_C_LIB_CORENAME}-static")
+@@ -196,19 +196,19 @@ set (HDF5_HL_F90_C_LIB_TARGET     "${HDF
+ set (HDF5_JAVA_JNI_LIB_TARGET     "${HDF5_JAVA_JNI_LIB_CORENAME}")
+ set (HDF5_JAVA_HDF5_LIB_TARGET    "${HDF5_JAVA_HDF5_LIB_CORENAME}")
+ set (HDF5_JAVA_TEST_LIB_TARGET    "${HDF5_JAVA_TEST_LIB_CORENAME}")
 -set (HDF5_LIBSH_TARGET            "${HDF5_LIB_CORENAME}-shared")
 -set (HDF5_TEST_LIBSH_TARGET       "${HDF5_TEST_LIB_CORENAME}-shared")
 -set (HDF5_CPP_LIBSH_TARGET        "${HDF5_CPP_LIB_CORENAME}-shared")
 -set (HDF5_HL_LIBSH_TARGET         "${HDF5_HL_LIB_CORENAME}-shared")
 -set (HDF5_HL_CPP_LIBSH_TARGET     "${HDF5_HL_CPP_LIB_CORENAME}-shared")
 -set (HDF5_TOOLS_LIBSH_TARGET      "${HDF5_TOOLS_LIB_CORENAME}-shared")
+-set (HDF5_UTILS_LIBSH_TARGET      "${HDF5_UTILS_LIB_CORENAME}-shared")
 -set (HDF5_F90_LIBSH_TARGET        "${HDF5_F90_LIB_CORENAME}-shared")
 -set (HDF5_F90_C_LIBSH_TARGET      "${HDF5_F90_C_LIB_CORENAME}-shared")
 -set (HDF5_F90_TEST_LIBSH_TARGET   "${HDF5_F90_TEST_LIB_CORENAME}-shared")
@@ -36,6 +35,7 @@
 +set (HDF5_HL_LIBSH_TARGET         "${HDF5_HL_LIB_CORENAME}")
 +set (HDF5_HL_CPP_LIBSH_TARGET     "${HDF5_HL_CPP_LIB_CORENAME}")
 +set (HDF5_TOOLS_LIBSH_TARGET      "${HDF5_TOOLS_LIB_CORENAME}")
++set (HDF5_UTILS_LIBSH_TARGET      "${HDF5_UTILS_LIB_CORENAME}")
 +set (HDF5_F90_LIBSH_TARGET        "${HDF5_F90_LIB_CORENAME}")
 +set (HDF5_F90_C_LIBSH_TARGET      "${HDF5_F90_C_LIB_CORENAME}")
 +set (HDF5_F90_TEST_LIBSH_TARGET   "${HDF5_F90_TEST_LIB_CORENAME}")
@@ -45,6 +45,3 @@
  
  #-----------------------------------------------------------------------------
  # Define some CMake variables for use later in the project
--- 
-2.7.4
-
diff --git a/meta-openembedded/meta-oe/recipes-support/hdf5/files/H5Tinit-32.c b/meta-openembedded/meta-oe/recipes-support/hdf5/files/H5Tinit-32.c
deleted file mode 100755
index 8057a7a..0000000
--- a/meta-openembedded/meta-oe/recipes-support/hdf5/files/H5Tinit-32.c
+++ /dev/null
@@ -1,975 +0,0 @@
-/* Generated automatically by H5detect -- do not edit */
-
-
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- * Copyright by The HDF Group.                                               *
- * Copyright by the Board of Trustees of the University of Illinois.         *
- * All rights reserved.                                                      *
- *                                                                           *
- * This file is part of HDF5.  The full HDF5 copyright notice, including     *
- * terms governing use, modification, and redistribution, is contained in    *
- * the COPYING file, which can be found at the root of the source code       *
- * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases.  *
- * If you do not have access to either file, you may request a copy from     *
- * help@hdfgroup.org.                                                        *
- * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- *
- * Created:		Mar  5, 2019
- *			root <root@qemuarm>
- *
- * Purpose:		This machine-generated source code contains
- *			information about the various integer and
- *			floating point numeric formats found on this
- *			architecture.  The parameters below should be
- *			checked carefully and errors reported to the
- *			HDF5 maintainer.
- *			
- *			Each of the numeric formats listed below are
- *			printed from most significant bit to least
- *			significant bit even though the actual bytes
- *			might be stored in a different order in
- *			memory.	 The integers above each binary byte
- *			indicate the relative order of the bytes in
- *			memory; little-endian machines have
- *			decreasing numbers while big-endian machines
- *			have increasing numbers.
- *			
- *			The fields of the numbers are printed as
- *			letters with `S' for the mantissa sign bit,
- *			`M' for the mantissa magnitude, and `E' for
- *			the exponent.  The exponent has an associated
- *			bias which can be subtracted to find the
- *			true exponent.	The radix point is assumed
- *			to be before the first `M' bit.	 Any bit
- *			of a floating-point value not falling into one
- *			of these categories is printed as a question
- *			mark.  Bits of integer types are printed as
- *			`I' for 2's complement and `U' for magnitude.
- *			
- *			If the most significant bit of the normalized
- *			mantissa (always a `1' except for `0.0') is
- *			not stored then an `implicit=yes' appears
- *			under the field description.  In thie case,
- *			the radix point is still assumed to be
- *			before the first `M' but after the implicit
- *			bit.
- *
- * Modifications:
- *
- *	DO NOT MAKE MODIFICATIONS TO THIS FILE!
- *	It was generated by code in `H5detect.c'.
- *
- *-------------------------------------------------------------------------
- */
-
-/****************/
-/* Module Setup */
-/****************/
-
-#define H5T_PACKAGE /*suppress error about including H5Tpkg.h*/
-
-
-/***********/
-/* Headers */
-/***********/
-#include "H5private.h"		/* Generic Functions			*/
-#include "H5Eprivate.h"		/* Error handling		  	*/
-#include "H5FLprivate.h"	/* Free Lists				*/
-#include "H5Iprivate.h"		/* IDs			  		*/
-#include "H5Tpkg.h"		/* Datatypes 				*/
-
-
-/****************/
-/* Local Macros */
-/****************/
-
-
-/******************/
-/* Local Typedefs */
-/******************/
-
-
-/********************/
-/* Package Typedefs */
-/********************/
-
-
-/********************/
-/* Local Prototypes */
-/********************/
-
-
-/********************/
-/* Public Variables */
-/********************/
-
-
-/*****************************/
-/* Library Private Variables */
-/*****************************/
-
-
-/*********************/
-/* Package Variables */
-/*********************/
-
-
-
-/*******************/
-/* Local Variables */
-/*******************/
-
-
-
-/*-------------------------------------------------------------------------
- * Function:	H5TN_init_interface
- *
- * Purpose:	Initialize pre-defined native datatypes from code generated
- *              during the library configuration by H5detect.
- *
- * Return:	Success:	non-negative
- *		Failure:	negative
- *
- * Programmer:	Robb Matzke
- *              Wednesday, December 16, 1998
- *
- *-------------------------------------------------------------------------
- */
-herr_t
-H5TN_init_interface(void)
-{
-    H5T_t	*dt = NULL;
-    herr_t	ret_value = SUCCEED;
-
-    FUNC_ENTER_NOAPI(FAIL)
-
-   /*
-    *    0
-    * IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_SCHAR_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_SCHAR_ALIGN_g = 1;
-    H5T_NATIVE_SCHAR_COMP_ALIGN_g = 1;
-
-   /*
-    *    0
-    * UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UCHAR_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UCHAR_ALIGN_g = 1;
-
-   /*
-    *    1        0
-    * IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 2;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 16;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_SHORT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_SHORT_ALIGN_g = 1;
-    H5T_NATIVE_SHORT_COMP_ALIGN_g = 2;
-
-   /*
-    *    1        0
-    * UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 2;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 16;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_USHORT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_USHORT_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_ALIGN_g = 1;
-    H5T_NATIVE_INT_COMP_ALIGN_g = 4;
-
-   /*
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_LONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_LONG_ALIGN_g = 1;
-    H5T_NATIVE_LONG_COMP_ALIGN_g = 4;
-
-   /*
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_ULONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_ULONG_ALIGN_g = 1;
-
-   /*
-    *    0
-    * IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT8_ALIGN_g = 1;
-
-   /*
-    *    0
-    * UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT8_ALIGN_g = 1;
-
-   /*
-    *    0
-    * IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_LEAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_LEAST8_ALIGN_g = 1;
-
-   /*
-    *    0
-    * UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_LEAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_LEAST8_ALIGN_g = 1;
-
-   /*
-    *    0
-    * IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_FAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_FAST8_ALIGN_g = 1;
-
-   /*
-    *    0
-    * UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_FAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_FAST8_ALIGN_g = 1;
-
-   /*
-    *    1        0
-    * IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 2;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 16;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT16_ALIGN_g = 1;
-
-   /*
-    *    1        0
-    * UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 2;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 16;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT16_ALIGN_g = 1;
-
-   /*
-    *    1        0
-    * IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 2;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 16;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_LEAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_LEAST16_ALIGN_g = 1;
-
-   /*
-    *    1        0
-    * UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 2;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 16;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_LEAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_LEAST16_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_FAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_FAST16_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_FAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_FAST16_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT32_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT32_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_LEAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_LEAST32_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_LEAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_LEAST32_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_FAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_FAST32_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_FAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_FAST32_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT64_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT64_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_LEAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_LEAST64_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_LEAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_LEAST64_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_FAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_FAST64_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_FAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_FAST64_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_LLONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_LLONG_ALIGN_g = 1;
-    H5T_NATIVE_LLONG_COMP_ALIGN_g = 8;
-
-   /*
-    *    7        6        5        4
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_ULLONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_ULLONG_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * SEEEEEEE EMMMMMMM MMMMMMMM MMMMMMMM
-    * Implicit bit? yes
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_FLOAT;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.f.sign = 31;
-    dt->shared->u.atomic.u.f.epos = 23;
-    dt->shared->u.atomic.u.f.esize = 8;
-    dt->shared->u.atomic.u.f.ebias = 0x0000007f;
-    dt->shared->u.atomic.u.f.mpos = 0;
-    dt->shared->u.atomic.u.f.msize = 23;
-    dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
-    dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
-    if((H5T_NATIVE_FLOAT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_FLOAT_ALIGN_g = 1;
-    H5T_NATIVE_FLOAT_COMP_ALIGN_g = 4;
-
-   /*
-    *    7        6        5        4
-    * SEEEEEEE EEEEMMMM MMMMMMMM MMMMMMMM
-    *    3        2        1        0
-    * MMMMMMMM MMMMMMMM MMMMMMMM MMMMMMMM
-    * Implicit bit? yes
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_FLOAT;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.f.sign = 63;
-    dt->shared->u.atomic.u.f.epos = 52;
-    dt->shared->u.atomic.u.f.esize = 11;
-    dt->shared->u.atomic.u.f.ebias = 0x000003ff;
-    dt->shared->u.atomic.u.f.mpos = 0;
-    dt->shared->u.atomic.u.f.msize = 52;
-    dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
-    dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
-    if((H5T_NATIVE_DOUBLE_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_DOUBLE_ALIGN_g = 1;
-    H5T_NATIVE_DOUBLE_COMP_ALIGN_g = 8;
-
-   /*
-    *    7        6        5        4
-    * SEEEEEEE EEEEMMMM MMMMMMMM MMMMMMMM
-    *    3        2        1        0
-    * MMMMMMMM MMMMMMMM MMMMMMMM MMMMMMMM
-    * Implicit bit? yes
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_FLOAT;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.f.sign = 63;
-    dt->shared->u.atomic.u.f.epos = 52;
-    dt->shared->u.atomic.u.f.esize = 11;
-    dt->shared->u.atomic.u.f.ebias = 0x000003ff;
-    dt->shared->u.atomic.u.f.mpos = 0;
-    dt->shared->u.atomic.u.f.msize = 52;
-    dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
-    dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
-    if((H5T_NATIVE_LDOUBLE_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_LDOUBLE_ALIGN_g = 1;
-    H5T_NATIVE_LDOUBLE_COMP_ALIGN_g = 8;
-
-    /* Set the native order for this machine */
-    H5T_native_order_g = H5T_ORDER_LE;
-
-    /* Structure alignment for pointers, hvl_t, hobj_ref_t, hdset_reg_ref_t */
-    H5T_POINTER_COMP_ALIGN_g = 4;
-    H5T_HVL_COMP_ALIGN_g = 4;
-    H5T_HOBJREF_COMP_ALIGN_g = 8;
-    H5T_HDSETREGREF_COMP_ALIGN_g = 1;
-
-done:
-    if(ret_value < 0) {
-        if(dt != NULL) {
-            dt->shared = H5FL_FREE(H5T_shared_t, dt->shared);
-            dt = H5FL_FREE(H5T_t, dt);
-        } /* end if */
-    } /* end if */
-
-    FUNC_LEAVE_NOAPI(ret_value);
-} /* end H5TN_init_interface() */
-
-/****************************************/
-/* ALIGNMENT and signal-handling status */
-/****************************************/
-/* Signal() support: yes */
-/* setjmp() support: yes */
-/* longjmp() support: yes */
-/* sigsetjmp() support: yes */
-/* siglongjmp() support: yes */
-/* sigprocmask() support: yes */
-
-/******************************/
-/* signal handlers statistics */
-/******************************/
-/* signal_handlers tested: 15 times */
-/* sigbus_handler called: 5 times */
-/* sigsegv_handler called: 5 times */
-/* sigill_handler called: 5 times */
diff --git a/meta-openembedded/meta-oe/recipes-support/hdf5/files/H5Tinit-64.c b/meta-openembedded/meta-oe/recipes-support/hdf5/files/H5Tinit-64.c
deleted file mode 100644
index 85f79a6..0000000
--- a/meta-openembedded/meta-oe/recipes-support/hdf5/files/H5Tinit-64.c
+++ /dev/null
@@ -1,991 +0,0 @@
-/* Generated automatically by H5detect -- do not edit */
-
-
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- * Copyright by The HDF Group.                                               *
- * Copyright by the Board of Trustees of the University of Illinois.         *
- * All rights reserved.                                                      *
- *                                                                           *
- * This file is part of HDF5.  The full HDF5 copyright notice, including     *
- * terms governing use, modification, and redistribution, is contained in    *
- * the COPYING file, which can be found at the root of the source code       *
- * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases.  *
- * If you do not have access to either file, you may request a copy from     *
- * help@hdfgroup.org.                                                        *
- * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- *
- * Created:		Jan 29, 2019
- *			root <root@qemux86-64>
- *
- * Purpose:		This machine-generated source code contains
- *			information about the various integer and
- *			floating point numeric formats found on this
- *			architecture.  The parameters below should be
- *			checked carefully and errors reported to the
- *			HDF5 maintainer.
- *			
- *			Each of the numeric formats listed below are
- *			printed from most significant bit to least
- *			significant bit even though the actual bytes
- *			might be stored in a different order in
- *			memory.	 The integers above each binary byte
- *			indicate the relative order of the bytes in
- *			memory; little-endian machines have
- *			decreasing numbers while big-endian machines
- *			have increasing numbers.
- *			
- *			The fields of the numbers are printed as
- *			letters with `S' for the mantissa sign bit,
- *			`M' for the mantissa magnitude, and `E' for
- *			the exponent.  The exponent has an associated
- *			bias which can be subtracted to find the
- *			true exponent.	The radix point is assumed
- *			to be before the first `M' bit.	 Any bit
- *			of a floating-point value not falling into one
- *			of these categories is printed as a question
- *			mark.  Bits of integer types are printed as
- *			`I' for 2's complement and `U' for magnitude.
- *			
- *			If the most significant bit of the normalized
- *			mantissa (always a `1' except for `0.0') is
- *			not stored then an `implicit=yes' appears
- *			under the field description.  In thie case,
- *			the radix point is still assumed to be
- *			before the first `M' but after the implicit
- *			bit.
- *
- * Modifications:
- *
- *	DO NOT MAKE MODIFICATIONS TO THIS FILE!
- *	It was generated by code in `H5detect.c'.
- *
- *-------------------------------------------------------------------------
- */
-
-/****************/
-/* Module Setup */
-/****************/
-
-#define H5T_PACKAGE /*suppress error about including H5Tpkg.h*/
-
-
-/***********/
-/* Headers */
-/***********/
-#include "H5private.h"		/* Generic Functions			*/
-#include "H5Eprivate.h"		/* Error handling		  	*/
-#include "H5FLprivate.h"	/* Free Lists				*/
-#include "H5Iprivate.h"		/* IDs			  		*/
-#include "H5Tpkg.h"		/* Datatypes 				*/
-
-
-/****************/
-/* Local Macros */
-/****************/
-
-
-/******************/
-/* Local Typedefs */
-/******************/
-
-
-/********************/
-/* Package Typedefs */
-/********************/
-
-
-/********************/
-/* Local Prototypes */
-/********************/
-
-
-/********************/
-/* Public Variables */
-/********************/
-
-
-/*****************************/
-/* Library Private Variables */
-/*****************************/
-
-
-/*********************/
-/* Package Variables */
-/*********************/
-
-
-
-/*******************/
-/* Local Variables */
-/*******************/
-
-
-
-/*-------------------------------------------------------------------------
- * Function:	H5TN_init_interface
- *
- * Purpose:	Initialize pre-defined native datatypes from code generated
- *              during the library configuration by H5detect.
- *
- * Return:	Success:	non-negative
- *		Failure:	negative
- *
- * Programmer:	Robb Matzke
- *              Wednesday, December 16, 1998
- *
- *-------------------------------------------------------------------------
- */
-herr_t
-H5TN_init_interface(void)
-{
-    H5T_t	*dt = NULL;
-    herr_t	ret_value = SUCCEED;
-
-    FUNC_ENTER_NOAPI(FAIL)
-
-   /*
-    *    0
-    * IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_SCHAR_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_SCHAR_ALIGN_g = 1;
-    H5T_NATIVE_SCHAR_COMP_ALIGN_g = 1;
-
-   /*
-    *    0
-    * UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UCHAR_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UCHAR_ALIGN_g = 1;
-
-   /*
-    *    1        0
-    * IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 2;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 16;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_SHORT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_SHORT_ALIGN_g = 1;
-    H5T_NATIVE_SHORT_COMP_ALIGN_g = 2;
-
-   /*
-    *    1        0
-    * UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 2;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 16;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_USHORT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_USHORT_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_ALIGN_g = 1;
-    H5T_NATIVE_INT_COMP_ALIGN_g = 4;
-
-   /*
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_LONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_LONG_ALIGN_g = 1;
-    H5T_NATIVE_LONG_COMP_ALIGN_g = 8;
-
-   /*
-    *    7        6        5        4
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_ULONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_ULONG_ALIGN_g = 1;
-
-   /*
-    *    0
-    * IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT8_ALIGN_g = 1;
-
-   /*
-    *    0
-    * UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT8_ALIGN_g = 1;
-
-   /*
-    *    0
-    * IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_LEAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_LEAST8_ALIGN_g = 1;
-
-   /*
-    *    0
-    * UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_LEAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_LEAST8_ALIGN_g = 1;
-
-   /*
-    *    0
-    * IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_FAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_FAST8_ALIGN_g = 1;
-
-   /*
-    *    0
-    * UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 1;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 8;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_FAST8_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_FAST8_ALIGN_g = 1;
-
-   /*
-    *    1        0
-    * IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 2;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 16;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT16_ALIGN_g = 1;
-
-   /*
-    *    1        0
-    * UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 2;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 16;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT16_ALIGN_g = 1;
-
-   /*
-    *    1        0
-    * IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 2;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 16;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_LEAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_LEAST16_ALIGN_g = 1;
-
-   /*
-    *    1        0
-    * UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 2;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 16;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_LEAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_LEAST16_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_FAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_FAST16_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_FAST16_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_FAST16_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT32_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT32_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_LEAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_LEAST32_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_LEAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_LEAST32_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_FAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_FAST32_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_FAST32_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_FAST32_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT64_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT64_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_LEAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_LEAST64_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_LEAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_LEAST64_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_INT_FAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_INT_FAST64_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_UINT_FAST64_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_UINT_FAST64_ALIGN_g = 1;
-
-   /*
-    *    7        6        5        4
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    *    3        2        1        0
-    * IIIIIIII IIIIIIII IIIIIIII IIIIIIII
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_2;
-    if((H5T_NATIVE_LLONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_LLONG_ALIGN_g = 1;
-    H5T_NATIVE_LLONG_COMP_ALIGN_g = 8;
-
-   /*
-    *    7        6        5        4
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    *    3        2        1        0
-    * UUUUUUUU UUUUUUUU UUUUUUUU UUUUUUUU
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_INTEGER;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.i.sign = H5T_SGN_NONE;
-    if((H5T_NATIVE_ULLONG_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_ULLONG_ALIGN_g = 1;
-
-   /*
-    *    3        2        1        0
-    * SEEEEEEE EMMMMMMM MMMMMMMM MMMMMMMM
-    * Implicit bit? yes
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_FLOAT;
-    dt->shared->size = 4;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 32;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.f.sign = 31;
-    dt->shared->u.atomic.u.f.epos = 23;
-    dt->shared->u.atomic.u.f.esize = 8;
-    dt->shared->u.atomic.u.f.ebias = 0x0000007f;
-    dt->shared->u.atomic.u.f.mpos = 0;
-    dt->shared->u.atomic.u.f.msize = 23;
-    dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
-    dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
-    if((H5T_NATIVE_FLOAT_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_FLOAT_ALIGN_g = 1;
-    H5T_NATIVE_FLOAT_COMP_ALIGN_g = 4;
-
-   /*
-    *    7        6        5        4
-    * SEEEEEEE EEEEMMMM MMMMMMMM MMMMMMMM
-    *    3        2        1        0
-    * MMMMMMMM MMMMMMMM MMMMMMMM MMMMMMMM
-    * Implicit bit? yes
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_FLOAT;
-    dt->shared->size = 8;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 64;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.f.sign = 63;
-    dt->shared->u.atomic.u.f.epos = 52;
-    dt->shared->u.atomic.u.f.esize = 11;
-    dt->shared->u.atomic.u.f.ebias = 0x000003ff;
-    dt->shared->u.atomic.u.f.mpos = 0;
-    dt->shared->u.atomic.u.f.msize = 52;
-    dt->shared->u.atomic.u.f.norm = H5T_NORM_IMPLIED;
-    dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
-    if((H5T_NATIVE_DOUBLE_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_DOUBLE_ALIGN_g = 1;
-    H5T_NATIVE_DOUBLE_COMP_ALIGN_g = 8;
-
-   /*
-    *   15       14       13       12
-    * ???????? ???????? ???????? ????????
-    *   11       10        9        8
-    * ???????? ???????? SEEEEEEE EEEEEEEE
-    *    7        6        5        4
-    * MMMMMMMM MMMMMMMM MMMMMMMM MMMMMMMM
-    *    3        2        1        0
-    * MMMMMMMM MMMMMMMM MMMMMMMM MMMMMMMM
-    * Implicit bit? no
-    * Alignment: none
-    */
-    if(NULL == (dt = H5T__alloc()))
-        HGOTO_ERROR(H5E_DATATYPE, H5E_NOSPACE, FAIL, "datatype allocation failed")
-    dt->shared->state = H5T_STATE_IMMUTABLE;
-    dt->shared->type = H5T_FLOAT;
-    dt->shared->size = 16;
-    dt->shared->u.atomic.order = H5T_ORDER_LE;
-    dt->shared->u.atomic.offset = 0;
-    dt->shared->u.atomic.prec = 80;
-    dt->shared->u.atomic.lsb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.msb_pad = H5T_PAD_ZERO;
-    dt->shared->u.atomic.u.f.sign = 79;
-    dt->shared->u.atomic.u.f.epos = 64;
-    dt->shared->u.atomic.u.f.esize = 15;
-    dt->shared->u.atomic.u.f.ebias = 0x00003fff;
-    dt->shared->u.atomic.u.f.mpos = 0;
-    dt->shared->u.atomic.u.f.msize = 64;
-    dt->shared->u.atomic.u.f.norm = H5T_NORM_NONE;
-    dt->shared->u.atomic.u.f.pad = H5T_PAD_ZERO;
-    if((H5T_NATIVE_LDOUBLE_g = H5I_register(H5I_DATATYPE, dt, FALSE)) < 0)
-        HGOTO_ERROR(H5E_DATATYPE, H5E_CANTINIT, FAIL, "can't register ID for built-in datatype")
-    H5T_NATIVE_LDOUBLE_ALIGN_g = 1;
-    H5T_NATIVE_LDOUBLE_COMP_ALIGN_g = 16;
-
-    /* Set the native order for this machine */
-    H5T_native_order_g = H5T_ORDER_LE;
-
-    /* Structure alignment for pointers, hvl_t, hobj_ref_t, hdset_reg_ref_t */
-    H5T_POINTER_COMP_ALIGN_g = 8;
-    H5T_HVL_COMP_ALIGN_g = 8;
-    H5T_HOBJREF_COMP_ALIGN_g = 8;
-    H5T_HDSETREGREF_COMP_ALIGN_g = 1;
-
-done:
-    if(ret_value < 0) {
-        if(dt != NULL) {
-            dt->shared = H5FL_FREE(H5T_shared_t, dt->shared);
-            dt = H5FL_FREE(H5T_t, dt);
-        } /* end if */
-    } /* end if */
-
-    FUNC_LEAVE_NOAPI(ret_value);
-} /* end H5TN_init_interface() */
-
-/****************************************/
-/* ALIGNMENT and signal-handling status */
-/****************************************/
-/* Signal() support: yes */
-/* setjmp() support: yes */
-/* longjmp() support: yes */
-/* sigsetjmp() support: yes */
-/* siglongjmp() support: yes */
-/* sigprocmask() support: yes */
-
-/******************************/
-/* signal handlers statistics */
-/******************************/
-/* signal_handlers tested: 15 times */
-/* sigbus_handler called: 5 times */
-/* sigsegv_handler called: 5 times */
-/* sigill_handler called: 5 times */
diff --git a/meta-openembedded/meta-oe/recipes-support/hdf5/files/H5lib_settings.c b/meta-openembedded/meta-oe/recipes-support/hdf5/files/H5lib_settings.c
deleted file mode 100644
index c243aee..0000000
--- a/meta-openembedded/meta-oe/recipes-support/hdf5/files/H5lib_settings.c
+++ /dev/null
@@ -1,98 +0,0 @@
-/* Generated automatically by H5make_libsettings -- do not edit */
-
-
-
-/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- * Copyright by The HDF Group.                                               *
- * Copyright by the Board of Trustees of the University of Illinois.         *
- * All rights reserved.                                                      *
- *                                                                           *
- * This file is part of HDF5.  The full HDF5 copyright notice, including     *
- * terms governing use, modification, and redistribution, is contained in    *
- * the COPYING file, which can be found at the root of the source code       *
- * distribution tree, or in https://support.hdfgroup.org/ftp/HDF5/releases.  *
- * If you do not have access to either file, you may request a copy from     *
- * help@hdfgroup.org.                                                        *
- * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
- *
- * Created:		Jan 29, 2019
- *			root <root@qemux86-64>
- *
- * Purpose:		This machine-generated source code contains
- *			information about the library build configuration
- *
- * Modifications:
- *
- *	DO NOT MAKE MODIFICATIONS TO THIS FILE!
- *	It was generated by code in `H5make_libsettings.c'.
- *
- *-------------------------------------------------------------------------
- */
-
-char H5libhdf5_settings[]=
-	"      SUMMARY OF THE HDF5 CONFIGURATION\n"
-	"      =================================\n"
-	"\n"
-	"General Information:\n"
-	"-------------------\n"
-	"                   HDF5 Version: 1.8.19\n"
-	"                  Configured on: 2019-01-29\n"
-	"                  Configured by: Ninja\n"
-	"                 Configure mode: CMAKE 3.12.2\n"
-	"                    Host system: Linux-4.4.0-31-generic\n"
-	"              Uname information: Linux\n"
-	"                       Byte sex: little-endian\n"
-	"                      Libraries: \n"
-	"             Installation point: /usr\n"
-	"\n"
-	"Compiling Options:\n"
-	"------------------\n"
-	"               Compilation Mode: \n"
-	"                      H5_CFLAGS: \n"
-	"                      AM_CFLAGS: \n"
-	"                       CPPFLAGS: \n"
-	"                    H5_CPPFLAGS: \n"
-	"                    AM_CPPFLAGS: \n"
-	"               Shared C Library: YES\n"
-	"               Static C Library: YES\n"
-	"  Statically Linked Executables: OFF\n"
-	"                        LDFLAGS: -Wl,-O1 -Wl,--hash-style=gnu -Wl,--as-needed\n"
-	"                     AM_LDFLAGS: \n"
-	"                Extra libraries: m;dl\n"
-	"              Debugged Packages: \n"
-	"                    API Tracing: OFF\n"
-	"\n"
-	"Languages:\n"
-	"----------\n"
-	"                        Fortran: OFF\n"
-	"               Fortran Compiler: \n"
-	"          Fortran 2003 Compiler: \n"
-	"                  Fortran Flags: \n"
-	"               H5 Fortran Flags: \n"
-	"               AM Fortran Flags: \n"
-	"         Shared Fortran Library: YES\n"
-	"         Static Fortran Library: YES\n"
-	"\n"
-	"                   H5 C++ Flags: \n"
-	"                   AM C++ Flags: \n"
-	"             Shared C++ Library: YES\n"
-	"             Static C++ Library: YES\n"
-	"\n"
-	"Features:\n"
-	"---------\n"
-	"                  Parallel HDF5: OFF\n"
-	"             High Level library: ON\n"
-	"                   Threadsafety: OFF\n"
-	"            Default API Mapping: v18\n"
-	" With Deprecated Public Symbols: ON\n"
-	"         I/O filters (external): \n"
-	"                            MPE: \n"
-	"                     Direct VFD: \n"
-	"                        dmalloc: \n"
-	"Clear file buffers before write: ON\n"
-	"           Using memory checker: OFF\n"
-	"         Function Stack Tracing: OFF\n"
-	"      Strict File Format Checks: OFF\n"
-	"   Optimization Instrumentation: \n"
-;
-
diff --git a/meta-openembedded/meta-oe/recipes-support/hdf5/hdf5_1.13.2.bb b/meta-openembedded/meta-oe/recipes-support/hdf5/hdf5_1.13.2.bb
new file mode 100644
index 0000000..80611c6
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/hdf5/hdf5_1.13.2.bb
@@ -0,0 +1,54 @@
+SUMMARY = "Management suite for extremely large and complex data collections"
+DESCRIPTION = "Unique technology suite that makes possible the management of \
+extremely large and complex data collections"
+HOMEPAGE = "https://www.hdfgroup.org/"
+SECTION = "libs"
+
+LICENSE = "HDF5"
+LIC_FILES_CHKSUM = "file://COPYING;md5=ac1039f6bf7c9ab2b3693836f46d0735"
+
+inherit cmake siteinfo qemu
+
+DEPENDS += "qemu-native"
+
+SRC_URI = " \
+    https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.13/hdf5-${PV}/src/${BPN}-${PV}.tar.bz2 \
+    file://0002-Remove-suffix-shared-from-shared-library-name.patch \
+    file://0001-cmake-remove-build-flags.patch \
+"
+SRC_URI[sha256sum] = "9c51b3da426977ec622a43dca8adaf4e81eabf838c1ff80c6225ad1d3ed54b5c"
+
+FILES:${PN} += "${libdir}/libhdf5.settings ${datadir}/*"
+
+EXTRA_OECMAKE = " \
+    -DHDF5_INSTALL_CMAKE_DIR=${libdir}/cmake \
+    -DCMAKE_INSTALL_PREFIX='${prefix}' \
+    -DHDF5_INSTALL_LIB_DIR='${baselib}' \
+"
+EXTRA_OECMAKE:prepend:class-target = "-DCMAKE_CROSSCOMPILING_EMULATOR=${WORKDIR}/qemuwrapper "
+
+gen_emu() {
+        # Write out a qemu wrapper that will be used by cmake
+        # so that it can run target helper binaries through that.
+        qemu_binary="${@qemu_wrapper_cmdline(d, d.getVar('STAGING_DIR_HOST'), [d.expand('${STAGING_DIR_HOST}${libdir}'),d.expand('${STAGING_DIR_HOST}${base_libdir}')])}"
+        cat > ${WORKDIR}/qemuwrapper << EOF
+#!/bin/sh
+$qemu_binary "\$@"
+EOF
+        chmod +x ${WORKDIR}/qemuwrapper
+}
+
+do_unpack[postfuncs] += "gen_emu"
+
+do_install:append() {
+    # Used for generating config files on target
+    install -m 755 ${B}/bin/H5detect ${D}${bindir}
+    install -m 755 ${B}/bin/H5make_libsettings ${D}${bindir}
+}
+
+BBCLASSEXTEND = "native"
+
+SRC_DISTRIBUTE_LICENSES += "HDF5"
+
+# h5fuse.sh script needs bash
+RDEPENDS:${PN} += "bash"
diff --git a/meta-openembedded/meta-oe/recipes-support/hdf5/hdf5_1.8.21.bb b/meta-openembedded/meta-oe/recipes-support/hdf5/hdf5_1.8.21.bb
deleted file mode 100644
index 7b886a4..0000000
--- a/meta-openembedded/meta-oe/recipes-support/hdf5/hdf5_1.8.21.bb
+++ /dev/null
@@ -1,59 +0,0 @@
-SUMMARY = "Management suite for extremely large and complex data collections"
-DESCRIPTION = "Unique technology suite that makes possible the management of \
-extremely large and complex data collections"
-HOMEPAGE = "https://www.hdfgroup.org/"
-SECTION = "libs"
-
-LICENSE = "HDF5"
-LIC_FILES_CHKSUM = "file://COPYING;md5=57e5351b17591e659eedae107265c606"
-
-inherit cmake siteinfo
-
-SRC_URI = " \
-    https://support.hdfgroup.org/ftp/HDF5/releases/hdf5-1.8/hdf5-${PV}/src/${BPN}-${PV}.tar.bz2 \
-    file://H5lib_settings.c \
-    file://H5Tinit-64.c \
-    file://H5Tinit-32.c \
-    file://0001-cross-compiling-support.patch \
-    file://0002-Remove-suffix-shared-from-shared-library-name.patch \
-    file://0001-cmake-remove-build-flags.patch \
-"
-SRC_URI[md5sum] = "2d2408f2a9dfb5c7b79998002e9a90e9"
-SRC_URI[sha256sum] = "e5b1b1dee44a64b795a91c3321ab7196d9e0871fe50d42969761794e3899f40d"
-
-FILES:${PN} += "${libdir}/libhdf5.settings ${datadir}/*"
-
-EXTRA_OECMAKE = " \
-    -DTEST_LFS_WORKS_RUN=0 \
-    -DTEST_LFS_WORKS_RUN__TRYRUN_OUTPUT=0 \
-    -DH5_PRINTF_LL_TEST_RUN=1 \
-    -DH5_PRINTF_LL_TEST_RUN__TRYRUN_OUTPUT='8' \
-    -DH5_LDOUBLE_TO_LONG_SPECIAL_RUN=0 \
-    -DH5_LDOUBLE_TO_LONG_SPECIAL_RUN__TRYRUN_OUTPUT= \
-    -DH5_LONG_TO_LDOUBLE_SPECIAL_RUN=0 \
-    -DH5_LONG_TO_LDOUBLE_SPECIAL_RUN__TRYRUN_OUTPUT= \
-    -DH5_LDOUBLE_TO_LLONG_ACCURATE_RUN=0 \
-    -DH5_LDOUBLE_TO_LLONG_ACCURATE_RUN__TRYRUN_OUTPUT= \
-    -DH5_LLONG_TO_LDOUBLE_CORRECT_RUN=0 \
-    -DH5_LLONG_TO_LDOUBLE_CORRECT_RUN__TRYRUN_OUTPUT= \
-    -DH5_NO_ALIGNMENT_RESTRICTIONS_RUN=0 \
-    -DH5_NO_ALIGNMENT_RESTRICTIONS_RUN__TRYRUN_OUTPUT= \
-    -DCMAKE_INSTALL_PREFIX='${prefix}' \
-    -DHDF5_INSTALL_LIB_DIR='${baselib}' \
-"
-
-do_unpack[postfuncs] += "gen_hd5file"
-gen_hd5file() {
-    install -m 544 ${WORKDIR}/H5lib_settings.c ${S}
-    install -m 544 ${WORKDIR}/H5Tinit-${SITEINFO_BITS}.c ${S}/H5Tinit.c
-}
-
-do_install:append() {
-    # Used for generating config files on target
-    install -m 755 ${B}/bin/H5detect ${D}${bindir}
-    install -m 755 ${B}/bin/H5make_libsettings ${D}${bindir}
-}
-
-BBCLASSEXTEND = "native"
-
-SRC_DISTRIBUTE_LICENSES += "HDF5"
diff --git a/meta-openembedded/meta-oe/recipes-support/hwdata/hwdata_0.361.bb b/meta-openembedded/meta-oe/recipes-support/hwdata/hwdata_0.362.bb
similarity index 91%
rename from meta-openembedded/meta-oe/recipes-support/hwdata/hwdata_0.361.bb
rename to meta-openembedded/meta-oe/recipes-support/hwdata/hwdata_0.362.bb
index c8daf7d..e408c4d 100644
--- a/meta-openembedded/meta-oe/recipes-support/hwdata/hwdata_0.361.bb
+++ b/meta-openembedded/meta-oe/recipes-support/hwdata/hwdata_0.362.bb
@@ -5,7 +5,7 @@
 LICENSE = "GPL-2.0-or-later | XFree86-1.0"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=1556547711e8246992b999edd9445a57"
 
-SRCREV = "a934c616e2e666552586caa85af9fc583855df49"
+SRCREV = "8a3647e5da0a157cbf1bff1845b0cdaf1ce1bdb0"
 SRC_URI = "git://github.com/vcrhonek/${BPN}.git;branch=master;protocol=https"
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-oe/recipes-support/libcereal/files/0001-sandbox-Do-not-use-int8_t-in-std-uniform_int_distrib.patch b/meta-openembedded/meta-oe/recipes-support/libcereal/files/0001-sandbox-Do-not-use-int8_t-in-std-uniform_int_distrib.patch
new file mode 100644
index 0000000..26a8223
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/libcereal/files/0001-sandbox-Do-not-use-int8_t-in-std-uniform_int_distrib.patch
@@ -0,0 +1,54 @@
+From 36054278304945c6aef7d44e58788ca882c67d05 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 15:54:13 -0700
+Subject: [PATCH] sandbox: Do not use int8_t in std::uniform_int_distribution
+
+Newer versions of libc++ have dropped support for this use case since it
+is undefined behaviour (UB), see:
+
+https://reviews.llvm.org/D114920?id=400571
+
+Fixes
+
+uniform_int_distribution.h:162:5: error: static assertion failed due to requirement '__libcpp_random_is_valid_inttype<char>::value': IntType must be a supported integer type
+    static_assert(__libcpp_random_is_valid_inttype<_IntType>::value, "IntType must be a supported integer type");
+    ^             ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+/mnt/b/yoe/master/build/tmp/work/core2-64-yoe-linux-musl/libcereal/1.3.2+gitAUTOINC+ebef1e9298-r0/git/sandbox/performance.cpp:261:9: note: in instantiation of template class 'std::uniform_int_distribution<char>' requested here
+    c = std::uniform_int_distribution<char>(' ', '~')(gen);
+        ^
+/mnt/b/yoe/master/build/tmp/work/core2-64-yoe-linux-musl/libcereal/1.3.2+gitAUTOINC+ebef1e9298-r0/git/sandbox/performance.cpp:261:9: error: type 'std::uniform_int_distribution<char>' does not provide a call operator
+    c = std::uniform_int_distribution<char>(' ', '~')(gen);
+        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+2 errors generated.
+
+Upstream-Status: Submitted [https://github.com/USCiLab/cereal/pull/764]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ sandbox/performance.cpp | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/sandbox/performance.cpp b/sandbox/performance.cpp
+index f9307870..aca8c78c 100644
+--- a/sandbox/performance.cpp
++++ b/sandbox/performance.cpp
+@@ -258,7 +258,7 @@ random_value(std::mt19937 & gen)
+ {
+   std::string s(std::uniform_int_distribution<int>(3, 30)(gen), ' ');
+   for(char & c : s)
+-    c = std::uniform_int_distribution<char>(' ', '~')(gen);
++    c = static_cast<char>( std::uniform_int_distribution<int>(' ', '~')(gen) );
+   return s;
+ }
+ 
+@@ -277,7 +277,7 @@ std::string random_binary_string(std::mt19937 & gen)
+ {
+   std::string s(N, ' ');
+   for(auto & c : s )
+-    c = std::uniform_int_distribution<char>('0', '1')(gen);
++    c = static_cast<char>( std::uniform_int_distribution<int>( '0', '1' )(gen) );
+   return s;
+ }
+ 
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/libcereal/libcereal_1.3.2.bb b/meta-openembedded/meta-oe/recipes-support/libcereal/libcereal_1.3.2.bb
index 80c962e..5248b1e 100644
--- a/meta-openembedded/meta-oe/recipes-support/libcereal/libcereal_1.3.2.bb
+++ b/meta-openembedded/meta-oe/recipes-support/libcereal/libcereal_1.3.2.bb
@@ -19,6 +19,7 @@
 PV .= "+git${SRCPV}"
 SRCREV = "ebef1e929807629befafbb2918ea1a08c7194554"
 SRC_URI = "git://github.com/USCiLab/cereal.git;branch=master;protocol=https \
+           file://0001-sandbox-Do-not-use-int8_t-in-std-uniform_int_distrib.patch \
            file://run-ptest \
 "
 
diff --git a/meta-openembedded/meta-oe/recipes-support/libsmi/libsmi/0001-Define-createIdentifierRef-prototype-in-yang-complex.patch b/meta-openembedded/meta-oe/recipes-support/libsmi/libsmi/0001-Define-createIdentifierRef-prototype-in-yang-complex.patch
new file mode 100644
index 0000000..3374f84
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/libsmi/libsmi/0001-Define-createIdentifierRef-prototype-in-yang-complex.patch
@@ -0,0 +1,53 @@
+From 77b520a8ee193b6d9f12e049f505d8d98204c11c Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 31 Aug 2022 17:13:32 -0700
+Subject: [PATCH] Define createIdentifierRef prototype in yang-complex-types.c
+
+Provide needed prototype to make compiler happy
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ lib/parser-yang.y        | 3 ++-
+ lib/yang-complex-types.c | 3 ++-
+ 2 files changed, 4 insertions(+), 2 deletions(-)
+
+--- a/lib/parser-yang.y
++++ b/lib/parser-yang.y
+@@ -41,7 +41,8 @@
+ #include "yang-complex-types.h"
+ #include "util.h"
+ #include "error.h"
+-    
++#include "yang-check.h"
++
+ #ifdef HAVE_DMALLOC_H
+ #include <dmalloc.h>
+ #endif
+@@ -61,7 +62,8 @@
+ #define debug
+ #endif
+ 
+-    
++extern void createIdentifierRef(struct _YangNode *node, char* prefix, char* ident);
++extern void smiyyerror(char *msg, Parser *parserPtr);
+ /*
+  * NOTE: The argument lvalp ist not really a void pointer. Unfortunately,
+  * we don't know it better at this point. bison generated C code declares
+--- a/lib/yang-complex-types.c
++++ b/lib/yang-complex-types.c
+@@ -35,12 +35,13 @@
+ #include "yang-data.h"
+ #include "yang-check.h"
+ #include "yang-complex-types.h"
+-
+ /*
+  * Current parser defined in parser-yang. Workaround - can't include data.h
+  */
+ extern Parser *currentParser;
+ 
++extern void createIdentifierRef(struct _YangNode *node, char* prefix, char* ident);
++
+ static const int parents_complex_type[] = {
+     YANG_DECL_MODULE, YANG_DECL_SUBMODULE, YANG_DECL_CONTAINER,
+     YANG_DECL_LIST, YANG_DECL_RPC, YANG_DECL_INPUT,
diff --git a/meta-openembedded/meta-oe/recipes-support/libsmi/libsmi/0001-parser-yang-Define-_DEFAULT_SOURCE.patch b/meta-openembedded/meta-oe/recipes-support/libsmi/libsmi/0001-parser-yang-Define-_DEFAULT_SOURCE.patch
new file mode 100644
index 0000000..9354a88
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/libsmi/libsmi/0001-parser-yang-Define-_DEFAULT_SOURCE.patch
@@ -0,0 +1,28 @@
+From 86b706f694b822d8028fd36320147767056d2ffa Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 6 Sep 2022 12:09:00 -0700
+Subject: [PATCH] parser-yang: Define _DEFAULT_SOURCE
+
+This is needed for timegm API
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ lib/parser-yang.y | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/lib/parser-yang.y b/lib/parser-yang.y
+index b111607..0ded9d2 100644
+--- a/lib/parser-yang.y
++++ b/lib/parser-yang.y
+@@ -21,6 +21,7 @@
+ #ifdef BACKEND_YANG
+ 
+ #define _ISOC99_SOURCE
++#define _DEFAULT_SOURCE
+ #include <stdio.h>
+ #include <errno.h>
+ #include <stdlib.h>
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/libsmi/libsmi_0.5.0.bb b/meta-openembedded/meta-oe/recipes-support/libsmi/libsmi_0.5.0.bb
index b238671..33e1b6f 100644
--- a/meta-openembedded/meta-oe/recipes-support/libsmi/libsmi_0.5.0.bb
+++ b/meta-openembedded/meta-oe/recipes-support/libsmi/libsmi_0.5.0.bb
@@ -7,14 +7,18 @@
 SRC_URI = "https://www.ibr.cs.tu-bs.de/projects/${BPN}/download/${BP}.tar.gz \
            file://smi.conf \
            file://libsmi-fix-the-test-dump-files.patch \
+           file://0001-Define-createIdentifierRef-prototype-in-yang-complex.patch \
+           file://0001-parser-yang-Define-_DEFAULT_SOURCE.patch \
           "
 
 SRC_URI[md5sum] = "4bf47483c06c9f07d1b10fbc74eddf11"
 SRC_URI[sha256sum] = "f21accdadb1bb328ea3f8a13fc34d715baac6e2db66065898346322c725754d3"
 
+DEPENDS += "bison-native flex-native"
+
 RDEPENDS:${PN} += "wget"
 
-inherit autotools
+inherit autotools-brokensep
 
 EXTRA_OECONF = "ac_cv_path_SH=/bin/sh ac_cv_path_WGET=${bindir}/wget ac_cv_path_AWK=${bindir}/awk"
 
diff --git a/meta-openembedded/meta-oe/recipes-support/libusbgx/libusbgx_git.bb b/meta-openembedded/meta-oe/recipes-support/libusbgx/libusbgx_git.bb
index 023f7bf..16552fd 100644
--- a/meta-openembedded/meta-oe/recipes-support/libusbgx/libusbgx_git.bb
+++ b/meta-openembedded/meta-oe/recipes-support/libusbgx/libusbgx_git.bb
@@ -3,8 +3,6 @@
 LIC_FILES_CHKSUM = "file://COPYING;md5=b234ee4d69f5fce4486a80fdaf4a4263 \
                     file://COPYING.LGPL;md5=4fbd65380cdd255951079008b364516c"
 
-DEPENDS = "libconfig"
-
 inherit autotools pkgconfig systemd update-rc.d update-alternatives
 
 PV = "0.2.0+git${SRCPV}"
@@ -19,27 +17,38 @@
 
 S = "${WORKDIR}/git"
 
-SYSTEMD_PACKAGES = "${PN}"
-SYSTEMD_SERVICE:${PN} = "usbgx.service"
+PACKAGECONFIG ??= "examples gadget-schemes libconfig"
+PACKAGECONFIG[libconfig] = "--with-libconfig=yes,--without-libconfig,libconfig"
+PACKAGECONFIG[examples] = "--enable-examples,--disable-examples"
+PACKAGECONFIG[gadget-schemes] = "--enable-gadget-schemes,--disable-gadget-schemes"
+PACKAGECONFIG[tests] = "--enable-tests,--disable-tests,cmocka"
 
+PACKAGE_BEFORE_PN = "${PN}-examples"
+
+SYSTEMD_PACKAGES = "${PN}-examples"
+SYSTEMD_SERVICE:${PN}-examples = "usbgx.service"
+SYSTEMD_AUTO_ENABLE:${PN}-examples = "${@bb.utils.contains('PACKAGECONFIG', 'examples', 'enable', 'disable', d)}"
+
+INITSCRIPT_PACKAGES = "${PN}-examples"
 INITSCRIPT_NAME = "usbgx"
 INITSCRIPT_PARAMS = "defaults"
-
-EXTRA_OECONF = "--includedir=${includedir}/usbgx"
+INHIBIT_UPDATERCD_BBCLASS = "${@bb.utils.contains('PACKAGECONFIG', 'examples', '1', '0', d)}"
 
 do_install:append() {
     install -Dm 0755 ${WORKDIR}/gadget-start ${D}/${bindir}/gadget-start
     if ${@bb.utils.contains('DISTRO_FEATURES','systemd','true','false',d)}; then
         install -Dm 0644 ${WORKDIR}/usbgx.service ${D}${systemd_system_unitdir}/usbgx.service
     fi
+
     if ${@bb.utils.contains('DISTRO_FEATURES', 'sysvinit', 'true', 'false', d)}; then
-		install -Dm 0755 ${WORKDIR}/usbgx.initd ${D}${sysconfdir}/init.d/usbgx
-	fi
+        install -Dm 0755 ${WORKDIR}/usbgx.initd ${D}${sysconfdir}/init.d/usbgx
+    fi
 }
 
-RDEPENDS:${PN} += "libusbgx-config"
+FILES:${PN}-examples = "${bindir}/* ${sysconfdir}/*"
+RDEPENDS:${PN}-examples += "${@bb.utils.contains('PACKAGECONFIG', 'examples', 'libusbgx-config', '', d)}"
 
 ALTERNATIVE_PRIORITY = "90"
-ALTERNATIVE:${PN} = "gadget-acm-ecm show-gadgets"
+ALTERNATIVE:${PN}-examples = "${@bb.utils.contains('PACKAGECONFIG', 'examples', 'gadget-acm-ecm show-gadgets', '', d)}"
 ALTERNATIVE_LINK_NAME[gadget-acm-ecm] = "${bindir}/gadget-acm-ecm"
 ALTERNATIVE_LINK_NAME[show-gadgets] = "${bindir}/show-gadgets"
diff --git a/meta-openembedded/meta-oe/recipes-support/libutempter/libutempter/0001-Fix-macro-error.patch b/meta-openembedded/meta-oe/recipes-support/libutempter/libutempter/0001-Fix-macro-error.patch
index 8140ea3..00119af 100644
--- a/meta-openembedded/meta-oe/recipes-support/libutempter/libutempter/0001-Fix-macro-error.patch
+++ b/meta-openembedded/meta-oe/recipes-support/libutempter/libutempter/0001-Fix-macro-error.patch
@@ -11,15 +11,13 @@
  iface.c | 9 ++++++++-
  1 file changed, 8 insertions(+), 1 deletion(-)
 
-diff --git a/iface.c b/iface.c
-index 5951d81..27793f0 100644
 --- a/iface.c
 +++ b/iface.c
 @@ -43,7 +43,14 @@
         __result; }))
  #endif
  
--#define	UTEMPTER_DEFAULT_PATHNAME	LIBEXECDIR "/utempter/utempter"
+-#define UTEMPTER_DEFAULT_PATHNAME	LIBEXECDIR "/utempter/utempter"
 +#ifdef  LIBEXECDIR
 +#  define CAT_PATH(DIR1,DIR2)           DIR1##DIR2
 +#  define RAW_UTEMPTER_PATH             CAT_PATH(LIBEXECDIR,/utempter/utempter)
@@ -31,6 +29,3 @@
  
  static const char *utempter_pathname;
  static int saved_fd = -1;
--- 
-1.8.3.1
-
diff --git a/meta-openembedded/meta-oe/recipes-support/libutempter/libutempter/libutempter-remove-glibc-assumption.patch b/meta-openembedded/meta-oe/recipes-support/libutempter/libutempter/libutempter-remove-glibc-assumption.patch
deleted file mode 100644
index 6ed9335..0000000
--- a/meta-openembedded/meta-oe/recipes-support/libutempter/libutempter/libutempter-remove-glibc-assumption.patch
+++ /dev/null
@@ -1,81 +0,0 @@
-diff -Naur libutempter-1.1.6.orig/utempter.c libutempter-1.1.6/utempter.c
---- libutempter-1.1.6.orig/utempter.c	2010-11-04 13:14:53.000000000 -0400
-+++ libutempter-1.1.6/utempter.c	2014-06-20 16:37:09.762403323 -0400
-@@ -34,13 +34,7 @@
- #include <sys/stat.h>
- #include <utmp.h>
- 
--#ifdef __GLIBC__
- # include <pty.h>
--#elif defined(__FreeBSD__)
--# include <libutil.h>
--#else
--# error Unsupported platform
--#endif /* __GLIBC__ || __FreeBSD__ */
- 
- #define	DEV_PREFIX	"/dev/"
- #define	DEV_PREFIX_LEN	(sizeof(DEV_PREFIX)-1)
-@@ -106,17 +100,12 @@
- 
- static int
- write_uwtmp_record(const char *user, const char *term, const char *host,
--#ifdef __GLIBC__
- 		   pid_t pid,
--#endif
- 		   int add)
- {
- 	struct utmp ut;
- 	struct timeval tv;
--
--#ifdef __GLIBC__
- 	size_t offset;
--#endif
- 
- 	memset(&ut, 0, sizeof(ut));
- 
-@@ -128,8 +117,6 @@
- 	if (host)
- 		strncpy(ut.ut_host, host, sizeof(ut.ut_host));
- 
--#ifdef __GLIBC__
--
- 	offset = (strlen(term) <= sizeof(ut.ut_id)) ? 0 :
- 			strlen(term) - sizeof(ut.ut_id);
- 	strncpy(ut.ut_id, term + offset, sizeof(ut.ut_id));
-@@ -156,27 +143,6 @@
- 
- 	(void) updwtmp(_PATH_WTMP, &ut);
- 
--#elif defined(__FreeBSD__)
--
--	ut.ut_time = tv.tv_sec;
--
--	if (add)
--	{
--		login(&ut);
--	} else
--	{
--		if (logout(term) != 1)
--		{
--#ifdef	UTEMPTER_DEBUG
--			fprintf(stderr, "utempter: logout: %s\n",
--				strerror(errno));
--#endif
--			exit(EXIT_FAILURE);
--		}
--	}
--
--#endif /* __GLIBC__ || __FreeBSD__ */
--
- #ifdef	UTEMPTER_DEBUG
- 	fprintf(stderr,
- 		"utempter: DEBUG: utmp/wtmp record %s for terminal '%s'\n",
-@@ -255,8 +221,6 @@
- 	validate_device(device);
- 
- 	return write_uwtmp_record(pw->pw_name, device + DEV_PREFIX_LEN, host,
--#ifdef __GLIBC__
- 				  pid,
--#endif
- 				  add);
- }
diff --git a/meta-openembedded/meta-oe/recipes-support/libutempter/libutempter.bb b/meta-openembedded/meta-oe/recipes-support/libutempter/libutempter_1.2.2-alt1.bb
similarity index 83%
rename from meta-openembedded/meta-oe/recipes-support/libutempter/libutempter.bb
rename to meta-openembedded/meta-oe/recipes-support/libutempter/libutempter_1.2.2-alt1.bb
index 5c05271..0d2cd13 100644
--- a/meta-openembedded/meta-oe/recipes-support/libutempter/libutempter.bb
+++ b/meta-openembedded/meta-oe/recipes-support/libutempter/libutempter_1.2.2-alt1.bb
@@ -5,16 +5,14 @@
 HOMEPAGE = "ftp://ftp.altlinux.org/pub/people/ldv/utempter"
 SECTION = "System Environment/Libraries"
 LICENSE = "GPL-2.0-only & GPL-2.0-or-later & LGPL-2.0-only & MIT"
-LIC_FILES_CHKSUM = "file://COPYING;md5=2d5025d4aa3495befef8f17206a5b0a1"
+LIC_FILES_CHKSUM = "file://COPYING;md5=2caced0b25dfefd4c601d92bd15116de"
 
-SRCREV = "3ef74fff310f09e2601e241b9f042cd39d591018"
-PV = "1.1.6-alt2+git${SRCPV}"
+SRCREV = "63825e2244629d44dae21132b1065d7ecc0491c0"
 
 SRC_URI = "git://git.altlinux.org/people/ldv/packages/libutempter.git;branch=master \
            file://0001-Fix-macro-error.patch \
            file://0002-Proper-macro-path-generation.patch \
-           file://libutempter-remove-glibc-assumption.patch \
-          "
+           "
 
 S = "${WORKDIR}/git/${BPN}"
 
diff --git a/meta-openembedded/meta-oe/recipes-support/lvm2/files/0005-do-not-build-manual.patch b/meta-openembedded/meta-oe/recipes-support/lvm2/files/0005-do-not-build-manual.patch
index 7117d99..7979efb 100644
--- a/meta-openembedded/meta-oe/recipes-support/lvm2/files/0005-do-not-build-manual.patch
+++ b/meta-openembedded/meta-oe/recipes-support/lvm2/files/0005-do-not-build-manual.patch
@@ -15,25 +15,26 @@
 
 Signed-off-by: Hongxu Jia <hongxu.jia@windriver.com>
 
-
 Update context for lvm2 2.03.02.
-
 Signed-off-by: Kai Kang <kai.kang@windriver.com>
+
+Update context for lvm2 2.03.16.
+Signed-off-by: Wang Mingyu <wangmy@fujitsu.com>
 ---
  Makefile.in  | 6 +++---
  configure.ac | 1 -
  2 files changed, 3 insertions(+), 4 deletions(-)
 
 diff --git a/Makefile.in b/Makefile.in
-index 6a1a990..80cba91 100644
+index 3b7e0ec..37f14eb 100644
 --- a/Makefile.in
 +++ b/Makefile.in
 @@ -18,7 +18,7 @@ top_builddir = @top_builddir@
  abs_top_builddir = @abs_top_builddir@
  abs_top_srcdir = @abs_top_srcdir@
  
--SUBDIRS = conf daemons include lib libdaemon libdm man scripts tools
-+SUBDIRS = conf daemons include lib libdaemon libdm scripts tools
+-SUBDIRS = libdm conf daemons include lib libdaemon man scripts tools
++SUBDIRS = libdm conf daemons include lib libdaemon scripts tools
  
  ifeq ("@UDEV_RULES@", "yes")
    SUBDIRS += udev
@@ -46,20 +47,20 @@
      libdaemon lib tools daemons libdm \
      udev po
  tools.distclean: test.distclean
-@@ -65,7 +65,7 @@ libdm.device-mapper: include.device-mapper
+@@ -59,7 +59,7 @@ unit-test  run-unit-test: test
+ 
  daemons.device-mapper: libdm.device-mapper
  tools.device-mapper: libdm.device-mapper
- scripts.device-mapper: include.device-mapper
 -device-mapper: tools.device-mapper daemons.device-mapper man.device-mapper
 +device-mapper: tools.device-mapper daemons.device-mapper
+ device_mapper: device-mapper
  
  ifeq ("@INTL@", "yes")
- lib.pofile: include.pofile
 diff --git a/configure.ac b/configure.ac
-index 7fd0561..4154dc0 100644
+index 6def519..18a5c97 100644
 --- a/configure.ac
 +++ b/configure.ac
-@@ -1812,7 +1812,6 @@ libdaemon/server/Makefile
+@@ -2018,7 +2018,6 @@ libdaemon/server/Makefile
  libdm/Makefile
  libdm/dm-tools/Makefile
  libdm/libdevmapper.pc
@@ -68,5 +69,5 @@
  scripts/lvm2-pvscan.service
  scripts/blkdeactivate.sh
 -- 
-2.7.4
+2.25.1
 
diff --git a/meta-openembedded/meta-oe/recipes-support/lvm2/files/0006-start-lvm2-monitor.service-after-tmp.mount.patch b/meta-openembedded/meta-oe/recipes-support/lvm2/files/0006-start-lvm2-monitor.service-after-tmp.mount.patch
index 9f9383e..5b9b2d5 100644
--- a/meta-openembedded/meta-oe/recipes-support/lvm2/files/0006-start-lvm2-monitor.service-after-tmp.mount.patch
+++ b/meta-openembedded/meta-oe/recipes-support/lvm2/files/0006-start-lvm2-monitor.service-after-tmp.mount.patch
@@ -11,23 +11,26 @@
 
 Rebase to v2_03_05
 Signed-off-by: Hongxu Jia <hongxu.jia@windriver.com>
+
+Rebase to v2_03_16
+Signed-off-by: Wang Mingyu <wangmy@fujitsu.com>
 ---
  scripts/lvm2_monitoring_systemd_red_hat.service.in | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/scripts/lvm2_monitoring_systemd_red_hat.service.in b/scripts/lvm2_monitoring_systemd_red_hat.service.in
-index 4bf744a..46766cb 100644
+index c0c96e3..2a99a37 100644
 --- a/scripts/lvm2_monitoring_systemd_red_hat.service.in
 +++ b/scripts/lvm2_monitoring_systemd_red_hat.service.in
 @@ -2,7 +2,7 @@
  Description=Monitoring of LVM2 mirrors, snapshots etc. using dmeventd or progress polling
  Documentation=man:dmeventd(8) man:lvcreate(8) man:lvchange(8) man:vgchange(8)
  Requires=dm-event.socket
--After=dm-event.socket dm-event.service lvm2-activation.service
-+After=dm-event.socket dm-event.service lvm2-activation.service tmp.mount
+-After=dm-event.socket dm-event.service
++After=dm-event.socket dm-event.service tmp.mount
  Before=local-fs-pre.target shutdown.target
  DefaultDependencies=no
  Conflicts=shutdown.target
 -- 
-2.7.4
+2.25.1
 
diff --git a/meta-openembedded/meta-oe/recipes-support/lvm2/libdevmapper_2.03.11.bb b/meta-openembedded/meta-oe/recipes-support/lvm2/libdevmapper_2.03.16.bb
similarity index 100%
rename from meta-openembedded/meta-oe/recipes-support/lvm2/libdevmapper_2.03.11.bb
rename to meta-openembedded/meta-oe/recipes-support/lvm2/libdevmapper_2.03.16.bb
diff --git a/meta-openembedded/meta-oe/recipes-support/lvm2/lvm2.inc b/meta-openembedded/meta-oe/recipes-support/lvm2/lvm2.inc
index 0c593b2..277e76f 100644
--- a/meta-openembedded/meta-oe/recipes-support/lvm2/lvm2.inc
+++ b/meta-openembedded/meta-oe/recipes-support/lvm2/lvm2.inc
@@ -18,7 +18,7 @@
            file://reproducible-build.patch \
            "
 
-SRCREV = "3e8bd8d1bd70691f09a170785836aeb4f83154e6"
+SRCREV = "6d1e894a867681af7a811ed63bc435f3aa55fec8"
 S = "${WORKDIR}/git"
 
 UPSTREAM_CHECK_GITTAGREGEX = "v(?P<pver>\d+(\_\d+)+)"
@@ -57,3 +57,7 @@
                 --with-thin-repair=${sbindir}/thin_repair \
                 --with-thin-restore=${sbindir}/thin_restore \
 "
+
+# gold doesn't like multiple dm_bitset_parse_list definitions in libdm/.exported_symbols.DM_1_02_138 and libdm/.exported_symbols.DM_1_02_129
+# after it was uncommented later, in 2.03.12, with https://github.com/lvmteam/lvm2/commit/60eb608d66c2056a78e81f27db3da14139d9faab
+LDFLAGS:append = "${@bb.utils.contains('DISTRO_FEATURES', 'ld-is-gold', " -fuse-ld=bfd", '', d)}"
diff --git a/meta-openembedded/meta-oe/recipes-support/lvm2/lvm2_2.03.11.bb b/meta-openembedded/meta-oe/recipes-support/lvm2/lvm2_2.03.16.bb
similarity index 98%
rename from meta-openembedded/meta-oe/recipes-support/lvm2/lvm2_2.03.11.bb
rename to meta-openembedded/meta-oe/recipes-support/lvm2/lvm2_2.03.16.bb
index a729324..6a6cdc7 100644
--- a/meta-openembedded/meta-oe/recipes-support/lvm2/lvm2_2.03.11.bb
+++ b/meta-openembedded/meta-oe/recipes-support/lvm2/lvm2_2.03.16.bb
@@ -68,6 +68,7 @@
 # Specified explicitly for the udev rules, just in case that it does not get picked
 # up automatically:
 FILES:${PN}-udevrules = "${nonarch_base_libdir}/udev/rules.d"
+RDEPENDS:${PN} = "bash"
 RDEPENDS:${PN}-udevrules = "libdevmapper"
 RDEPENDS:${PN}:append:class-target = " libdevmapper"
 RDEPENDS:${PN}:append:class-nativesdk = " libdevmapper"
diff --git a/meta-openembedded/meta-oe/recipes-support/mcelog/mcelog/0001-client-Include-string.h-form-mem-function-prototypes.patch b/meta-openembedded/meta-oe/recipes-support/mcelog/mcelog/0001-client-Include-string.h-form-mem-function-prototypes.patch
new file mode 100644
index 0000000..3b6fd7d
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/mcelog/mcelog/0001-client-Include-string.h-form-mem-function-prototypes.patch
@@ -0,0 +1,26 @@
+From c7753f82261d49862d5e0f9691bf87799d36ef0c Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 31 Aug 2022 16:20:03 -0700
+Subject: [PATCH] client: Include string.h for mem* function prototypes
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ client.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/client.c b/client.c
+index 883b1de..e474e55 100644
+--- a/client.c
++++ b/client.c
+@@ -16,6 +16,7 @@
+    on your Linux system; if not, write to the Free Software Foundation, 
+    Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
+ #include <stdio.h>
++#include <string.h>
+ #include <sys/socket.h>
+ #include <sys/un.h>
+ #include <unistd.h>
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/mcelog/mcelog/0001-genconfig.py-update-shebang.patch b/meta-openembedded/meta-oe/recipes-support/mcelog/mcelog/0001-genconfig.py-update-shebang.patch
deleted file mode 100644
index 10d17f5..0000000
--- a/meta-openembedded/meta-oe/recipes-support/mcelog/mcelog/0001-genconfig.py-update-shebang.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From b09ff147e9383d33e6adbcb356b52236610fceda Mon Sep 17 00:00:00 2001
-From: Kai Kang <kai.kang@windriver.com>
-Date: Fri, 1 Apr 2022 10:00:00 +0800
-Subject: [PATCH] genconfig.py: update shebang
-
-It fails to call genconfig.py if no /usr/bin/python on build machine:
-
-| ./genconfig.py mcelog.conf config-intro.man > mcelog.conf.5
-| /bin/sh: ./genconfig.py: /usr/bin/python: bad interpreter: No such file or directory
-| Makefile:71: recipe for target 'mcelog.conf.5' failed
-
-Update shebang with 'env python3' instead.
-
-Upstream-Status: Pending
-
-Signed-off-by: Kai Kang <kai.kang@windriver.com>
----
- genconfig.py | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/genconfig.py b/genconfig.py
-index ce9987f..982cac6 100755
---- a/genconfig.py
-+++ b/genconfig.py
-@@ -1,4 +1,4 @@
--#!/usr/bin/python
-+#!/usr/bin/env python3
- # generate man config documentation from mcelog.conf example
- # genconfig.py mcelog.conf intro.html
- from __future__ import print_function
--- 
-2.17.1
-
diff --git a/meta-openembedded/meta-oe/recipes-support/mcelog/mcelog_180.bb b/meta-openembedded/meta-oe/recipes-support/mcelog/mcelog_189.bb
similarity index 89%
rename from meta-openembedded/meta-oe/recipes-support/mcelog/mcelog_180.bb
rename to meta-openembedded/meta-oe/recipes-support/mcelog/mcelog_189.bb
index a64aaca..2f8af9a 100644
--- a/meta-openembedded/meta-oe/recipes-support/mcelog/mcelog_180.bb
+++ b/meta-openembedded/meta-oe/recipes-support/mcelog/mcelog_189.bb
@@ -7,11 +7,11 @@
 
 SRC_URI = "\
     git://git.kernel.org/pub/scm/utils/cpu/mce/mcelog.git;protocol=http;;branch=master \
+    file://0001-client-Include-string.h-form-mem-function-prototypes.patch \
     file://run-ptest \
-    file://0001-genconfig.py-update-shebang.patch \
 "
 
-SRCREV = "4146c9296a0cbd26f1c5e411cb44877f350053bd"
+SRCREV = "b8dfb1fa34eb627a03adfb315326a29ec51214b3"
 
 LICENSE = "GPL-2.0-only"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=b234ee4d69f5fce4486a80fdaf4a4263"
diff --git a/meta-openembedded/meta-oe/recipes-support/mg/mg_20210609.bb b/meta-openembedded/meta-oe/recipes-support/mg/mg_20220614.bb
similarity index 88%
rename from meta-openembedded/meta-oe/recipes-support/mg/mg_20210609.bb
rename to meta-openembedded/meta-oe/recipes-support/mg/mg_20220614.bb
index 5deb5c6..01ccef5 100644
--- a/meta-openembedded/meta-oe/recipes-support/mg/mg_20210609.bb
+++ b/meta-openembedded/meta-oe/recipes-support/mg/mg_20220614.bb
@@ -1,11 +1,11 @@
 SUMMARY = "A portable version of the mg maintained by the OpenBSD team"
 HOMEPAGE = "http://homepage.boetes.org/software/mg/"
 LICENSE = "PD"
-LIC_FILES_CHKSUM = "file://version.c;md5=1895eb37bf6bd79cdc5c89d8166fabfb"
+LIC_FILES_CHKSUM = "file://version.c;md5=43616508f42ab3fdb082c5f932bd2eb4"
 DEPENDS = "ncurses libbsd"
 SECTION = "console/editors"
 
-SRCREV = "688f49cd67ab30dfa6482c74815e117cbf7af63a"
+SRCREV = "ea58e622fad20622ac0b723d021baaaf5dc0eb49"
 SRC_URI = "git://github.com/hboetes/mg;branch=master;protocol=https \
            file://0001-fileio-Include-sys-param.h-for-MAXNAMLEN.patch \
            file://0002-fileio-Define-DEFFILEMODE-if-platform-is-missing.patch \
diff --git a/meta-openembedded/meta-oe/recipes-support/neon/neon_0.32.2.bb b/meta-openembedded/meta-oe/recipes-support/neon/neon_0.32.4.bb
similarity index 95%
rename from meta-openembedded/meta-oe/recipes-support/neon/neon_0.32.2.bb
rename to meta-openembedded/meta-oe/recipes-support/neon/neon_0.32.4.bb
index 0f4e971..c16a2c1 100644
--- a/meta-openembedded/meta-oe/recipes-support/neon/neon_0.32.2.bb
+++ b/meta-openembedded/meta-oe/recipes-support/neon/neon_0.32.4.bb
@@ -11,7 +11,7 @@
            file://run-ptest \
            "
 
-SRC_URI[sha256sum] = "7a25ba2c9223676b9aaec22a585a0ca118127bad71deed0b9ed6cd960fe5c353"
+SRC_URI[sha256sum] = "1bcdaabb63206b5e00c9a8c4b3672168b523b49eb559c5a486e3256463af34fd"
 
 inherit autotools-brokensep binconfig-disabled lib_package pkgconfig ptest
 
diff --git a/meta-openembedded/meta-oe/recipes-support/openldap/openldap/0001-configure-Pass-pthread_t-to-pthread_detach.patch b/meta-openembedded/meta-oe/recipes-support/openldap/openldap/0001-configure-Pass-pthread_t-to-pthread_detach.patch
new file mode 100644
index 0000000..6e73f8b
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/openldap/openldap/0001-configure-Pass-pthread_t-to-pthread_detach.patch
@@ -0,0 +1,32 @@
+From 7577b120acda087bf3f5f613c2c72663b3864ad8 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Sun, 4 Sep 2022 09:43:06 -0700
+Subject: [PATCH] configure: Pass pthread_t to pthread_detach
+
+This helps compilers when using C2X standard
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.ac | 5 +----
+ 1 file changed, 1 insertion(+), 4 deletions(-)
+
+diff --git a/configure.ac b/configure.ac
+index 0978eeb..58d15f8 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -1467,10 +1467,7 @@ pthread_rwlock_t rwlock;
+ 				dnl save the flags
+ 				AC_LINK_IFELSE([AC_LANG_PROGRAM([[
+ #include <pthread.h>
+-#ifndef NULL
+-#define NULL (void*)0
+-#endif
+-]], [[pthread_detach(NULL);]])],[ol_cv_func_pthread_detach=yes],[ol_cv_func_pthread_detach=no])
++]], [[pthread_detach((pthread_t)-1);]])],[ol_cv_func_pthread_detach=yes],[ol_cv_func_pthread_detach=no])
+ 			])
+ 
+ 			if test $ol_cv_func_pthread_detach = no ; then
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/openldap/openldap_2.5.12.bb b/meta-openembedded/meta-oe/recipes-support/openldap/openldap_2.5.12.bb
index c01db08..a8ea84d 100644
--- a/meta-openembedded/meta-oe/recipes-support/openldap/openldap_2.5.12.bb
+++ b/meta-openembedded/meta-oe/recipes-support/openldap/openldap_2.5.12.bb
@@ -22,6 +22,7 @@
     file://0001-build-top.mk-unset-STRIP_OPTS.patch \
     file://0001-libraries-Makefile.in-ignore-the-mkdir-errors.patch \
     file://0001-librewrite-include-ldap_pvt_thread.h-before-redefini.patch \
+    file://0001-configure-Pass-pthread_t-to-pthread_detach.patch \
 "
 
 SRC_URI[sha256sum] = "d5086cbfc49597fa7d0670a429a9054552d441b16ee8b2435412797ab0e37b96"
diff --git a/meta-openembedded/meta-oe/recipes-support/poppler/poppler_22.08.0.bb b/meta-openembedded/meta-oe/recipes-support/poppler/poppler_22.09.0.bb
similarity index 95%
rename from meta-openembedded/meta-oe/recipes-support/poppler/poppler_22.08.0.bb
rename to meta-openembedded/meta-oe/recipes-support/poppler/poppler_22.09.0.bb
index c75bf79..2cae897 100644
--- a/meta-openembedded/meta-oe/recipes-support/poppler/poppler_22.08.0.bb
+++ b/meta-openembedded/meta-oe/recipes-support/poppler/poppler_22.09.0.bb
@@ -7,7 +7,7 @@
            file://0001-Do-not-overwrite-all-our-build-flags.patch \
            file://basename-include.patch \
            "
-SRC_URI[sha256sum] = "b493328721402f25cb7523f9cdc2f7d7c59f45ad999bde75c63c90604db0f20b"
+SRC_URI[sha256sum] = "d7a8f748211359cadb774ba3e18ecda6464b34027045c0648eb30d5852a41e2e"
 
 DEPENDS = "fontconfig zlib cairo lcms glib-2.0"
 
diff --git a/meta-openembedded/meta-oe/recipes-support/raptor2/files/0001-Match-reutrn-type-from-RAPTOR_ASSERT_OBJECT_POINTER_.patch b/meta-openembedded/meta-oe/recipes-support/raptor2/files/0001-Match-reutrn-type-from-RAPTOR_ASSERT_OBJECT_POINTER_.patch
new file mode 100644
index 0000000..4c1afb4
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/raptor2/files/0001-Match-reutrn-type-from-RAPTOR_ASSERT_OBJECT_POINTER_.patch
@@ -0,0 +1,47 @@
+From 97b5dcaa6b221eb403cc92e953225d38aee18f70 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 14:48:39 -0700
+Subject: [PATCH] Match return type from
+ RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE
+
+This ensures that integer type 0 is returned and not NULL
+Fixes
+raptor_serialize.c:243:66: error: incompatible pointer to integer conversion returning 'void *' from a function with result type 'int' [-Wint-conversion]
+  RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL);
+                                                                 ^~~~
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/raptor_parse.c     | 2 +-
+ src/raptor_serialize.c | 2 +-
+ 2 files changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/src/raptor_parse.c b/src/raptor_parse.c
+index 26911f4..0091e1e 100644
+--- a/src/raptor_parse.c
++++ b/src/raptor_parse.c
+@@ -257,7 +257,7 @@ raptor_world_get_parser_factory(raptor_world *world, const char *name)
+ int
+ raptor_world_get_parsers_count(raptor_world* world)
+ {
+-  RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL);
++  RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, 0);
+ 
+   raptor_world_open(world);
+ 
+diff --git a/src/raptor_serialize.c b/src/raptor_serialize.c
+index a1f29d7..2bf4ab2 100644
+--- a/src/raptor_serialize.c
++++ b/src/raptor_serialize.c
+@@ -240,7 +240,7 @@ raptor_get_serializer_factory(raptor_world* world, const char *name)
+ int
+ raptor_world_get_serializers_count(raptor_world* world)
+ {
+-  RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL);
++  RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, 0);
+ 
+   raptor_world_open(world);
+ 
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/raptor2/raptor2_2.0.15.bb b/meta-openembedded/meta-oe/recipes-support/raptor2/raptor2_2.0.15.bb
index 577c6ee..22306a5 100644
--- a/meta-openembedded/meta-oe/recipes-support/raptor2/raptor2_2.0.15.bb
+++ b/meta-openembedded/meta-oe/recipes-support/raptor2/raptor2_2.0.15.bb
@@ -12,6 +12,7 @@
 SRC_URI = " \
     http://download.librdf.org/source/${BPN}-${PV}.tar.gz \
     file://0001-configure.ac-do-additional-checks-on-libxml2-also-wh.patch \
+    file://0001-Match-reutrn-type-from-RAPTOR_ASSERT_OBJECT_POINTER_.patch \
 "
 SRC_URI[md5sum] = "a39f6c07ddb20d7dd2ff1f95fa21e2cd"
 SRC_URI[sha256sum] = "ada7f0ba54787b33485d090d3d2680533520cd4426d2f7fb4782dd4a6a1480ed"
diff --git a/meta-openembedded/meta-oe/recipes-support/rdfind/rdfind/0001-configure-Fix-check-for-AC_CHECK_LIB.patch b/meta-openembedded/meta-oe/recipes-support/rdfind/rdfind/0001-configure-Fix-check-for-AC_CHECK_LIB.patch
new file mode 100644
index 0000000..af69c02
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/rdfind/rdfind/0001-configure-Fix-check-for-AC_CHECK_LIB.patch
@@ -0,0 +1,25 @@
+From 823a4deb61f6f9b91b0cfc4a7e7b20922c635777 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 13:13:50 -0700
+Subject: [PATCH] configure: Fix check for AC_CHECK_LIB
+
+Check for nettle_pbkdf2_hmac_sha256 from libnettle instead of main()
+which is not in nettle library
+
+Upstream-Status: Submitted [https://github.com/pauldreik/rdfind/pull/115]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.ac | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/configure.ac
++++ b/configure.ac
+@@ -46,7 +46,7 @@ AC_CHECK_HEADER(nettle/sha.h,,[AC_MSG_ER
+  On Debian-ish systems, use \"apt-get install nettle-dev\" to get a system
+  wide nettle install.
+ ])]) 
+-AC_CHECK_LIB(nettle,main,,[AC_MSG_ERROR([
++AC_CHECK_LIB(nettle,nettle_pbkdf2_hmac_sha256,,[AC_MSG_ERROR([
+  Could not link to libnettle. Please install nettle
+  first. If you have already done so; please run ldconfig
+  as root or check whether the path libnettle was installed
diff --git a/meta-openembedded/meta-oe/recipes-support/rdfind/rdfind_1.4.1.bb b/meta-openembedded/meta-oe/recipes-support/rdfind/rdfind_1.4.1.bb
index 64ae62a..f8eb5d3 100644
--- a/meta-openembedded/meta-oe/recipes-support/rdfind/rdfind_1.4.1.bb
+++ b/meta-openembedded/meta-oe/recipes-support/rdfind/rdfind_1.4.1.bb
@@ -6,6 +6,7 @@
 DEPENDS = "nettle autoconf-archive"
 
 SRC_URI = "https://rdfind.pauldreik.se/${BP}.tar.gz \
+           file://0001-configure-Fix-check-for-AC_CHECK_LIB.patch \
 "
 
 SRC_URI[md5sum] = "180418c863b861d1df221bc486a07ce7"
diff --git a/meta-openembedded/meta-oe/recipes-support/serial-utils/serial-forward/0001-correct-the-typo-in-include-file-name-string.h.patch b/meta-openembedded/meta-oe/recipes-support/serial-utils/serial-forward/0001-correct-the-typo-in-include-file-name-string.h.patch
new file mode 100644
index 0000000..f65d2c4
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/serial-utils/serial-forward/0001-correct-the-typo-in-include-file-name-string.h.patch
@@ -0,0 +1,27 @@
+From 2897794c337625d1951eb0b402cd8840ae2182a8 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 31 Aug 2022 10:33:57 -0700
+Subject: [PATCH] correct the typo in include file name string.h
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ tools/serial_forward/src/forward.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/tools/serial_forward/src/forward.c b/tools/serial_forward/src/forward.c
+index 9fad6f9b..4c0dd9df 100644
+--- a/tools/serial_forward/src/forward.c
++++ b/tools/serial_forward/src/forward.c
+@@ -20,7 +20,7 @@
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <termios.h>
+-#include <strings.h>
++#include <string.h>
+ #include <getopt.h>
+ 
+ #include "forward.h"
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/serial-utils/serial-forward_git.bb b/meta-openembedded/meta-oe/recipes-support/serial-utils/serial-forward_git.bb
index 2eef1db..fe5cfc0 100644
--- a/meta-openembedded/meta-oe/recipes-support/serial-utils/serial-forward_git.bb
+++ b/meta-openembedded/meta-oe/recipes-support/serial-utils/serial-forward_git.bb
@@ -8,6 +8,7 @@
 
 SRC_URI = "git://github.com/freesmartphone/cornucopia.git;branch=master;protocol=https \
            file://0001-serial_forward-Disable-default-static-linking.patch;striplevel=3 \
+           file://0001-correct-the-typo-in-include-file-name-string.h.patch;striplevel=3 \
           "
 S = "${WORKDIR}/git/tools/serial_forward"
 
diff --git a/meta-openembedded/meta-oe/recipes-support/smarty/smarty_4.2.0.bb b/meta-openembedded/meta-oe/recipes-support/smarty/smarty_4.2.1.bb
similarity index 93%
rename from meta-openembedded/meta-oe/recipes-support/smarty/smarty_4.2.0.bb
rename to meta-openembedded/meta-oe/recipes-support/smarty/smarty_4.2.1.bb
index 2cd96a2..71040d0 100644
--- a/meta-openembedded/meta-oe/recipes-support/smarty/smarty_4.2.0.bb
+++ b/meta-openembedded/meta-oe/recipes-support/smarty/smarty_4.2.1.bb
@@ -9,7 +9,7 @@
 
 SRC_URI = "git://github.com/smarty-php/smarty.git;protocol=https;branch=master"
 
-SRCREV = "97aeb14c6fc2fb733938809926e2f9d6c581a70d"
+SRCREV = "ffa2b81a8e354a49fd8a2f24742dc9dc399e8007"
 
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-oe/recipes-support/srecord/files/0001-configure-Fix-AC_CHECK_LIB-test-for-gpg-error.patch b/meta-openembedded/meta-oe/recipes-support/srecord/files/0001-configure-Fix-AC_CHECK_LIB-test-for-gpg-error.patch
new file mode 100644
index 0000000..9a411f0
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/srecord/files/0001-configure-Fix-AC_CHECK_LIB-test-for-gpg-error.patch
@@ -0,0 +1,29 @@
+From f9e50aba10f9d2c58e5d88f51810ca7143f4372b Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 13:20:01 -0700
+Subject: [PATCH] configure: Fix AC_CHECK_LIB test for gpg-error
+
+Use check function which is found in libgpg-error
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ etc/configure.ac | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/etc/configure.ac b/etc/configure.ac
+index d4ae290..ea0936d 100644
+--- a/etc/configure.ac
++++ b/etc/configure.ac
+@@ -30,7 +30,7 @@ AC_CHECK_PROGS(SOELIM, gsoelim soelim roffpp)
+ AC_CHECK_FUNCS(snprintf vsnprintf)
+ 
+ dnl! This is needed for MingGW build, but not for Unix or Linux, etc.
+-AC_CHECK_LIB(gpg-error, main)
++AC_CHECK_LIB(gpg-error, gpg_strerror_r)
+ 
+ dnl! @synopsis AC_ADD_CFLAGS
+ dnl!
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/srecord/srecord_1.64.bb b/meta-openembedded/meta-oe/recipes-support/srecord/srecord_1.64.bb
index 5765272..0629c0f 100644
--- a/meta-openembedded/meta-oe/recipes-support/srecord/srecord_1.64.bb
+++ b/meta-openembedded/meta-oe/recipes-support/srecord/srecord_1.64.bb
@@ -7,6 +7,7 @@
     http://srecord.sourceforge.net/srecord-${PV}.tar.gz \
     file://add-option-to-remove-docs.patch \
     file://libtool.patch \
+    file://0001-configure-Fix-AC_CHECK_LIB-test-for-gpg-error.patch \
 "
 
 SRC_URI[md5sum] = "4de4a7497472d7972645c2af91313769"
diff --git a/meta-openembedded/meta-oe/recipes-support/syslog-ng/syslog-ng_3.37.1.bb b/meta-openembedded/meta-oe/recipes-support/syslog-ng/syslog-ng_3.38.1.bb
similarity index 97%
rename from meta-openembedded/meta-oe/recipes-support/syslog-ng/syslog-ng_3.37.1.bb
rename to meta-openembedded/meta-oe/recipes-support/syslog-ng/syslog-ng_3.38.1.bb
index 10d63cc..7694509 100644
--- a/meta-openembedded/meta-oe/recipes-support/syslog-ng/syslog-ng_3.37.1.bb
+++ b/meta-openembedded/meta-oe/recipes-support/syslog-ng/syslog-ng_3.38.1.bb
@@ -10,7 +10,7 @@
 HOMEPAGE = "http://www.balabit.com/network-security/syslog-ng/opensource-logging-system"
 
 LICENSE = "GPL-2.0-only & LGPL-2.1-only"
-LIC_FILES_CHKSUM = "file://COPYING;md5=189c3826d32deaf83ad8d0d538a10023"
+LIC_FILES_CHKSUM = "file://COPYING;md5=71d15c2fb22f43e1a380f3f799ebde30"
 
 # util-linux added to get libuuid
 DEPENDS = "libpcre flex glib-2.0 openssl util-linux bison-native"
@@ -24,7 +24,7 @@
            file://syslog-ng.service-the-syslog-ng-service.patch \
 "
 
-SRC_URI[sha256sum] = "d67a320cb896cd5d62f24d9e1bec138847fa4618ae13a3946cae2b75c528ee14"
+SRC_URI[sha256sum] = "5491f686d0b829b69b2e0fc0d66a62f51991aafaee005475bfa38fab399441f7"
 
 UPSTREAM_CHECK_URI = "https://github.com/balabit/syslog-ng/releases"
 
diff --git a/meta-openembedded/meta-oe/recipes-support/tokyocabinet/tokyocabinet/0001-configure-Fix-check-functions-for-AC_CHECK_LIB-test.patch b/meta-openembedded/meta-oe/recipes-support/tokyocabinet/tokyocabinet/0001-configure-Fix-check-functions-for-AC_CHECK_LIB-test.patch
new file mode 100644
index 0000000..001fa9d
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/tokyocabinet/tokyocabinet/0001-configure-Fix-check-functions-for-AC_CHECK_LIB-test.patch
@@ -0,0 +1,58 @@
+From e192da006dd9b0ecbbef540a3e86b65ff88e89e2 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 13:28:29 -0700
+Subject: [PATCH] configure: Fix check functions for AC_CHECK_LIB test
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.in | 18 +++++++++---------
+ 1 file changed, 9 insertions(+), 9 deletions(-)
+
+diff --git a/configure.in b/configure.in
+index 5b24947..0542490 100644
+--- a/configure.in
++++ b/configure.in
+@@ -247,30 +247,30 @@ test -n "$LDFLAGS" && MYLDFLAGS="$LDFLAGS $MYLDFLAGS"
+ AC_C_BIGENDIAN(MYCPPFLAGS="$MYCPPFLAGS -D_MYBIGEND")
+ 
+ # Underlying libraries
+-AC_CHECK_LIB(c, main)
+-AC_CHECK_LIB(m, main)
++AC_CHECK_LIB(c, printf)
++AC_CHECK_LIB(m, pow)
+ if test "$enable_pthread" != "no"
+ then
+-  AC_CHECK_LIB(pthread, main)
+-  AC_CHECK_LIB(rt, main)
++  AC_CHECK_LIB(pthread, pthread_create)
++  AC_CHECK_LIB(rt, clock_gettime)
+ fi
+ if test "$enable_zlib" != "no"
+ then
+-  AC_CHECK_LIB(z, main)
++  AC_CHECK_LIB(z, inflate)
+ fi
+ if test "$enable_bzip" != "no"
+ then
+-  AC_CHECK_LIB(bz2, main)
++  AC_CHECK_LIB(bz2, BZ2_bzCompressInit)
+ fi
+ if test "$enable_exlzma" = "yes"
+ then
+-  AC_CHECK_LIB(lzma, main)
++  AC_CHECK_LIB(lzma, lzma_easy_buffer_encode)
+ fi
+ if test "$enable_exlzo" = "yes"
+ then
+-  AC_CHECK_LIB(lzo2, main)
++  AC_CHECK_LIB(lzo2, lzo1x_1_compress)
+ fi
+-AC_CHECK_LIB(tokyocabinet, main, AC_MSG_WARN([old version of Tokyo Cabinet was detected]))
++AC_CHECK_LIB(tokyocabinet, tcbdbopen, AC_MSG_WARN([old version of Tokyo Cabinet was detected]))
+ 
+ # Necessary headers
+ AC_CHECK_HEADER(stdlib.h, true, AC_MSG_ERROR([stdlib.h is required]))
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/tokyocabinet/tokyocabinet_1.4.48.bb b/meta-openembedded/meta-oe/recipes-support/tokyocabinet/tokyocabinet_1.4.48.bb
index 22cbcc0..ae00ff4 100644
--- a/meta-openembedded/meta-oe/recipes-support/tokyocabinet/tokyocabinet_1.4.48.bb
+++ b/meta-openembedded/meta-oe/recipes-support/tokyocabinet/tokyocabinet_1.4.48.bb
@@ -17,6 +17,7 @@
 
 SRC_URI = "http://fallabs.com/tokyocabinet/${BP}.tar.gz \
            file://remove-hard-coded-include-and-lib-paths.patch \
+           file://0001-configure-Fix-check-functions-for-AC_CHECK_LIB-test.patch \
 "
 
 SRC_URI[md5sum] = "fd03df6965f8f56dd5b8518ca43b4f5e"
diff --git a/meta-openembedded/meta-oe/recipes-support/tree/tree_2.0.3.bb b/meta-openembedded/meta-oe/recipes-support/tree/tree_2.0.4.bb
similarity index 86%
rename from meta-openembedded/meta-oe/recipes-support/tree/tree_2.0.3.bb
rename to meta-openembedded/meta-oe/recipes-support/tree/tree_2.0.4.bb
index c5f3364..5ef5336 100644
--- a/meta-openembedded/meta-oe/recipes-support/tree/tree_2.0.3.bb
+++ b/meta-openembedded/meta-oe/recipes-support/tree/tree_2.0.4.bb
@@ -5,7 +5,7 @@
 LIC_FILES_CHKSUM = "file://LICENSE;md5=393a5ca445f6965873eca0259a17f833"
 
 SRC_URI = "http://mama.indstate.edu/users/ice/tree/src/${BP}.tgz"
-SRC_URI[sha256sum] = "ba14e77b5f9dc7f8250c3f702ec5b6be2f93cd0fa87311bab3239676866a3b1d"
+SRC_URI[sha256sum] = "b0ea92197849579a3f09a50dbefc3d4708caf555d304a830e16e20b73b4ffa74"
 
 # tree's default CFLAGS for Linux
 CFLAGS += "-Wall -DLINUX -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64"
diff --git a/meta-openembedded/meta-oe/recipes-support/vboxguestdrivers/vboxguestdrivers_6.1.36.bb b/meta-openembedded/meta-oe/recipes-support/vboxguestdrivers/vboxguestdrivers_6.1.38.bb
similarity index 97%
rename from meta-openembedded/meta-oe/recipes-support/vboxguestdrivers/vboxguestdrivers_6.1.36.bb
rename to meta-openembedded/meta-oe/recipes-support/vboxguestdrivers/vboxguestdrivers_6.1.38.bb
index 7eb497a..235bc30 100644
--- a/meta-openembedded/meta-oe/recipes-support/vboxguestdrivers/vboxguestdrivers_6.1.36.bb
+++ b/meta-openembedded/meta-oe/recipes-support/vboxguestdrivers/vboxguestdrivers_6.1.38.bb
@@ -15,7 +15,7 @@
     file://Makefile.utils \
 "
 
-SRC_URI[sha256sum] = "e47942e42892c13c621869865e2b7b320340154f0fa74ecbdaf18fdaf70ef047"
+SRC_URI[sha256sum] = "56d997b58154df3974ce040a64970fa774add41e84b23dfb84b279b24545d7e4"
 
 S ?= "${WORKDIR}/vbox_module"
 S:task-patch = "${WORKDIR}/${VBOX_NAME}"
diff --git a/meta-openembedded/meta-oe/recipes-support/xorg-xrdp/xorgxrdp_0.2.18.bb b/meta-openembedded/meta-oe/recipes-support/xorg-xrdp/xorgxrdp_0.9.19.bb
similarity index 71%
rename from meta-openembedded/meta-oe/recipes-support/xorg-xrdp/xorgxrdp_0.2.18.bb
rename to meta-openembedded/meta-oe/recipes-support/xorg-xrdp/xorgxrdp_0.9.19.bb
index eadf8c1..9650d43 100644
--- a/meta-openembedded/meta-oe/recipes-support/xorg-xrdp/xorgxrdp_0.2.18.bb
+++ b/meta-openembedded/meta-oe/recipes-support/xorg-xrdp/xorgxrdp_0.9.19.bb
@@ -10,11 +10,11 @@
 inherit features_check
 REQUIRED_DISTRO_FEATURES = "x11 pam"
 
-SRC_URI = "git://github.com/neutrinolabs/xorgxrdp.git;branch=devel;protocol=https"
+SRC_URI = "git://github.com/neutrinolabs/xorgxrdp.git;branch=v0.9;protocol=https"
 
-SRCREV = "752b7659773a11ca6d4600645547c06fdb8a23d9"
+SRCREV = "d463bad9639c910fadc2f30dac473c7688b11cfc"
 
-PV = "0.2.18"
+PV = "0.9.19"
 
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-oe/recipes-support/xrdp/xrdp/0001-mark-count-with-unused-attribute.patch b/meta-openembedded/meta-oe/recipes-support/xrdp/xrdp/0001-mark-count-with-unused-attribute.patch
new file mode 100644
index 0000000..77a394d
--- /dev/null
+++ b/meta-openembedded/meta-oe/recipes-support/xrdp/xrdp/0001-mark-count-with-unused-attribute.patch
@@ -0,0 +1,31 @@
+From 492f74dbea1d9a15fbc3e870e78ab52e7fc5583b Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 31 Aug 2022 20:19:32 -0700
+Subject: [PATCH] mark count with unused attribute
+
+This may throw a warning when devel logs are disabled
+Fixed
+../../../xrdp-0.9.19/sesman/chansrv/chansrv.c:198:9: error: variable 'count' set but not used [-Werror,-Wunused-but-set-variable] int count; ^ 1 error generated.
+
+Upstream-Status: Submitted [https://github.com/neutrinolabs/xrdp/pull/2353]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ sesman/chansrv/chansrv.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/sesman/chansrv/chansrv.c b/sesman/chansrv/chansrv.c
+index 4452d998..b818bff3 100644
+--- a/sesman/chansrv/chansrv.c
++++ b/sesman/chansrv/chansrv.c
+@@ -195,7 +195,7 @@ check_timeout(void)
+     struct timeout_obj *tobj;
+     struct timeout_obj *last_tobj;
+     struct timeout_obj *temp_tobj;
+-    int count;
++    int count __attribute__((unused));
+     tui32 now;
+ 
+     LOG_DEVEL(LOG_LEVEL_DEBUG, "check_timeout:");
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-oe/recipes-support/xrdp/xrdp_0.9.19.bb b/meta-openembedded/meta-oe/recipes-support/xrdp/xrdp_0.9.19.bb
index 3e2e84f..b839a68 100644
--- a/meta-openembedded/meta-oe/recipes-support/xrdp/xrdp_0.9.19.bb
+++ b/meta-openembedded/meta-oe/recipes-support/xrdp/xrdp_0.9.19.bb
@@ -15,6 +15,7 @@
            file://0001-Added-req_distinguished_name-in-etc-xrdp-openssl.con.patch \
            file://0001-Fix-the-compile-error.patch \
            file://0001-arch-Define-NO_NEED_ALIGN-on-ppc64.patch \
+           file://0001-mark-count-with-unused-attribute.patch \
            "
 
 SRC_URI[sha256sum] = "94017d30e475c6d7a24f651e16791551862ae46f82d8de62385e63393f5f93d0"
diff --git a/meta-openembedded/meta-oe/recipes-support/zbar/zbar_git.bb b/meta-openembedded/meta-oe/recipes-support/zbar/zbar_git.bb
index 17084f8..3be1f27 100644
--- a/meta-openembedded/meta-oe/recipes-support/zbar/zbar_git.bb
+++ b/meta-openembedded/meta-oe/recipes-support/zbar/zbar_git.bb
@@ -24,7 +24,7 @@
     ${@bb.utils.contains('DISTRO_FEATURES', 'x11', 'x11', '', d)} \
 "
 
-PACKAGECONFIG ??= "video python3"
+PACKAGECONFIG ??= "video"
 
 inherit autotools pkgconfig gettext \
     ${@bb.utils.contains('PACKAGECONFIG', 'python3', 'python3native', '', d)} \
diff --git a/meta-openembedded/meta-oe/recipes-test/bats/bats_1.7.0.bb b/meta-openembedded/meta-oe/recipes-test/bats/bats_1.8.0.bb
similarity index 94%
rename from meta-openembedded/meta-oe/recipes-test/bats/bats_1.7.0.bb
rename to meta-openembedded/meta-oe/recipes-test/bats/bats_1.8.0.bb
index 81235e2..4b90dcc 100644
--- a/meta-openembedded/meta-oe/recipes-test/bats/bats_1.7.0.bb
+++ b/meta-openembedded/meta-oe/recipes-test/bats/bats_1.8.0.bb
@@ -11,7 +11,7 @@
   "
 
-# v1.7.0
+# v1.8.0
-SRCREV = "410dd229a5ed005c68167cc90ed0712ad2a1c909"
+SRCREV = "e9b286bb39ad7b0cb7b7d2e819d44d1aff387522"
 
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-perl/recipes-perl/libconfig-tiny/libconfig-tiny-perl_2.28.bb b/meta-openembedded/meta-perl/recipes-perl/libconfig-tiny/libconfig-tiny-perl_2.28.bb
new file mode 100644
index 0000000..ff28c76
--- /dev/null
+++ b/meta-openembedded/meta-perl/recipes-perl/libconfig-tiny/libconfig-tiny-perl_2.28.bb
@@ -0,0 +1,20 @@
+SUMMARY = "Perl module for read/write .ini style files"
+DESCRIPTION = "Config::Tiny is a Perl class to read and write .ini \
+configuration files with as little code as possible, reducing load time and \
+memory overhead."
+
+HOMEPAGE = "https://search.cpan.org/dist/Config-Tiny"
+SECTION = "libraries"
+
+LICENSE = "Artistic-1.0 | GPL-1.0-or-later"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=ab27c3cedbdb0eb6e656a8722476191a"
+
+RDEPENDS:${PN} += "perl"
+
+S = "${WORKDIR}/Config-Tiny-${PV}"
+
+SRC_URI = "http://search.cpan.org/CPAN/authors/id/R/RS/RSAVAGE/Config-Tiny-${PV}.tgz"
+
+SRC_URI[sha256sum] = "12df843a0d29d48f61bcc14c4f18f0858fd27a8dd829a00319529d654fe01500"
+
+inherit cpan
diff --git a/meta-openembedded/meta-perl/recipes-perl/libcurses/libcurses-perl/0001-testtyp.c-Use-proper-prototype-for-main-function.patch b/meta-openembedded/meta-perl/recipes-perl/libcurses/libcurses-perl/0001-testtyp.c-Use-proper-prototype-for-main-function.patch
new file mode 100644
index 0000000..ab5af3f
--- /dev/null
+++ b/meta-openembedded/meta-perl/recipes-perl/libcurses/libcurses-perl/0001-testtyp.c-Use-proper-prototype-for-main-function.patch
@@ -0,0 +1,40 @@
+From 01d0904a987ec3d8850ae3b54252c9d3bbe75962 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 30 Aug 2022 15:43:42 -0700
+Subject: [PATCH] testtyp.c: Use proper prototype for main function
+
+Newer compilers like clang-15+ will complain about the prototype
+vigorously, and the test will end up failing for this reason instead of
+real check it should be failing/passing for.
+
+Fixes
+testint.c:11:1: error: type specifier missing, defaults to 'int'; ISO C99 and later do not support implicit int [-Wimplicit-int]
+main() {
+^
+int
+1 error generated.
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ testtyp.c | 5 +++--
+ 1 file changed, 3 insertions(+), 2 deletions(-)
+
+diff --git a/testtyp.c b/testtyp.c
+index 949c3d4..4042f73 100644
+--- a/testtyp.c
++++ b/testtyp.c
+@@ -7,7 +7,8 @@
+ #define _XOPEN_SOURCE_EXTENDED 1  /* We expect wide character functions */
+ 
+ #include "c-config.h"
+-
+-main() {
++int
++main(int argc, char *argv[]) {
+   typedef SYM c_sym_t;
++  return 0;
+ }
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-perl/recipes-perl/libcurses/libcurses-perl_1.41.bb b/meta-openembedded/meta-perl/recipes-perl/libcurses/libcurses-perl_1.41.bb
index 6a6f012..421823d 100644
--- a/meta-openembedded/meta-perl/recipes-perl/libcurses/libcurses-perl_1.41.bb
+++ b/meta-openembedded/meta-perl/recipes-perl/libcurses/libcurses-perl_1.41.bb
@@ -8,13 +8,19 @@
 
 DEPENDS += "perl ncurses "
 
-SRC_URI = "http://www.cpan.org/authors/id/G/GI/GIRAFFED/Curses-${PV}.tar.gz"
+SRC_URI = "http://www.cpan.org/authors/id/G/GI/GIRAFFED/Curses-${PV}.tar.gz \
+           file://0001-testtyp.c-Use-proper-prototype-for-main-function.patch \
+           "
 
 SRC_URI[sha256sum] = "fb9efea8c7b5ed5f8ea5dee49d35252accfc05ee6e75cb9a37ab7c847cd261d7"
 
 S = "${WORKDIR}/Curses-${PV}"
 
-EXTRA_CPANFLAGS = "INC=-I${STAGING_INCDIR} LIBS=-L${STAGING_LIBDIR}"
+EXTRA_CPANFLAGS = "INC=-I${STAGING_INCDIR} LIBS=-L${STAGING_LIBDIR} TEST_SYMS_OPTS=-v"
+
+# Enable NCURSES_WIDECHAR=1 only if ENABLE_WIDEC has not been explicitly disabled (e.g. by the distro config).
+# When compiling against the ncurses library, NCURSES_WIDECHAR needs to be explicitly set to 0 in this case.
+CFLAGS:append:libc-musl = "${@' -DNCURSES_WIDECHAR=1' if bb.utils.to_boolean((d.getVar('ENABLE_WIDEC') or 'True')) else ' -DNCURSES_WIDECHAR=0'} -DNCURSES_INTERNALS"
 
 inherit cpan
 
diff --git a/meta-openembedded/meta-perl/recipes-perl/libio/libio-socket-ssl-perl_2.074.bb b/meta-openembedded/meta-perl/recipes-perl/libio/libio-socket-ssl-perl_2.075.bb
similarity index 93%
rename from meta-openembedded/meta-perl/recipes-perl/libio/libio-socket-ssl-perl_2.074.bb
rename to meta-openembedded/meta-perl/recipes-perl/libio/libio-socket-ssl-perl_2.075.bb
index 6249fd1..0a88d93 100644
--- a/meta-openembedded/meta-perl/recipes-perl/libio/libio-socket-ssl-perl_2.074.bb
+++ b/meta-openembedded/meta-perl/recipes-perl/libio/libio-socket-ssl-perl_2.075.bb
@@ -22,7 +22,7 @@
 SRC_URI = "http://search.cpan.org/CPAN/authors/id/S/SU/SULLR/IO-Socket-SSL-${PV}.tar.gz \
            file://run-ptest \
           "
-SRC_URI[sha256sum] = "36486b6be49da4d029819cf7069a7b41ed48af0c87e23be0f8e6aba23d08a832"
+SRC_URI[sha256sum] = "c30ee2220b1e181a968ebbc81861d0cadf334b001377a44105ae5a8637ddae8c"
 
 S = "${WORKDIR}/IO-Socket-SSL-${PV}"
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-aiofiles_0.8.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-aiofiles_22.1.0.bb
similarity index 80%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-aiofiles_0.8.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-aiofiles_22.1.0.bb
index d50cb8e..40ac76e 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-aiofiles_0.8.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-aiofiles_22.1.0.bb
@@ -4,7 +4,7 @@
 LICENSE = "Apache-2.0"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=d2794c0df5b907fdace235a619d80314"
 
-SRC_URI[sha256sum] = "8334f23235248a3b2e83b2c3a78a22674f39969b96397126cc93664d9a901e59"
+SRC_URI[sha256sum] = "9107f1ca0b2a5553987a94a3c9959fe5b491fdf731389aa5b7b1bd0733e32de6"
 
 PYPI_PACKAGE = "aiofiles"
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-aiohttp_3.8.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-aiohttp_3.8.3.bb
similarity index 80%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-aiohttp_3.8.1.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-aiohttp_3.8.3.bb
index f2b8d52..b233b3a 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-aiohttp_3.8.1.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-aiohttp_3.8.3.bb
@@ -2,9 +2,9 @@
 DESCRIPTION = "Asynchronous HTTP client/server framework for asyncio and Python"
 HOMEPAGE = "https://github.com/aio-libs/aiohttp"
 LICENSE = "Apache-2.0"
-LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=8074d6c6e217873b2a018a4522243ea3"
+LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=748073912af33aa59430d3702aa32d41"
 
-SRC_URI[sha256sum] = "fc5471e1a54de15ef71c1bc6ebe80d4dc681ea600e68bfd1cbce40427f0b7578"
+SRC_URI[sha256sum] = "3828fb41b7203176b82fe5d699e0d845435f2374750a44b480ea6b930f6be269"
 
 PYPI_PACKAGE = "aiohttp"
 inherit python_setuptools_build_meta pypi
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-antlr4-runtime_4.10.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-antlr4-runtime_4.10.bb
deleted file mode 100644
index 01f1720..0000000
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-antlr4-runtime_4.10.bb
+++ /dev/null
@@ -1,11 +0,0 @@
-SUMMARY = "ANTLR runtime for Python"
-LICENSE = "BSD-3-Clause"
-LIC_FILES_CHKSUM = "file://PKG-INFO;md5=6e514123aedb5d9fb133d1bc6c598d46"
-
-SRC_URI[sha256sum] = "061a49bc72ae05a35d9b61c0ba0ac36c0397708819f02fbfb20a80e47d287a1b"
-
-PYPI_PACKAGE = "antlr4-python3-runtime"
-
-inherit pypi python_setuptools_build_meta
-
-BBCLASSEXTEND = "nativesdk native"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-antlr4-runtime_4.11.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-antlr4-runtime_4.11.1.bb
new file mode 100644
index 0000000..535dc02
--- /dev/null
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-antlr4-runtime_4.11.1.bb
@@ -0,0 +1,11 @@
+SUMMARY = "ANTLR runtime for Python"
+LICENSE = "BSD-3-Clause"
+LIC_FILES_CHKSUM = "file://PKG-INFO;md5=9a0a1d20e667cf7ab7c59357cf8b4812"
+
+SRC_URI[sha256sum] = "a53de701312f9bdacc5258a6872cd6c62b90d3a90ae25e494026f76267333b60"
+
+PYPI_PACKAGE = "antlr4-python3-runtime"
+
+inherit pypi python_setuptools_build_meta
+
+BBCLASSEXTEND = "nativesdk native"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-astroid_2.12.2.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-astroid_2.12.10.bb
similarity index 89%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-astroid_2.12.2.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-astroid_2.12.10.bb
index ae2a410..c1d746b 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-astroid_2.12.2.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-astroid_2.12.10.bb
@@ -4,7 +4,7 @@
 LICENSE = "LGPL-2.1-only"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=a70cf540abf41acb644ac3b621b2fad1"
 
-SRC_URI[sha256sum] = "4675ef501edbbb143b3d9bb4c81d5f6338f08f960beed2ce41a03dc4cd20d777"
+SRC_URI[sha256sum] = "81f870105d892e73bf535da77a8261aa5bde838fa4ed12bb2f435291a098c581"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-awesomeversion_22.8.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-awesomeversion_22.9.0.bb
similarity index 82%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-awesomeversion_22.8.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-awesomeversion_22.9.0.bb
index 550a8c4..20ed189 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-awesomeversion_22.8.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-awesomeversion_22.9.0.bb
@@ -4,7 +4,7 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENCE.md;md5=92622b5a8e216099be741d78328bae5d"
 
-SRC_URI[sha256sum] = "0469a801faafb3a7a13c529edc977ea04c3bce825af9ebb602f58012bc487db5"
+SRC_URI[sha256sum] = "2f4190d333e81e10b2a4e156150ddb3596f5f11da67e9d51ba39057aa7a17f7e"
 
 RDEPENDS:${PN} += "python3-profile python3-logging"
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-cachecontrol_0.12.11.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-cachecontrol_0.12.12.bb
similarity index 88%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-cachecontrol_0.12.11.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-cachecontrol_0.12.12.bb
index fc2b0ae..8f7185b 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-cachecontrol_0.12.11.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-cachecontrol_0.12.12.bb
@@ -7,7 +7,7 @@
 # override PYPI_PACKAGE so fetch succeeds.
 PYPI_PACKAGE = "CacheControl"
 
-SRC_URI[sha256sum] = "a5b9fcc986b184db101aa280b42ecdcdfc524892596f606858e0b7a8b4d9e144"
+SRC_URI[sha256sum] = "9c2e5208ea76ebd9921176569743ddf6d7f3bb4188dbf61806f0f8fc48ecad38"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-cantools_37.1.2.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-cantools_37.2.0.bb
similarity index 83%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-cantools_37.1.2.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-cantools_37.2.0.bb
index ec437ee..7d15934 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-cantools_37.1.2.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-cantools_37.2.0.bb
@@ -3,7 +3,7 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=d9aa4ec07de78abae21c490c9ffe61bd"
 
-SRC_URI[sha256sum] = "0d84b879a18d869d182023cdebae9318095a8959ceee6309de59fd3c399dbfef"
+SRC_URI[sha256sum] = "bbb0e802af02a1dc792c32d10b61b51deaefc1c8e9c776b50d2d2e194b702fac"
 
 PYPI_PACKAGE = "cantools"
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-colorlog_6.6.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-colorlog_6.7.0.bb
similarity index 75%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-colorlog_6.6.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-colorlog_6.7.0.bb
index 2f8a0e5..84b67e7 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-colorlog_6.6.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-colorlog_6.7.0.bb
@@ -7,4 +7,4 @@
 
 PYPI_PACKAGE = "colorlog"
 
-SRC_URI[sha256sum] = "344f73204009e4c83c5b6beb00b3c45dc70fcdae3c80db919e0a4171d006fde8"
+SRC_URI[sha256sum] = "bd94bd21c1e13fac7bd3153f4bc3a7dc0eb0974b8bc2fdf1a989e474f6e582e5"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-croniter_1.3.5.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-croniter_1.3.7.bb
similarity index 77%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-croniter_1.3.5.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-croniter_1.3.7.bb
index 92962c0..c84affa 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-croniter_1.3.5.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-croniter_1.3.7.bb
@@ -5,7 +5,7 @@
 
 PYPI_PACKAGE = "croniter"
 
-SRC_URI[sha256sum] = "7592fc0e8a00d82af98dfa2768b75983b6fb4c2adc8f6d0d7c931a715b7cefee"
+SRC_URI[sha256sum] = "72ef78d0f8337eb35393b8893ebfbfbeb340f2d2ae47e0d2d78130e34b0dd8b9"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-django_3.2.12.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-django_3.2.12.bb
deleted file mode 100644
index 17d402d..0000000
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-django_3.2.12.bb
+++ /dev/null
@@ -1,9 +0,0 @@
-require python-django.inc
-inherit setuptools3
-
-SRC_URI[sha256sum] = "9772e6935703e59e993960832d66a614cf0233a1c5123bc6224ecc6ad69e41e2"
-
-RDEPENDS:${PN} += "\
-    ${PYTHON_PN}-sqlparse \
-    ${PYTHON_PN}-asgiref \
-"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-django_4.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-django_4.1.bb
new file mode 100644
index 0000000..44ea539
--- /dev/null
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-django_4.1.bb
@@ -0,0 +1,9 @@
+require python-django.inc
+inherit setuptools3
+
+SRC_URI[sha256sum] = "032f8a6fc7cf05ccd1214e4a2e21dfcd6a23b9d575c6573cacc8c67828dbe642"
+
+RDEPENDS:${PN} += "\
+    ${PYTHON_PN}-sqlparse \
+    ${PYTHON_PN}-asgiref \
+"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-djangorestframework_3.13.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-djangorestframework_3.14.0.bb
similarity index 79%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-djangorestframework_3.13.1.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-djangorestframework_3.14.0.bb
index be85827..fee8a3a 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-djangorestframework_3.13.1.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-djangorestframework_3.14.0.bb
@@ -5,7 +5,7 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSE.md;md5=7879a5a716147a784f7e524c9cf103c1"
 
-SRC_URI[sha256sum] = "0c33407ce23acc68eca2a6e46424b008c9c02eceb8cf18581921d0092bc1f2ee"
+SRC_URI[sha256sum] = "579a333e6256b09489cbe0a067e66abe55c6595d8926be6b99423786334350c8"
 
 PYPI_PACKAGE = "djangorestframework"
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-elementpath_3.0.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-elementpath_3.0.2.bb
similarity index 69%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-elementpath_3.0.1.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-elementpath_3.0.2.bb
index 13468eb..661259f 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-elementpath_3.0.1.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-elementpath_3.0.2.bb
@@ -3,13 +3,17 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=5dbb7fb7d72da3921202dd7b995d3ecf"
 
-SRC_URI[sha256sum] = "a75eed5aad3dad92ba577b1f3a268d8c3c98ceeda91cb8abae4269e920e7c8f6"
+SRC_URI[sha256sum] = "cca18742dc0f354f79874c41a906e6ce4cc15230b7858d22a861e1ec5946940f"
 
 PYPI_PACKAGE = "elementpath"
 inherit pypi setuptools3
 
 RDEPENDS:${PN} += " \
     ${PYTHON_PN}-xml \
+    ${PYTHON_PN}-core \
+    ${PYTHON_PN}-numbers \
+    ${PYTHON_PN}-datetime \
+    ${PYTHON_PN}-stringold \
 "
 
 BBCLASSEXTEND = "native nativesdk"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-email-validator_1.2.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-email-validator_1.3.0.bb
similarity index 77%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-email-validator_1.2.1.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-email-validator_1.3.0.bb
index a5cb09f..76dfdd6 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-email-validator_1.2.1.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-email-validator_1.3.0.bb
@@ -3,7 +3,7 @@
 LICENSE = "CC0-1.0"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=65d3616852dbf7b1a6d4b53b00626032"
 
-SRC_URI[sha256sum] = "6757aea012d40516357c0ac2b1a4c31219ab2f899d26831334c5d069e8b6c3d8"
+SRC_URI[sha256sum] = "553a66f8be2ec2dea641ae1d3f29017ab89e9d603d4a25cdaac39eefa283d769"
 
 PYPI_PACKAGE = "email_validator"
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-eth-typing_3.1.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-eth-typing_3.2.0.bb
similarity index 74%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-eth-typing_3.1.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-eth-typing_3.2.0.bb
index 0e274c2..b810341 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-eth-typing_3.1.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-eth-typing_3.2.0.bb
@@ -4,6 +4,6 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=bf9691ead96f1163622689e47ce3f366"
 
-SRC_URI[sha256sum] = "92698268be83c75f65a028d47421f7c0aed459be76dd7ce916af166dca8bc53e"
+SRC_URI[sha256sum] = "177e2070da9bf557fe0fd46ee467a7be2d0b6476aa4dc18680603e7da1fc5690"
 
 inherit pypi setuptools3
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-fasteners_0.17.3.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-fasteners_0.18.bb
similarity index 78%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-fasteners_0.17.3.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-fasteners_0.18.bb
index 51691b7..5f63891 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-fasteners_0.17.3.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-fasteners_0.18.bb
@@ -3,7 +3,7 @@
 LICENSE = "Apache-2.0"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=4476c4be31402271e101d9a4a3430d52"
 
-SRC_URI[sha256sum] = "a9a42a208573d4074c77d041447336cf4e3c1389a256fd3e113ef59cf29b7980"
+SRC_URI[sha256sum] = "cb7c13ef91e0c7e4fe4af38ecaf6b904ec3f5ce0dda06d34924b6b74b869d953"
 
 inherit pypi python_setuptools_build_meta
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-flask-socketio_5.2.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-flask-socketio_5.3.1.bb
similarity index 81%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-flask-socketio_5.2.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-flask-socketio_5.3.1.bb
index 9deae8b..50e820b 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-flask-socketio_5.2.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-flask-socketio_5.3.1.bb
@@ -9,7 +9,7 @@
 
 PYPI_PACKAGE = "Flask-SocketIO"
 
-SRC_URI[sha256sum] = "19c3d0cea49c53505fa457fedc133b32cb6eeaaa30d28cdab9d6ca8f16045427"
+SRC_URI[sha256sum] = "fd0ed0fc1341671d92d5f5b2f5503916deb7aa7e2940e6636cfa2c087c828bf9"
 
 RDEPENDS:${PN} += "\
     ${PYTHON_PN}-flask \
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-gnupg_0.4.9.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-gnupg_0.5.0.bb
similarity index 78%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-gnupg_0.4.9.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-gnupg_0.5.0.bb
index 9e13e4c..b97c4f3 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-gnupg_0.4.9.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-gnupg_0.5.0.bb
@@ -4,7 +4,7 @@
 LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=5dabe659eadd6d97325b1582e41cfc11"
 
 PYPI_PACKAGE = "python-gnupg"
-SRC_URI[sha256sum] = "aaa748795572591aaf127b4ac8985684f3673ff82b39f370c836b006e68fc537"
+SRC_URI[sha256sum] = "70758e387fc0e0c4badbcb394f61acbe68b34970a8fed7e0f7c89469fe17912a"
 
 inherit pypi python_setuptools_build_meta
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-google-api-core_2.8.2.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-google-api-core_2.10.1.bb
similarity index 87%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-google-api-core_2.8.2.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-google-api-core_2.10.1.bb
index a2df7fb..c7ded90 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-google-api-core_2.8.2.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-google-api-core_2.10.1.bb
@@ -6,7 +6,7 @@
 
 inherit pypi setuptools3
 
-SRC_URI[sha256sum] = "06f7244c640322b508b125903bb5701bebabce8832f85aba9335ec00b3d02edc"
+SRC_URI[sha256sum] = "e16c15a11789bc5a3457afb2818a3540a03f341e6e710d7f9bbf6cde2ef4a7c8"
 
 RDEPENDS:${PN} += "\
     ${PYTHON_PN}-asyncio \
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-google-api-python-client_2.57.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-google-api-python-client_2.62.0.bb
similarity index 86%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-google-api-python-client_2.57.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-google-api-python-client_2.62.0.bb
index fdbad82..abceda8 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-google-api-python-client_2.57.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-google-api-python-client_2.62.0.bb
@@ -4,7 +4,7 @@
 LICENSE = "Apache-2.0"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=86d3f3a95c324c9479bd8986968f4327"
 
-SRC_URI[sha256sum] = "ec4412545b0c5978a833bb03993a46121ad2c700f32af0cba23f8439b3f5fb02"
+SRC_URI[sha256sum] = "8307b85f0b8f84252747326de50eda279220cc1a3966d6b82e94486618275637"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-google-auth_2.11.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-google-auth_2.11.1.bb
similarity index 87%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-google-auth_2.11.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-google-auth_2.11.1.bb
index d1698f5..747a57e 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-google-auth_2.11.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-google-auth_2.11.1.bb
@@ -6,7 +6,7 @@
 
 inherit pypi setuptools3
 
-SRC_URI[sha256sum] = "ed65ecf9f681832298e29328e1ef0a3676e3732b2e56f41532d45f70a22de0fb"
+SRC_URI[sha256sum] = "516e6623038b81430dd062a1a25ecd24f173d7c15cdf4e48a9e78bc87e97aeec"
 
 RDEPENDS:${PN} += "\
     ${PYTHON_PN}-asyncio \
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-greenlet_1.1.2.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-greenlet_1.1.3.bb
similarity index 78%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-greenlet_1.1.2.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-greenlet_1.1.3.bb
index d14987b..5175212 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-greenlet_1.1.2.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-greenlet_1.1.3.bb
@@ -4,6 +4,6 @@
 LIC_FILES_CHKSUM = "file://LICENSE;md5=e95668d68e4329085c7ab3535e6a7aee \
                     file://LICENSE.PSF;md5=c106931d9429eda0492617f037b8f69a"
 
-SRC_URI[sha256sum] = "e30f5ea4ae2346e62cedde8794a56858a67b878dd79f7df76a0767e356b1744a"
+SRC_URI[sha256sum] = "bcb6c6dd1d6be6d38d6db283747d07fda089ff8c559a835236560a4410340455"
 
 inherit pypi setuptools3
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio-tools/0001-protobuf-Disable-musttail-attribute-on-mips.patch b/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio-tools/0001-protobuf-Disable-musttail-attribute-on-mips.patch
new file mode 100644
index 0000000..8b13d7a
--- /dev/null
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio-tools/0001-protobuf-Disable-musttail-attribute-on-mips.patch
@@ -0,0 +1,24 @@
+From 64d737318656286f69ddc5ad654072785ddcbbed Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 16 Sep 2022 19:09:15 -0700
+Subject: [PATCH] protobuf: Disable musttail attribute on mips
+
+See https://github.com/llvm/llvm-project/issues/57795
+
+Upstream-Status: Pending
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+Signed-off-by: Wang Mingyu <wangmy@fujitsu.com>
+---
+ third_party/protobuf/src/google/protobuf/port_def.inc | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+--- a/third_party/protobuf/src/google/protobuf/port_def.inc
++++ b/third_party/protobuf/src/google/protobuf/port_def.inc
+@@ -255,6 +255,7 @@
+ #error PROTOBUF_TAILCALL was previously defined
+ #endif
+ #if __has_cpp_attribute(clang::musttail) && !defined(__arm__) && \
++    !defined(__mips__) &&                                        \
+     !defined(_ARCH_PPC) && !defined(__wasm__) &&                 \
+     !(defined(_MSC_VER) && defined(_M_IX86)) &&                  \
+     !(defined(__NDK_MAJOR__) && __NDK_MAJOR <= 24)
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio-tools_1.48.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio-tools_1.49.1.bb
similarity index 68%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio-tools_1.48.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio-tools_1.49.1.bb
index fcd2840..e1b8a75 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio-tools_1.48.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio-tools_1.49.1.bb
@@ -9,8 +9,10 @@
 
 DEPENDS += "${PYTHON_PN}-grpcio"
 
-SRC_URI += "file://0001-setup.py-Do-not-mix-C-and-C-compiler-options.patch"
-SRC_URI[sha256sum] = "dd7f757608e7dfae4ab2e7fc1e8951e6eb9526ebdc7ce90597329bc4c408c9a1"
+SRC_URI += "file://0001-setup.py-Do-not-mix-C-and-C-compiler-options.patch \
+            file://0001-protobuf-Disable-musttail-attribute-on-mips.patch \
+            "
+SRC_URI[sha256sum] = "84cc64e5b46bad43d5d7bd2fd772b656eba0366961187a847e908e2cb735db91"
 
 RDEPENDS:${PN} = "${PYTHON_PN}-grpcio"
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio/0001-setup.py-Do-not-mix-C-and-C-compiler-options.patch b/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio/0001-setup.py-Do-not-mix-C-and-C-compiler-options.patch
index f416ca2..1f465db 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio/0001-setup.py-Do-not-mix-C-and-C-compiler-options.patch
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio/0001-setup.py-Do-not-mix-C-and-C-compiler-options.patch
@@ -23,16 +23,17 @@
 
 Signed-off-by: Khem Raj <raj.khem@gmail.com>
 Signed-off-by: Hongxu Jia <hongxu.jia@windriver.com>
+Signed-off-by: Wang Mingyu <wangmy@fujitsu.com>
 ---
- setup.py                      | 9 ++++++---
- src/python/grpcio/commands.py | 5 ++++-
- 2 files changed, 10 insertions(+), 4 deletions(-)
+ setup.py                      | 11 +++++++----
+ src/python/grpcio/commands.py |  5 ++++-
+ 2 files changed, 11 insertions(+), 5 deletions(-)
 
 diff --git a/setup.py b/setup.py
-index ec27fe4..b2d2d1a 100644
+index 3a50c97..bb2386d 100644
 --- a/setup.py
 +++ b/setup.py
-@@ -206,8 +206,11 @@ def check_linker_need_libatomic():
+@@ -205,8 +205,11 @@ def check_linker_need_libatomic():
      """Test if linker on system needs libatomic."""
      code_test = (b'#include <atomic>\n' +
                   b'int main() { return std::atomic<int64_t>{}; }')
@@ -46,15 +47,17 @@
                                  stdin=PIPE,
                                  stdout=PIPE,
                                  stderr=PIPE)
-@@ -217,7 +220,7 @@ def check_linker_need_libatomic():
+@@ -215,8 +218,8 @@ def check_linker_need_libatomic():
+         return False
      # Double-check to see if -latomic actually can solve the problem.
      # https://github.com/grpc/grpc/issues/22491
-     cpp_test = subprocess.Popen(
--        [cxx, '-x', 'c++', '-std=c++14', '-', '-latomic'],
-+        [cxx, cxx_args, '-x', 'c++', '-std=c++14', '-', '-latomic'],
-         stdin=PIPE,
-         stdout=PIPE,
-         stderr=PIPE)
+-    cpp_test = subprocess.Popen(cxx +
+-                                ['-x', 'c++', '-std=c++14', '-', '-latomic'],
++    cpp_test = subprocess.Popen(
++                                [cxx, cxx_args, '-x', 'c++', '-std=c++14', '-', '-latomic'],
+                                 stdin=PIPE,
+                                 stdout=PIPE,
+                                 stderr=PIPE)
 diff --git a/src/python/grpcio/commands.py b/src/python/grpcio/commands.py
 index d93b6c7..a8c4a51 100644
 --- a/src/python/grpcio/commands.py
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio_1.48.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio_1.49.1.bb
similarity index 93%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio_1.48.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio_1.49.1.bb
index a16b880..8efcfdc 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio_1.48.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-grpcio_1.49.1.bb
@@ -12,7 +12,7 @@
                                 file://mips_bigendian.patch \
                                 file://abseil-ppc-fixes.patch;patchdir=third_party/abseil-cpp \
 "
-SRC_URI[sha256sum] = "eaf4bb73819863440727195411ab3b5c304f6663625e66f348e91ebe0a039306"
+SRC_URI[sha256sum] = "d4725fc9ec8e8822906ae26bb26f5546891aa7fbc3443de970cc556d43a5c99f"
 
 RDEPENDS:${PN} = "${PYTHON_PN}-protobuf \
                   ${PYTHON_PN}-setuptools \
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-h11_0.13.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-h11_0.14.0.bb
similarity index 73%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-h11_0.13.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-h11_0.14.0.bb
index afc6439..7bda0ea 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-h11_0.13.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-h11_0.14.0.bb
@@ -5,4 +5,4 @@
 
 inherit pypi setuptools3
 
-SRC_URI[sha256sum] = "70813c1135087a248a4d38cc0e1a0181ffab2188141a93eaf567940c3957ff06"
+SRC_URI[sha256sum] = "8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-h5py/0001-fix-wrong-file-driver-version.patch b/meta-openembedded/meta-python/recipes-devtools/python/python3-h5py/0001-fix-wrong-file-driver-version.patch
new file mode 100644
index 0000000..2692acd
--- /dev/null
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-h5py/0001-fix-wrong-file-driver-version.patch
@@ -0,0 +1,53 @@
+From 5b0b1d0b941ba338d449f9261bdf4cb2b679d048 Mon Sep 17 00:00:00 2001
+From: Hongxu Jia <hongxu.jia@windriver.com>
+Date: Tue, 20 Sep 2022 02:53:11 -0700
+Subject: [PATCH] fix wrong file driver version
+
+Due to commit [1] applied in hdf5 (1.13.2), import h5py failed
+
+|>>> import h5py
+|Traceback (most recent call last):
+|  File "<stdin>", line 1, in <module>
+|  File "/usr/lib/python3.10/site-packages/h5py/__init__.py", line 56, in <module>
+|    from . import h5a, h5d, h5ds, h5f, h5fd, h5g, h5r, h5s, h5t, h5p, h5z, h5pl
+|  File "h5py/h5fd.pyx", line 220, in init h5py.h5fd
+|RuntimeError: Wrong file driver version # (wrong file driver version #)
+
+Initialize the driver version to fix the error
+
+[1] https://github.com/HDFGroup/hdf5/commit/42b767fc67ad1e13735e3cee2077f2e108f9463e
+
+Upstream-Status: Submitted  [https://github.com/h5py/h5py/pull/2153]
+Signed-off-by: Hongxu Jia <hongxu.jia@windriver.com>
+---
+ h5py/api_types_hdf5.pxd | 1 +
+ h5py/h5fd.pyx           | 1 +
+ 2 files changed, 2 insertions(+)
+
+diff --git a/h5py/api_types_hdf5.pxd b/h5py/api_types_hdf5.pxd
+index 6977f1a7..312fdaa0 100644
+--- a/h5py/api_types_hdf5.pxd
++++ b/h5py/api_types_hdf5.pxd
+@@ -237,6 +237,7 @@ cdef extern from "hdf5.h":
+ 
+   # Class information for each file driver
+   ctypedef struct H5FD_class_t:
++    unsigned version;
+     const char *name
+     haddr_t maxaddr
+     H5F_close_degree_t fc_degree
+diff --git a/h5py/h5fd.pyx b/h5py/h5fd.pyx
+index 04aff077..d41953d0 100644
+--- a/h5py/h5fd.pyx
++++ b/h5py/h5fd.pyx
+@@ -191,6 +191,7 @@ cdef herr_t H5FD_fileobj_flush(H5FD_fileobj_t *f, hid_t dxpl, hbool_t closing) e
+ cdef H5FD_class_t info
+ memset(&info, 0, sizeof(info))
+ 
++info.version = 0x01
+ info.name = 'fileobj'
+ info.maxaddr = libc.stdint.SIZE_MAX - 1
+ info.fc_degree = H5F_CLOSE_WEAK
+-- 
+2.37.1
+
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-h5py_3.7.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-h5py_3.7.0.bb
index 1153ea3..ce4f05d 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-h5py_3.7.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-h5py_3.7.0.bb
@@ -8,6 +8,7 @@
 
 SRC_URI:append = " \
            file://0001-setup_build.py-avoid-absolute-path.patch \
+           file://0001-fix-wrong-file-driver-version.patch \
           "
 
 inherit pkgconfig pypi setuptools3
@@ -28,4 +29,4 @@
                   python3-json \
                  "
 
-export HDF5_VERSION="1.8.21"
+export HDF5_VERSION="1.13.2"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-haversine_2.6.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-haversine_2.7.0.bb
similarity index 74%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-haversine_2.6.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-haversine_2.7.0.bb
index 6577918..bfe36b0 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-haversine_2.6.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-haversine_2.7.0.bb
@@ -2,7 +2,7 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://${COMMON_LICENSE_DIR}/MIT;md5=0835ade698e0bcf8506ecda2f7b4f302"
 
-SRC_URI[sha256sum] = "eb7c308ba721e86662c1d50427cb9b06f9e7eb9984803d9ec200582425cc4fb7"
+SRC_URI[sha256sum] = "9dd62c95bff9c43eb898604625e80db68b8b9e91a5111338f55ebcf470dd5a3d"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-humanize_4.3.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-humanize_4.4.0.bb
similarity index 82%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-humanize_4.3.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-humanize_4.4.0.bb
index 55386b6..7bb03b7 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-humanize_4.3.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-humanize_4.4.0.bb
@@ -5,7 +5,7 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENCE;md5=4ecc42519e84f6f3e23529464df7bd1d"
 
-SRC_URI[sha256sum] = "0dfac79fe8c1c0c734c14177b07b857bad9ae30dd50daa0a14e2c3d8054ee0c4"
+SRC_URI[sha256sum] = "efb2584565cc86b7ea87a977a15066de34cdedaf341b11c851cfcfd2b964779c"
 
 inherit pypi python_setuptools_build_meta
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-imageio_2.21.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-imageio_2.22.0.bb
similarity index 81%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-imageio_2.21.1.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-imageio_2.22.0.bb
index 5c792a5..6bc7b60 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-imageio_2.21.1.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-imageio_2.22.0.bb
@@ -5,7 +5,7 @@
 LICENSE = "BSD-2-Clause"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=24cb9a367a9e641b459a01c4d15256ba"
 
-SRC_URI[sha256sum] = "5f0278217c1cf99d90ef855dab948f93d9fce0ab7ab388e13a597c706b7ec4e5"
+SRC_URI[sha256sum] = "a332d127ec387b2d3dca967fd065a90f1c1a4ba2343570b03fe2cebb6ed064ea"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-joblib_1.1.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-joblib_1.2.0.bb
similarity index 80%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-joblib_1.1.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-joblib_1.2.0.bb
index e69cfef..ec02265 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-joblib_1.1.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-joblib_1.2.0.bb
@@ -4,7 +4,7 @@
 
 inherit setuptools3 pypi
 
-SRC_URI[sha256sum] = "4158fcecd13733f8be669be0683b96ebdbbd38d23559f54dca7205aea1bf1e35"
+SRC_URI[sha256sum] = "e1cee4a79e4af22881164f218d4311f60074197fb707e082e803b61f6d137018"
 
 RDEPENDS:${PN} += " \
     python3-asyncio \
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-jsonrpcserver_5.0.8.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-jsonrpcserver_5.0.9.bb
similarity index 85%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-jsonrpcserver_5.0.8.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-jsonrpcserver_5.0.9.bb
index 88b1d68..7e058a7 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-jsonrpcserver_5.0.8.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-jsonrpcserver_5.0.9.bb
@@ -3,7 +3,7 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=61b63ea9d36f6fb63ddaaaac8265304f"
 
-SRC_URI[sha256sum] = "5150071e4abc9a93f086aa0fd0004dfe0410de66adfaaf513613baa2c2fc00d7"
+SRC_URI[sha256sum] = "a71fb2cfa18541c80935f60987f92755d94d74141248c7438847b96eee5c4482"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-langtable_0.0.58.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-langtable_0.0.60.bb
similarity index 86%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-langtable_0.0.58.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-langtable_0.0.60.bb
index 025ceb6..eac069f 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-langtable_0.0.58.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-langtable_0.0.60.bb
@@ -6,7 +6,7 @@
 
 LIC_FILES_CHKSUM = "file://COPYING;md5=d32239bcb673463ab874e80d47fae504"
 
-SRC_URI[sha256sum] = "47128c06008acedee1745f2d56151461d94526b2be95a4124692013af35496b6"
+SRC_URI[sha256sum] = "ae77d62fe6002308ce6197310c4a933c4e13632bbaf7219a3533dc45d36223f8"
 
 inherit pypi setuptools3 python3native
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-license-expression_21.6.14.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-license-expression_30.0.0.bb
similarity index 69%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-license-expression_21.6.14.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-license-expression_30.0.0.bb
index 2ee4909..eeb9172 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-license-expression_21.6.14.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-license-expression_30.0.0.bb
@@ -2,9 +2,9 @@
 HOMEPAGE = "https://github.com/nexB/license-expression"
 
 LICENSE = "Apache-2.0"
-LIC_FILES_CHKSUM = "file://apache-2.0.LICENSE;md5=9429839cdc4b292ff46e88b524c6e0c9"
+LIC_FILES_CHKSUM = "file://apache-2.0.LICENSE;md5=86d3f3a95c324c9479bd8986968f4327"
 
-SRC_URI[sha256sum] = "9de87a427c9a449eee7913472fb9ed03b63036295547369fdbf95f76a8b924b2"
+SRC_URI[sha256sum] = "ad638292aa8493f84354909b517922cb823582c2ce2c4d880e42544a86bea8dd"
 
 inherit pypi ptest python_setuptools_build_meta
 
@@ -12,6 +12,10 @@
 
 RDEPENDS:${PN} += "\
     ${PYTHON_PN}-booleanpy \
+    ${PYTHON_PN}-core \
+    ${PYTHON_PN}-json \
+    ${PYTHON_PN}-stringold \
+    ${PYTHON_PN}-logging \
 "
 
 BBCLASSEXTEND = "native nativesdk"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-marshmallow_3.17.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-marshmallow_3.18.0.bb
similarity index 93%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-marshmallow_3.17.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-marshmallow_3.18.0.bb
index f2f5281..c52ff30 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-marshmallow_3.17.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-marshmallow_3.18.0.bb
@@ -9,7 +9,7 @@
 
 SRC_URI = "git://github.com/marshmallow-code/marshmallow.git;protocol=https;branch=dev"
 
-SRCREV = "4a6c08d53f181195c78b505abe155b2f35cbc0c1"
+SRCREV = "2805f32461fc7801a5f7b6d83facf7cbb8bca278"
 
 S = "${WORKDIR}/git"
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-matplotlib/matplotlib-disable-download.patch b/meta-openembedded/meta-python/recipes-devtools/python/python3-matplotlib/matplotlib-disable-download.patch
index 42ffeb8..25c49af 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-matplotlib/matplotlib-disable-download.patch
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-matplotlib/matplotlib-disable-download.patch
@@ -6,10 +6,11 @@
 
 --- a/setup.py
 +++ b/setup.py
-@@ -317,7 +317,6 @@ setup(  # Finally, pass this all along t
+@@ -316,8 +316,6 @@ setup(  # Finally, pass this all along t
+     setup_requires=[
          "certifi>=2020.06.20",
          "numpy>=1.17",
-         "setuptools_scm>=4",
+-        "setuptools_scm>=4,<7",
 -        "setuptools_scm_git_archive",
      ],
      install_requires=[
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-matplotlib_3.5.2.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-matplotlib_3.5.3.bb
similarity index 94%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-matplotlib_3.5.2.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-matplotlib_3.5.3.bb
index eaa1447..1a0b93b 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-matplotlib_3.5.2.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-matplotlib_3.5.3.bb
@@ -19,9 +19,9 @@
     python3-dateutil-native \
     python3-pytz-native \
     python3-certifi-native \
+    python3-setuptools-scm-native \
 "
-
-SRC_URI[sha256sum] = "48cf850ce14fa18067f2d9e0d646763681948487a8080ec0af2686468b4607a2"
+SRC_URI[sha256sum] = "339cac48b80ddbc8bfd05daae0a3a73414651a8596904c2a881cfd1edb65f26c"
 
 inherit pypi setuptools3 pkgconfig
 
@@ -40,7 +40,6 @@
 # LTO with clang needs lld
 LDFLAGS:append:toolchain-clang = " -fuse-ld=lld"
 LDFLAGS:remove:toolchain-clang:mips = "-fuse-ld=lld"
-
 RDEPENDS:${PN} = "\
     freetype \
     libpng \
@@ -57,6 +56,7 @@
 ENABLELTO:toolchain-clang:riscv64 = "echo enable_lto = False >> ${S}/mplsetup.cfg"
 ENABLELTO:toolchain-clang:riscv32 = "echo enable_lto = False >> ${S}/mplsetup.cfg"
 ENABLELTO:toolchain-clang:mips = "echo enable_lto = False >> ${S}/mplsetup.cfg"
+
 do_compile:prepend() {
     echo [libs] > ${S}/mplsetup.cfg
     echo system_freetype = True >> ${S}/mplsetup.cfg
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-natsort_8.1.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-natsort_8.2.0.bb
similarity index 75%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-natsort_8.1.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-natsort_8.2.0.bb
index 1ba40d3..bc14219 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-natsort_8.1.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-natsort_8.2.0.bb
@@ -4,7 +4,7 @@
 LIC_FILES_CHKSUM = "file://LICENSE;md5=58db8ac9e152dd9b700f4d39ff40a31a"
 
 PYPI_PACKAGE = "natsort"
-SRC_URI[sha256sum] = "c7c1f3f27c375719a4dfcab353909fe39f26c2032a062a8c80cc844eaaca0445"
+SRC_URI[sha256sum] = "57f85b72c688b09e053cdac302dd5b5b53df5f73ae20b4874fcbffd8bf783d11"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-networkx_2.8.5.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-networkx_2.8.6.bb
similarity index 88%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-networkx_2.8.5.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-networkx_2.8.6.bb
index 8d0b8db..3eebfcf 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-networkx_2.8.5.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-networkx_2.8.6.bb
@@ -3,7 +3,7 @@
 LICENSE = "BSD-3-Clause"
 LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=44614b6df7cf3c19be69d0a945e29904"
 
-SRC_URI[sha256sum] = "15a7b81a360791c458c55a417418ea136c13378cfdc06a2dcdc12bd2f9cf09c1"
+SRC_URI[sha256sum] = "bd2b7730300860cbd2dafe8e5af89ff5c9a65c3975b352799d87a6238b4301a6"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-oauthlib_3.2.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-oauthlib_3.2.1.bb
similarity index 92%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-oauthlib_3.2.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-oauthlib_3.2.1.bb
index e7f7f0b..8cc3048 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-oauthlib_3.2.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-oauthlib_3.2.1.bb
@@ -4,7 +4,7 @@
 LICENSE = "BSD-3-Clause"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=abd2675e944a2011aed7e505290ba482"
 
-SRC_URI[sha256sum] = "23a8208d75b902797ea29fd31fa80a15ed9dc2c6c16fe73f5d346f83f6fa27a2"
+SRC_URI[sha256sum] = "1565237372795bf6ee3e5aba5e2a85bd5a65d0e2aa5c628b9a97b7d7a0da3721"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-pandas_1.4.3.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-pandas_1.5.0.bb
similarity index 80%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-pandas_1.4.3.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-pandas_1.5.0.bb
index 2d3333c..998aa12 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-pandas_1.4.3.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-pandas_1.5.0.bb
@@ -4,9 +4,9 @@
 the Python programming language."
 HOMEPAGE = "http://pandas.pydata.org/"
 LICENSE = "BSD-3-Clause"
-LIC_FILES_CHKSUM = "file://LICENSE;md5=3f23c5c092b74d245d48eeef72bc3fd2"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=c1cc9ab35a8b2aabf933cd6d245b5db3"
 
-SRC_URI[sha256sum] = "2ff7788468e75917574f080cd4681b27e1a7bf36461fe968b49a87b5a54d007c"
+SRC_URI[sha256sum] = "3ee61b881d2f64dd90c356eb4a4a4de75376586cd3c9341c6c0fcaae18d52977"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-parsimonious_0.9.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-parsimonious_0.10.0.bb
similarity index 78%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-parsimonious_0.9.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-parsimonious_0.10.0.bb
index 5a604fa..c235222 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-parsimonious_0.9.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-parsimonious_0.10.0.bb
@@ -4,7 +4,7 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=3396ea30f9d21389d7857719816f83b5"
 
-SRC_URI[sha256sum] = "b2ad1ae63a2f65bd78f5e0a8ac510a98f3607a43f1db2a8d46636a5d9e4a30c1"
+SRC_URI[sha256sum] = "8281600da180ec8ae35427a4ab4f7b82bfec1e3d1e52f80cb60ea82b9512501c"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-path_16.4.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-path_16.5.0.bb
similarity index 74%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-path_16.4.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-path_16.5.0.bb
index 10b1128..3dc5d7c 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-path_16.4.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-path_16.5.0.bb
@@ -2,7 +2,7 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=7a7126e068206290f3fe9f8d6c713ea6"
 
-SRC_URI[sha256sum] = "baf2e757c4b19be8208f9e67e48fb475b4a577d5613590ce46693bdbdf082f52"
+SRC_URI[sha256sum] = "2722e500b370bc00d5934d2207e26b17a09ee73eb0150f651d5a255d8be935a2"
 
 inherit pypi python_setuptools_build_meta
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-portion_2.2.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-portion_2.3.0.bb
similarity index 67%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-portion_2.2.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-portion_2.3.0.bb
index 0dad8b0..1638d64 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-portion_2.2.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-portion_2.3.0.bb
@@ -7,6 +7,10 @@
 
 inherit pypi setuptools3
 
-SRC_URI[sha256sum] = "b6bfb08a7834787aca076da1200b735d97beef61b60a462b05213e7354a099cf"
+SRC_URI[sha256sum] = "0e9d42838099263201b25517e4c1bd57042b5fe44432d6df38cef72d84d1eb1f"
+
+RDEPENDS:${PN} = "\
+    ${PYTHON_PN}-sortedcontainers \
+"
 
 BBCLASSEXTEND = "native"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-prettytable_3.3.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-prettytable_3.4.1.bb
similarity index 92%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-prettytable_3.3.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-prettytable_3.4.1.bb
index b98ee49..c8b50f0 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-prettytable_3.3.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-prettytable_3.4.1.bb
@@ -3,7 +3,7 @@
 LICENSE = "BSD-3-Clause"
 LIC_FILES_CHKSUM = "file://COPYING;md5=c9a6829fcd174d9535b46211917c7671"
 
-SRC_URI[sha256sum] = "118eb54fd2794049b810893653b20952349df6d3bc1764e7facd8a18064fa9b0"
+SRC_URI[sha256sum] = "7d7dd84d0b206f2daac4471a72f299d6907f34516064feb2838e333a4e2567bd"
 
 do_install:append() {
     perm_files=`find "${D}${PYTHON_SITEPACKAGES_DIR}/" -name "*.txt" -o -name "PKG-INFO"`
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-prompt-toolkit_3.0.30.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-prompt-toolkit_3.0.31.bb
similarity index 87%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-prompt-toolkit_3.0.30.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-prompt-toolkit_3.0.31.bb
index 15627b1..0b8962d 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-prompt-toolkit_3.0.30.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-prompt-toolkit_3.0.31.bb
@@ -3,7 +3,7 @@
 LICENSE = "BSD-3-Clause"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=b2cde7da89f0c1f3e49bf968d00d554f"
 
-SRC_URI[sha256sum] = "859b283c50bde45f5f97829f77a4674d1c1fcd88539364f1b28a37805cfd89c0"
+SRC_URI[sha256sum] = "9ada952c9d1787f52ff6d5f3484d0b4df8952787c087edf6a1f7c2cb1ea88148"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-protobuf_4.21.5.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-protobuf_4.21.6.bb
similarity index 91%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-protobuf_4.21.5.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-protobuf_4.21.6.bb
index 5376830..96b3d02 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-protobuf_4.21.5.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-protobuf_4.21.6.bb
@@ -7,7 +7,7 @@
 
 inherit pypi setuptools3
 
-SRC_URI[sha256sum] = "eb1106e87e095628e96884a877a51cdb90087106ee693925ec0a300468a9be3a"
+SRC_URI[sha256sum] = "6b1040a5661cd5f6e610cbca9cfaa2a17d60e2bb545309bc1b278bb05be44bdd"
 
 # http://errors.yoctoproject.org/Errors/Details/184715/
 # Can't find required file: ../src/google/protobuf/descriptor.proto
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-pycocotools_2.0.4.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-pycocotools_2.0.5.bb
similarity index 83%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-pycocotools_2.0.4.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-pycocotools_2.0.5.bb
index ec8585e..234942b 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-pycocotools_2.0.4.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-pycocotools_2.0.5.bb
@@ -5,7 +5,7 @@
 
 inherit setuptools3 pypi
 
-SRC_URI[sha256sum] = "2ab586aa389b9657b6d73c2b9a827a3681f8d00f36490c2e8ab05902e3fd9e93"
+SRC_URI[sha256sum] = "41d1fb062df5bab5ebc3e92971455aa089479e7cd10553278ca54628b9dc9bf5"
 
 DEPENDS = "python3-cython-native python3-numpy-native virtual/crypt"
 RDEPENDS:${PN} = "python3-matplotlib python3-pillow python3-profile"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-pydantic_1.10.2.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-pydantic_1.10.2.bb
new file mode 100644
index 0000000..16bb1f6
--- /dev/null
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-pydantic_1.10.2.bb
@@ -0,0 +1,20 @@
+SUMMARY = "Data validation and settings management using Python type hinting"
+HOMEPAGE = "https://github.com/samuelcolvin/pydantic"
+LICENSE = "MIT"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=2c02ea30650b91528657db64baea1757"
+
+inherit pypi setuptools3
+
+SRC_URI[sha256sum] = "91b8e218852ef6007c2b98cd861601c6a09f1aa32bbbb74fab5b1c33d4a1e410"
+
+RDEPENDS:${PN} += "\
+    ${PYTHON_PN}-typing-extensions \
+    ${PYTHON_PN}-core \
+    ${PYTHON_PN}-numbers \
+    ${PYTHON_PN}-json \
+    ${PYTHON_PN}-datetime \
+    ${PYTHON_PN}-io \
+    ${PYTHON_PN}-netclient \
+    ${PYTHON_PN}-image \
+    ${PYTHON_PN}-logging \
+"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-pydantic_1.9.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-pydantic_1.9.1.bb
deleted file mode 100644
index 1d113c9..0000000
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-pydantic_1.9.1.bb
+++ /dev/null
@@ -1,11 +0,0 @@
-SUMMARY = "Data validation and settings management using Python type hinting"
-HOMEPAGE = "https://github.com/samuelcolvin/pydantic"
-LICENSE = "MIT"
-LIC_FILES_CHKSUM = "file://LICENSE;md5=2c02ea30650b91528657db64baea1757"
-RDEPENDS:${PN} += "\
-    python3-typing-extensions \
-"
-
-inherit pypi setuptools3
-
-SRC_URI[sha256sum] = "1ed987c3ff29fff7fd8c3ea3a3ea877ad310aae2ef9889a119e22d3f2db0691a"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-pyjwt_2.4.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-pyjwt_2.5.0.bb
similarity index 83%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-pyjwt_2.4.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-pyjwt_2.5.0.bb
index a540e36..f587e5c 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-pyjwt_2.4.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-pyjwt_2.5.0.bb
@@ -5,7 +5,7 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=e4b56d2c9973d8cf54655555be06e551"
 
-SRC_URI[sha256sum] = "d42908208c699b3b973cbeb01a969ba6a96c821eefb1c5bfe4c390c01d67abba"
+SRC_URI[sha256sum] = "e77ab89480905d86998442ac5788f35333fa85f65047a534adc38edf3c88fc3b"
 
 PYPI_PACKAGE = "PyJWT"
 inherit pypi setuptools3
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-pymetno_0.9.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-pymetno_0.10.0.bb
similarity index 90%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-pymetno_0.9.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-pymetno_0.10.0.bb
index 50b202b..c5ab635 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-pymetno_0.9.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-pymetno_0.10.0.bb
@@ -4,7 +4,7 @@
 LIC_FILES_CHKSUM = "file://LICENSE;md5=5d503272f52c35147ec960cb56a03bf4"
 
 SRC_URI = "git://github.com/Danielhiversen/pyMetno.git;protocol=https;branch=master"
-SRCREV = "3b18971fb882deaaebb9aa511627c5fc6fb97526"
+SRCREV = "5fc18d7c5d627a9d9b2529db98d4e7e01a37dbf9"
 S = "${WORKDIR}/git"
 
 inherit setuptools3
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-pyproj_3.3.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-pyproj_3.4.0.bb
similarity index 83%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-pyproj_3.3.1.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-pyproj_3.4.0.bb
index 6085e48..62eceb2 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-pyproj_3.3.1.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-pyproj_3.4.0.bb
@@ -8,7 +8,7 @@
 
 inherit pypi setuptools3
 
-SRC_URI[sha256sum] = "b3d8e14d91cc95fb3dbc03a9d0588ac58326803eefa5bbb0978d109de3304fbe"
+SRC_URI[sha256sum] = "a708445927ace9857f52c3ba67d2915da7b41a8fdcd9b8f99a4c9ed60a75eb33"
 
 RDEPENDS:${PN} = "${PYTHON_PN}-certifi proj"
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-pyscaffold_4.3.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-pyscaffold_4.3.1.bb
similarity index 87%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-pyscaffold_4.3.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-pyscaffold_4.3.1.bb
index 027d7b9..a19e387 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-pyscaffold_4.3.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-pyscaffold_4.3.1.bb
@@ -9,7 +9,7 @@
 LIC_FILES_CHKSUM = "file://LICENSE.txt;md5=14a49c74a1d91829908ac756c07e6b91"
 DEPENDS += "python3-setuptools-scm-native"
 
-SRC_URI[sha256sum] = "1a8c39bbad9abc121d6e126035740ba5f043434abb432f368a3a76596184b3ed"
+SRC_URI[sha256sum] = "50cb1f910163204caec30c7c6bbe70f1a81c377538b8c8340d23abe31f5ca5b4"
 
 inherit pypi python_setuptools_build_meta
 PYPI_PACKAGE = "PyScaffold"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-pytz-deprecation-shim_0.1.0.post0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-pytz-deprecation-shim_0.1.0.post0.bb
index d116c3b..022cf39 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-pytz-deprecation-shim_0.1.0.post0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-pytz-deprecation-shim_0.1.0.post0.bb
@@ -9,3 +9,7 @@
 PYPI_SRC_URI = "https://files.pythonhosted.org/packages/94/f0/909f94fea74759654390a3e1a9e4e185b6cd9aa810e533e3586f39da3097/${PYPI_PACKAGE}-${PV}.tar.gz"
 
 inherit pypi python_setuptools_build_meta
+
+RDEPENDS:${PN} += "\
+        ${PYTHON_PN}-zoneinfo \
+"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-pyudev_0.23.2.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-pyudev_0.24.0.bb
similarity index 84%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-pyudev_0.23.2.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-pyudev_0.24.0.bb
index 4c4c959..8415464 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-pyudev_0.23.2.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-pyudev_0.24.0.bb
@@ -3,7 +3,7 @@
 LICENSE = "LGPL-2.1-or-later"
 LIC_FILES_CHKSUM = "file://COPYING;md5=a6f89e2100d9b6cdffcea4f398e37343"
 
-SRC_URI[sha256sum] = "32ae3585b320a51bc283e0a04000fd8a25599edb44541e2f5034f6afee5d15cc"
+SRC_URI[sha256sum] = "b2a3afe1c99ea751f8296652557eac559874da2a1b1ec0625178706ec5a345f3"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-pyzmq_23.2.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-pyzmq_24.0.1.bb
similarity index 79%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-pyzmq_23.2.1.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-pyzmq_24.0.1.bb
index 40cb22b..b3b0aef 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-pyzmq_23.2.1.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-pyzmq_24.0.1.bb
@@ -13,7 +13,7 @@
     file://club-rpath-out.patch \
     file://run-ptest \
 "
-SRC_URI[sha256sum] = "2b381aa867ece7d0a82f30a0c7f3d4387b7cf2e0697e33efaa5bed6c5784abcd"
+SRC_URI[sha256sum] = "216f5d7dbb67166759e59b0479bca82b8acf9bed6015b526b8eb10143fb08e77"
 
 inherit pypi pkgconfig python_setuptools_build_meta ptest
 
@@ -32,6 +32,7 @@
 
 RDEPENDS:${PN}-ptest += "\
         ${PN}-test \
+        ${PYTHON_PN}-pytest \
 "
 
 do_compile:prepend() {
@@ -43,6 +44,11 @@
     echo no_libzmq_extension = True >> ${S}/setup.cfg
 }
 
+do_install:append() {
+        sed -i -e 's#${RECIPE_SYSROOT}##g' ${D}${PYTHON_SITEPACKAGES_DIR}/zmq/utils/config.json
+        sed -i -e 's#${RECIPE_SYSROOT}##g' ${D}${PYTHON_SITEPACKAGES_DIR}/zmq/utils/compiler.json
+}
+
 do_install_ptest() {
         install -d ${D}${PTEST_PATH}/tests
         cp -rf ${S}/zmq/tests/* ${D}${PTEST_PATH}/tests/
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-regex_2022.8.17.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-regex_2022.9.13.bb
similarity index 78%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-regex_2022.8.17.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-regex_2022.9.13.bb
index 4999fc6..886acd7 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-regex_2022.8.17.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-regex_2022.9.13.bb
@@ -5,7 +5,7 @@
 
 inherit pypi setuptools3
 
-SRC_URI[sha256sum] = "5c77eab46f3a2b2cd8bbe06467df783543bf7396df431eb4a144cc4b89e9fb3c"
+SRC_URI[sha256sum] = "f07373b6e56a6f3a0df3d75b651a278ca7bd357a796078a26a958ea1ce0588fd"
 
 RDEPENDS:${PN} += " \
 	python3-stringold \
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-requests-unixsocket_0.3.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-requests-unixsocket_0.3.0.bb
index 330ab7c..94497b6 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-requests-unixsocket_0.3.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-requests-unixsocket_0.3.0.bb
@@ -11,4 +11,4 @@
 inherit setuptools3
 
 DEPENDS += "python3-pbr-native"
-RDEPENDS_${PN} = "python3-requests python3-urllib3"
+RDEPENDS:${PN} = "python3-requests python3-urllib3"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-soupsieve_2.3.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-soupsieve_2.3.2.post1.bb
similarity index 68%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-soupsieve_2.3.1.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-soupsieve_2.3.2.post1.bb
index 7cb76b4..99b6f7c 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-soupsieve_2.3.1.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-soupsieve_2.3.2.post1.bb
@@ -2,11 +2,11 @@
 HOMEPAGE = "https://github.com/facelessuser/soupsieve"
 
 LICENSE = "MIT"
-LIC_FILES_CHKSUM = "file://LICENSE.md;md5=33c3a77def9b3ad83e01c65bdcc1af67"
+LIC_FILES_CHKSUM = "file://LICENSE.md;md5=c7a2acf04248c0d02dac4c82ee8a7f56"
 
-SRC_URI[sha256sum] = "b8d49b1cd4f037c7082a9683dfa1801aa2597fb11c3a1155b7a5b94829b4f1f9"
+SRC_URI[sha256sum] = "fc53893b3da2c33de295667a0e19f078c14bf86544af307354de5fcf12a3f30d"
 
-inherit pypi python_setuptools_build_meta ptest
+inherit pypi python_hatchling python_setuptools_build_meta ptest
 
 SRC_URI += " \
         file://run-ptest \
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-sqlalchemy_1.4.40.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-sqlalchemy_1.4.41.bb
similarity index 86%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-sqlalchemy_1.4.40.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-sqlalchemy_1.4.41.bb
index f61030f..5b93458 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-sqlalchemy_1.4.40.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-sqlalchemy_1.4.41.bb
@@ -4,7 +4,7 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=f4001d1ca15b69d096fa1b4fd1bdce79"
 
-SRC_URI[sha256sum] = "44a660506080cc975e1dfa5776fe5f6315ddc626a77b50bf0eee18b0389ea265"
+SRC_URI[sha256sum] = "0292f70d1797e3c54e862e6f30ae474014648bc9c723e14a2fda730adb0a9791"
 
 PYPI_PACKAGE = "SQLAlchemy"
 inherit pypi setuptools3
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-sqlparse_0.4.2.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-sqlparse_0.4.3.bb
similarity index 87%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-sqlparse_0.4.2.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-sqlparse_0.4.3.bb
index 0980ff9..c952c71 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-sqlparse_0.4.2.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-sqlparse_0.4.3.bb
@@ -8,7 +8,7 @@
             file://run-ptest \
 	    "
 
-SRC_URI[sha256sum] = "0c00730c74263a94e5a9919ade150dfc3b19c574389985446148402998287dae"
+SRC_URI[sha256sum] = "69ca804846bb114d2ec380e4360a8a340db83f0ccf3afceeb1404df028f57268"
 
 export BUILD_SYS
 export HOST_SYS
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-sympy_1.10.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-sympy_1.10.1.bb
deleted file mode 100644
index d8ac23d..0000000
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-sympy_1.10.1.bb
+++ /dev/null
@@ -1,12 +0,0 @@
-SUMMARY = "Computer algebra system (CAS) in Python"
-HOMEPAGE = "https://pypi.org/project/sympy/"
-LICENSE = "BSD-3-Clause"
-LIC_FILES_CHKSUM = "file://LICENSE;md5=42976c55ba05d15b32a7b4757dee5e64"
-
-SRC_URI[sha256sum] = "5939eeffdf9e152172601463626c022a2c27e75cf6278de8d401d50c9d58787b"
-
-inherit pypi setuptools3
-
-RDEPENDS:${PN} += "python3-mpmath"
-
-BBCLASSEXTEND = "native nativesdk"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-sympy_1.11.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-sympy_1.11.1.bb
new file mode 100644
index 0000000..4e32936
--- /dev/null
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-sympy_1.11.1.bb
@@ -0,0 +1,12 @@
+SUMMARY = "Computer algebra system (CAS) in Python"
+HOMEPAGE = "https://pypi.org/project/sympy/"
+LICENSE = "BSD-3-Clause"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=9f6c7fdc2d69e31ad7abaea029c8ac16"
+
+SRC_URI[sha256sum] = "e32380dce63cb7c0108ed525570092fd45168bdae2faa17e528221ef72e88658"
+
+inherit pypi setuptools3
+
+RDEPENDS:${PN} += "python3-mpmath"
+
+BBCLASSEXTEND = "native nativesdk"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-termcolor/0001-setup.py-Use-setuptools-instead-of-distutils.patch b/meta-openembedded/meta-python/recipes-devtools/python/python3-termcolor/0001-setup.py-Use-setuptools-instead-of-distutils.patch
deleted file mode 100644
index a55a6a9..0000000
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-termcolor/0001-setup.py-Use-setuptools-instead-of-distutils.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From e30308284c721210e2ba50d8b3d159cedf5eada8 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Mon, 28 Feb 2022 14:44:42 -0800
-Subject: [PATCH] setup.py: Use setuptools instead of distutils
-
-Upstream-Status: Pending
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- setup.py | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/setup.py b/setup.py
-index c4fe4ed..dad1d29 100755
---- a/setup.py
-+++ b/setup.py
-@@ -23,7 +23,7 @@
- # Author: Konstantin Lepa <konstantin.lepa@gmail.com>
- 
- import os
--from distutils.core import setup
-+from setuptools import setup
- 
- prjdir = os.path.dirname(__file__)
- 
--- 
-2.35.1
-
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-termcolor_1.1.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-termcolor_1.1.0.bb
deleted file mode 100644
index 15bab62..0000000
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-termcolor_1.1.0.bb
+++ /dev/null
@@ -1,13 +0,0 @@
-SUMMARY = "ANSII Color formatting for output in terminal"
-HOMEPAGE = "https://pypi.python.org/pypi/termcolor"
-SECTION = "devel/python"
-LICENSE = "MIT"
-LIC_FILES_CHKSUM = "file://COPYING.txt;md5=809e8749b63567978acfbd81d9f6a27d"
-
-inherit pypi setuptools3
-
-SRC_URI += "file://0001-setup.py-Use-setuptools-instead-of-distutils.patch"
-
-SRC_URI[sha256sum] = "1d6d69ce66211143803fbc56652b41d73b4a400a2891d7bf7a1cdf4c02de613b"
-
-BBCLASSEXTEND = "native"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-termcolor_2.0.1.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-termcolor_2.0.1.bb
new file mode 100644
index 0000000..47f7938
--- /dev/null
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-termcolor_2.0.1.bb
@@ -0,0 +1,16 @@
+SUMMARY = "ANSII Color formatting for output in terminal"
+HOMEPAGE = "https://pypi.python.org/pypi/termcolor"
+SECTION = "devel/python"
+LICENSE = "MIT"
+LIC_FILES_CHKSUM = "file://COPYING.txt;md5=e5f5f7c9b280511f124dba5dda3d180e"
+
+inherit pypi python_setuptools_build_meta
+
+SRC_URI[sha256sum] = "6b2cf769e93364a2676e1de56a7c0cff2cf5bd07f37e9cc80b0dd6320ebfe388"
+
+DEPENDS += " \
+	${PYTHON_PN}-toml-native \
+	${PYTHON_PN}-hatch-vcs-native \
+"
+
+BBCLASSEXTEND = "native"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-traitlets_5.3.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-traitlets_5.4.0.bb
similarity index 79%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-traitlets_5.3.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-traitlets_5.4.0.bb
index 7c81867..84f9727 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-traitlets_5.3.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-traitlets_5.4.0.bb
@@ -4,7 +4,7 @@
 LICENSE = "BSD-3-Clause"
 LIC_FILES_CHKSUM = "file://COPYING.md;md5=9c125dfc5ff5364d40b5f56f02cd9de3"
 
-SRC_URI[sha256sum] = "0bb9f1f9f017aa8ec187d8b1b2a7a6626a2a1d877116baba52a129bfa124f8e2"
+SRC_URI[sha256sum] = "3f2c4e435e271592fe4390f1746ea56836e3a080f84e7833f0f801d9613fec39"
 
 inherit pypi python_hatchling
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-transitions_0.8.11.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-transitions_0.8.11.bb
deleted file mode 100644
index 1418942..0000000
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-transitions_0.8.11.bb
+++ /dev/null
@@ -1,9 +0,0 @@
-SUMMARY = "A lightweight, object-oriented Python state machine implementation with many extensions."
-LICENSE = "MIT"
-LIC_FILES_CHKSUM = "file://LICENSE;md5=baac7be1f4c17620df74188e23da6d47"
-
-inherit pypi setuptools3
-
-SRC_URI[sha256sum] = "7b20d32906ea4d60ee6f6c1f5dc9c9f178802425c5b155213eb0f25c277f04e4"
-
-RDEPENDS:${PN} += "python3-six python3-logging"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-transitions_0.9.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-transitions_0.9.0.bb
new file mode 100644
index 0000000..980351b
--- /dev/null
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-transitions_0.9.0.bb
@@ -0,0 +1,9 @@
+SUMMARY = "A lightweight, object-oriented Python state machine implementation with many extensions."
+LICENSE = "MIT"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=de0a0876a688a4483bfafa764773ab39"
+
+inherit pypi setuptools3
+
+SRC_URI[sha256sum] = "2f54d11bdb225779d7e729011e93a9fb717668ce3dc65f8d4f5a5d7ba2f48e10"
+
+RDEPENDS:${PN} += "python3-six python3-logging"
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-twisted_22.4.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-twisted_22.8.0.bb
similarity index 98%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-twisted_22.4.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-twisted_22.8.0.bb
index 9429f8f..f40c5f0 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-twisted_22.4.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-twisted_22.8.0.bb
@@ -7,7 +7,7 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=0f8d67f84b6e178c92d471011b2245fc"
 
-SRC_URI[sha256sum] = "a047990f57dfae1e0bd2b7df2526d4f16dcdc843774dc108b78c52f2a5f13680"
+SRC_URI[sha256sum] = "e5b60de39f2d1da153fbe1874d885fe3fcbdb21fcc446fa759a53e8fc3513bed"
 
 PYPI_PACKAGE = "Twisted"
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-ujson_5.4.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-ujson_5.5.0.bb
similarity index 90%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-ujson_5.4.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-ujson_5.5.0.bb
index 15cf80e..9f080eb 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-ujson_5.4.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-ujson_5.5.0.bb
@@ -4,7 +4,7 @@
 LICENSE = "BSD-3-Clause"
 LIC_FILES_CHKSUM = "file://PKG-INFO;beginline=8;endline=8;md5=e0039a83d8a99726b5418f0b03302d0a"
 
-SRC_URI[sha256sum] = "6b953e09441e307504130755e5bd6b15850178d591f66292bba4608c4f7f9b00"
+SRC_URI[sha256sum] = "b25077a971c7da47bd6846a912a747f6963776d90720c88603b1b55d81790780"
 
 inherit pypi ptest setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-web3_5.30.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-web3_5.31.0.bb
similarity index 83%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-web3_5.30.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-web3_5.31.0.bb
index a35c05e..435eb1d 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-web3_5.30.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-web3_5.31.0.bb
@@ -4,7 +4,7 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=1dc2732bdc5e50382737979791cbb3b7"
 
-SRC_URI[sha256sum] = "e141d90408fd9fe5156e2ef22884a160bef8bfd55e6cecd51181af3162ea84dd"
+SRC_URI[sha256sum] = "ef0ad5c62958fe18202bacfa1f216a57d97a8abdecc68f87946c02b38aaab34e"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-websocket-client_1.3.3.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-websocket-client_1.4.1.bb
similarity index 83%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-websocket-client_1.3.3.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-websocket-client_1.4.1.bb
index 9c3ff02..39c5921 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-websocket-client_1.3.3.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-websocket-client_1.4.1.bb
@@ -7,7 +7,7 @@
 LICENSE = "Apache-2.0"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=b969e9612325987c823fc0737063ebc8"
 
-SRC_URI[sha256sum] = "d58c5f284d6a9bf8379dab423259fe8f85b70d5fa5d2916d5791a84594b122b1"
+SRC_URI[sha256sum] = "f9611eb65c8241a67fb373bef040b3cf8ad377a9f6546a12b620b6511e8ea9ef"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-xmlschema_2.0.2.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-xmlschema_2.1.0.bb
similarity index 71%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-xmlschema_2.0.2.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-xmlschema_2.1.0.bb
index eb9d4f5..d7988b8 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-xmlschema_2.0.2.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-xmlschema_2.1.0.bb
@@ -1,9 +1,9 @@
 SUMMARY = "The xmlschema library is an implementation of XML Schema for Python (supports Python 3.6+)."
 HOMEPAGE = "https://github.com/sissaschool/xmlschema"
 LICENSE = "MIT"
-LIC_FILES_CHKSUM = "file://LICENSE;md5=47489cb18c469474afeb259ed1d4832f"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=0ab20f8e337bea2e6874f372edfd12c0"
 
-SRC_URI[sha256sum] = "ce915696b3a819fe0f986824517b9281dbd5626fd2d213363fab40f34edf05bd"
+SRC_URI[sha256sum] = "e7f70c305e0def2a2293ab7110827086c477ab3c6f8757d2acb3ccbfa6b425a4"
 
 PYPI_PACKAGE = "xmlschema"
 inherit pypi setuptools3
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-zeroconf_0.39.0.bb b/meta-openembedded/meta-python/recipes-devtools/python/python3-zeroconf_0.39.1.bb
similarity index 81%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-zeroconf_0.39.0.bb
rename to meta-openembedded/meta-python/recipes-devtools/python/python3-zeroconf_0.39.1.bb
index 9b15763..6f44248 100644
--- a/meta-openembedded/meta-python/recipes-devtools/python/python3-zeroconf_0.39.0.bb
+++ b/meta-openembedded/meta-python/recipes-devtools/python/python3-zeroconf_0.39.1.bb
@@ -3,7 +3,7 @@
 LICENSE = "LGPL-2.1-only"
 LIC_FILES_CHKSUM = "file://COPYING;md5=3bb705b228ea4a14ea2728215b780d80"
 
-SRC_URI[sha256sum] = "7c0d8257b940ee43e637fb560c2f9bd79da0638f37af162eb4f506f7274ef8e4"
+SRC_URI[sha256sum] = "b83cff68a0c8dcd2705b5e792796239accba2bfddb09bc8d05badc642f64e7f6"
 
 inherit pypi setuptools3
 
diff --git a/meta-openembedded/meta-python/recipes-networking/python/python3-ldap_3.4.2.bb b/meta-openembedded/meta-python/recipes-networking/python/python3-ldap_3.4.3.bb
similarity index 90%
rename from meta-openembedded/meta-python/recipes-networking/python/python3-ldap_3.4.2.bb
rename to meta-openembedded/meta-python/recipes-networking/python/python3-ldap_3.4.3.bb
index 44a3d00..f2c0d2e 100644
--- a/meta-openembedded/meta-python/recipes-networking/python/python3-ldap_3.4.2.bb
+++ b/meta-openembedded/meta-python/recipes-networking/python/python3-ldap_3.4.3.bb
@@ -13,7 +13,7 @@
 
 inherit pypi setuptools3
 
-SRC_URI[sha256sum] = "b16470a0983aaf09a00ffb8f40b69a2446f3d0be639a229256bce381fcb268f7"
+SRC_URI[sha256sum] = "ab26c519a0ef2a443a2a10391fa3c5cb52d7871323399db949ebfaa9f25ee2a0"
 
 do_configure:prepend() {
     sed -i -e 's:^library_dirs =.*::' \
diff --git a/meta-openembedded/meta-webserver/recipes-httpd/sthttpd/sthttpd/0001-Define-_GNU_SOURCE-if-HAVE_SIGSET-is-set.patch b/meta-openembedded/meta-webserver/recipes-httpd/sthttpd/sthttpd/0001-Define-_GNU_SOURCE-if-HAVE_SIGSET-is-set.patch
new file mode 100644
index 0000000..a1783a7
--- /dev/null
+++ b/meta-openembedded/meta-webserver/recipes-httpd/sthttpd/sthttpd/0001-Define-_GNU_SOURCE-if-HAVE_SIGSET-is-set.patch
@@ -0,0 +1,51 @@
+From f3889e5870e9761ee6113fac7f38aa44cc43e46c Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 7 Sep 2022 00:30:52 -0700
+Subject: [PATCH] Define _GNU_SOURCE if HAVE_SIGSET is set
+
+This enforces using the sigset() API, which needs the _GNU_SOURCE macro to be
+defined.
+
+Upstream-Status: Submitted [https://github.com/blueness/sthttpd/pull/16]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ src/libhttpd.c | 5 ++++-
+ src/thttpd.c   | 4 ++++
+ 2 files changed, 8 insertions(+), 1 deletion(-)
+
+diff --git a/src/libhttpd.c b/src/libhttpd.c
+index fa42c10..669be11 100644
+--- a/src/libhttpd.c
++++ b/src/libhttpd.c
+@@ -25,9 +25,12 @@
+ ** SUCH DAMAGE.
+ */
+ 
+-
+ #include <config.h>
+ 
++#ifdef HAVE_SIGSET
++#define _GNU_SOURCE
++#endif
++
+ //system headers
+ #include <sys/types.h>
+ #include <sys/param.h>
+diff --git a/src/thttpd.c b/src/thttpd.c
+index ad97188..3c7a449 100644
+--- a/src/thttpd.c
++++ b/src/thttpd.c
+@@ -28,6 +28,10 @@
+ 
+ #include <config.h>
+ 
++#ifdef HAVE_SIGSET
++#define _GNU_SOURCE
++#endif
++
+ //system headers
+ #include <sys/param.h>
+ #include <sys/types.h>
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-webserver/recipes-httpd/sthttpd/sthttpd_2.27.1.bb b/meta-openembedded/meta-webserver/recipes-httpd/sthttpd/sthttpd_2.27.1.bb
index 4134a0e..b40b148 100644
--- a/meta-openembedded/meta-webserver/recipes-httpd/sthttpd/sthttpd_2.27.1.bb
+++ b/meta-openembedded/meta-webserver/recipes-httpd/sthttpd/sthttpd_2.27.1.bb
@@ -8,6 +8,7 @@
 
 SRCREV = "2845bf5bff2b820d2336c8c8061cbfc5f271e720"
 SRC_URI = "git://github.com/blueness/${BPN};branch=master;protocol=https \
+           file://0001-Define-_GNU_SOURCE-if-HAVE_SIGSET-is-set.patch \
            file://thttpd.service \
            file://thttpd.conf \
            file://init"
diff --git a/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/cockpit_220.bb b/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/cockpit_276.bb
similarity index 95%
rename from meta-openembedded/meta-webserver/recipes-webadmin/cockpit/cockpit_220.bb
rename to meta-openembedded/meta-webserver/recipes-webadmin/cockpit/cockpit_276.bb
index c08de89..93198e3 100644
--- a/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/cockpit_220.bb
+++ b/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/cockpit_276.bb
@@ -8,10 +8,10 @@
     https://github.com/cockpit-project/cockpit/releases/download/${PV}/cockpit-${PV}.tar.xz \
     file://0001-remove-tests-dep-on-gobject-intro.patch \
     file://0002-fix-makefile-use-copy-rule-for-unmodified-files.patch \
+    file://0001-Warn-not-error-if-xsltproc-is-not-found.patch \
     file://cockpit.pam \
     "
-SRC_URI[md5sum] = "beb88d8e70ee1da6ebd917c956217803"
-SRC_URI[sha256sum] = "afc82acc8ef9d51e0f34265a07a2f059f5b71a1df721b299e657a40a098cbb7f"
+SRC_URI[sha256sum] = "dde91a157ee8514112334653fa2775a99d71bd1b604067a48eaf7411114d19de"
 
 inherit gettext pkgconfig autotools systemd features_check
 
@@ -30,6 +30,7 @@
 EXTRA_OECONF = " \
     --with-cockpit-user=${COCKPIT_USER_GROUP} \
     --with-cockpit-group=${COCKPIT_USER_GROUP} \
+    --with-admin-group=${COCKPIT_USER_GROUP} \
     --with-cockpit-ws-instance-user=${COCKPIT_WS_USER_GROUP} \
     --with-cockpit-ws-instance-group=${COCKPIT_WS_USER_GROUP} \
     --disable-doc \
@@ -170,6 +171,8 @@
     ${nonarch_libdir}/firewalld \
 "
 RDEPENDS:${PN} += "${PN}-bridge"
+# Needs bash for /usr/libexec/cockpit-certificate-helper
+RDEPENDS:${PN} += "bash"
 
 do_install:append() {
     pkgdatadir=${datadir}/cockpit
diff --git a/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/files/0001-Warn-not-error-if-xsltproc-is-not-found.patch b/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/files/0001-Warn-not-error-if-xsltproc-is-not-found.patch
new file mode 100644
index 0000000..b3b0988
--- /dev/null
+++ b/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/files/0001-Warn-not-error-if-xsltproc-is-not-found.patch
@@ -0,0 +1,27 @@
+From 588bb5cb248aaeaf0fea33084229c99ad1574291 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Wed, 7 Sep 2022 11:12:28 -0700
+Subject: [PATCH] Warn not error if xsltproc is not found
+
+Upstream-Status: Inappropriate [oe-core specific]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ configure.ac | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/configure.ac b/configure.ac
+index 7659f41..537203e 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -177,7 +177,7 @@ AC_SUBST([systemdunitdir], [$systemdunitdir])
+ # package as xgettext, and we find them by PATH, so just check for the one.
+ AC_PATH_PROG([XGETTEXT], [xsltproc], [no])
+ if test "$XGETTEXT" = "no"; then
+-        AC_MSG_ERROR([Please install gettext tools])
++        AC_MSG_WARN([Please install gettext tools])
+ fi
+ 
+ # ssh-add
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/files/0001-remove-tests-dep-on-gobject-intro.patch b/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/files/0001-remove-tests-dep-on-gobject-intro.patch
index 2242190..8f70fe9 100644
--- a/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/files/0001-remove-tests-dep-on-gobject-intro.patch
+++ b/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/files/0001-remove-tests-dep-on-gobject-intro.patch
@@ -7,43 +7,18 @@
  src/ws/Makefile-ws.am | 54 ---------------------------------------------------
  1 file changed, 54 deletions(-)
 
-diff --git a/src/ws/Makefile-ws.am b/src/ws/Makefile-ws.am
-index 009130941..34e13d7fe 100644
 --- a/src/ws/Makefile-ws.am
 +++ b/src/ws/Makefile-ws.am
-@@ -246,60 +246,6 @@ EXTRA_DIST += \
+@@ -58,46 +58,6 @@ pam_cockpit_cert_so_LDFLAGS = -shared
+ pam_cockpit_cert_so_SOURCES = src/ws/pam_cockpit_cert.c
  
- # ----------------------------------------------------------------------------------------------------
- 
--noinst_PROGRAMS += test-server
+ # -----------------------------------------------------------------------------
+-# test-server: server for running the html/browser unit tests against
+-
 -check_PROGRAMS += test-server
 -
--GDBUS_CODEGEN_XML = $(srcdir)/src/ws/com.redhat.Cockpit.DBusTests.xml
--
--GDBUS_CODEGEN_GENERATED = \
--	src/ws/mock-dbus-tests.h \
--	src/ws/mock-dbus-tests.c \
--	$(NULL)
--
--# FIXME: --header/--body and --output are only available from GLib 2.56.
--# just use --generate-c-code and a bit of dependency ugliness for now
--GDBUS_CODEGEN_INVOCATION = \
--	$(AM_V_GEN) gdbus-codegen \
--	--interface-prefix com.redhat.Cockpit.DBusTests \
--	--c-namespace Test \
--	--c-generate-object-manager \
--	--generate-c-code src/ws/mock-dbus-tests \
--	$(GDBUS_CODEGEN_XML)
--
--BUILT_SOURCES += $(GDBUS_CODEGEN_GENERATED)
--CLEANFILES += $(GDBUS_CODEGEN_GENERATED)
--EXTRA_DIST += $(GDBUS_CODEGEN_XML)
--
--src/ws/mock-dbus-tests.h: $(GDBUS_CODEGEN_XML)
--	$(GDBUS_CODEGEN_INVOCATION)
--
--src/ws/mock-dbus-tests.c: $(GDBUS_CODEGEN_XML) src/ws/mock-dbus-tests.h
--	$(GDBUS_CODEGEN_INVOCATION)
+-test_server_CPPFLAGS = $(libcockpit_ws_a_CPPFLAGS) $(TEST_CPP)
+-test_server_LDADD = $(libcockpit_ws_a_LIBS) $(TEST_LIBS)
 -
 -test_server_SOURCES = \
 -	src/ws/mock-service.c \
@@ -51,27 +26,33 @@
 -	src/ws/test-server.c \
 -	$(NULL)
 -
--nodist_test_server_SOURCES = \
--	$(GDBUS_CODEGEN_GENERATED) \
+-test_server_CPPFLAGS += -I$(top_builddir)/src/ws
+-nodist_test_server_SOURCES = $(GDBUS_CODEGEN_GENERATED)
+-
+-BUILT_SOURCES += $(GDBUS_CODEGEN_GENERATED)
+-CLEANFILES += $(GDBUS_CODEGEN_GENERATED)
+-GDBUS_CODEGEN_GENERATED = \
+-	src/ws/mock-dbus-tests.h \
+-	src/ws/mock-dbus-tests.c \
 -	$(NULL)
 -
--test_server_CFLAGS = 					\
--	-I$(builddir)/src/ws \
--	-I$(top_srcdir)/src/ws \
--	-DG_LOG_DOMAIN=\"test-server\"			\
--	$(GIO_CFLAGS)					\
--	$(COCKPIT_WS_CFLAGS) \
+-EXTRA_DIST += $(GDBUS_CODEGEN_XML)
+-GDBUS_CODEGEN_XML = $(srcdir)/src/ws/com.redhat.Cockpit.DBusTests.xml
+-
+-GDBUS_CODEGEN_INVOCATION = \
+-	$(AM_V_GEN) gdbus-codegen \
+-	--interface-prefix com.redhat.Cockpit.DBusTests \
+-	--c-namespace Test \
+-	--c-generate-object-manager \
 -	$(NULL)
 -
--test_server_LDADD = 					\
--	$(libcockpit_ws_LIBS) \
--	$(GIO_LIBS)					\
--	-lpam 						\
--	$(NULL)
+-src/ws/mock-dbus-tests.h: $(GDBUS_CODEGEN_XML)
+-	$(GDBUS_CODEGEN_INVOCATION) --header --output $@ $<
 -
- WS_CHECKS = \
- 	test-base64 \
- 	test-creds \
--- 
-2.11.0
-
+-src/ws/mock-dbus-tests.c: $(GDBUS_CODEGEN_XML)
+-	$(GDBUS_CODEGEN_INVOCATION) --body --output $@ $<
+-
+-# -----------------------------------------------------------------------------
+ # Unit tests
+ 
+ check_SCRIPTS += src/ws/mock-cat-with-init
diff --git a/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/files/0002-fix-makefile-use-copy-rule-for-unmodified-files.patch b/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/files/0002-fix-makefile-use-copy-rule-for-unmodified-files.patch
index a1ea9bc..48702c3 100644
--- a/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/files/0002-fix-makefile-use-copy-rule-for-unmodified-files.patch
+++ b/meta-openembedded/meta-webserver/recipes-webadmin/cockpit/files/0002-fix-makefile-use-copy-rule-for-unmodified-files.patch
@@ -7,13 +7,11 @@
  pkg/Makefile.am | 27 +++++++++++++++++++++++++++
  1 file changed, 27 insertions(+)
 
-diff --git a/pkg/Makefile.am b/pkg/Makefile.am
-index 192b785..03b9787 100644
 --- a/pkg/Makefile.am
 +++ b/pkg/Makefile.am
-@@ -13,6 +13,33 @@ playground_DATA = \
- dist/playground/extra.de.po: pkg/playground/extra.de.po
- 	$(COPY_RULE)
+@@ -7,6 +7,33 @@ TESTS += $(pkg_TESTS)
+ %.metainfo.xml: %.metainfo.xml.in
+ 	$(AM_V_GEN) mkdir -p $(dir $@) && msgfmt --xml -d $(top_srcdir)/po --template $< --output $@
  
 +dist/playground/hammer.gif: pkg/playground/hammer.gif
 +	$(COPY_RULE)
@@ -43,5 +41,5 @@
 +	$(COPY_RULE)
 +
  metainfodir = ${datarootdir}/metainfo
- metainfo_DATA = pkg/sosreport/org.cockpit-project.cockpit-sosreport.metainfo.xml \
- 		pkg/kdump/org.cockpit-project.cockpit-kdump.metainfo.xml \
+ nodist_metainfo_DATA = \
+ 	pkg/sosreport/org.cockpit-project.cockpit-sosreport.metainfo.xml \
diff --git a/meta-openembedded/meta-xfce/recipes-xfce/xfce4-dev-tools/xfce4-dev-tools/0001-m4macros-Check-for-a-function-provided-by-libX11-in-.patch b/meta-openembedded/meta-xfce/recipes-xfce/xfce4-dev-tools/xfce4-dev-tools/0001-m4macros-Check-for-a-function-provided-by-libX11-in-.patch
new file mode 100644
index 0000000..43b1eb5
--- /dev/null
+++ b/meta-openembedded/meta-xfce/recipes-xfce/xfce4-dev-tools/xfce4-dev-tools/0001-m4macros-Check-for-a-function-provided-by-libX11-in-.patch
@@ -0,0 +1,35 @@
+From 8f58e1b5c15fc3b6f775207971d078c058d9eb7b Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Thu, 1 Sep 2022 08:21:56 -0700
+Subject: [PATCH] m4macros: Check for a function provided by libX11 in
+ AC_CHECK_LIB
+
+Checking for main is not ideal, since it's not a function from libX11;
+moreover, with newer versions of autoconf this will start to fail [1], so
+change this to use the XFree API, which is provided by libX11 and seems more
+relevant to check here.
+
+[1] https://lists.gnu.org/archive/html/autoconf-patches/2022-08/msg00018.html
+
+Upstream-Status: Submitted [https://gitlab.xfce.org/xfce/xfce4-dev-tools/-/issues/57]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ m4macros/xdt-depends.m4 | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/m4macros/xdt-depends.m4 b/m4macros/xdt-depends.m4
+index 3c07c90..ce1b272 100644
+--- a/m4macros/xdt-depends.m4
++++ b/m4macros/xdt-depends.m4
+@@ -213,7 +213,7 @@ AC_DEFUN([XDT_CHECK_LIBX11],
+ 
+   LIBX11_CFLAGS= LIBX11_LDFLAGS= LIBX11_LIBS=
+   if test x"$no_x" != x"yes"; then
+-    AC_CHECK_LIB([X11], [main],
++    AC_CHECK_LIB([X11], [XFree],
+     [
+       AC_DEFINE([HAVE_LIBX11], [1], [Define if libX11 is available])
+       LIBX11_CFLAGS="$X_CFLAGS"
+-- 
+2.37.3
+
diff --git a/meta-openembedded/meta-xfce/recipes-xfce/xfce4-dev-tools/xfce4-dev-tools_4.16.0.bb b/meta-openembedded/meta-xfce/recipes-xfce/xfce4-dev-tools/xfce4-dev-tools_4.16.0.bb
index 5c41a76..586a77e 100644
--- a/meta-openembedded/meta-xfce/recipes-xfce/xfce4-dev-tools/xfce4-dev-tools_4.16.0.bb
+++ b/meta-openembedded/meta-xfce/recipes-xfce/xfce4-dev-tools/xfce4-dev-tools_4.16.0.bb
@@ -11,7 +11,9 @@
 
 BBCLASSEXTEND = "native"
 
-SRC_URI = "http://archive.xfce.org/src/xfce/${BPN}/${@'${PV}'[0:4]}/${BPN}-${PV}.tar.bz2"
+SRC_URI = "http://archive.xfce.org/src/xfce/${BPN}/${@'${PV}'[0:4]}/${BPN}-${PV}.tar.bz2 \
+           file://0001-m4macros-Check-for-a-function-provided-by-libX11-in-.patch \
+           "
 SRC_URI:append:class-target = " file://0001-Run-native-xdt-csource-on-tests.patch"
 SRC_URI[sha256sum] = "f50b3070e66f3ebdf331744dd1ec5e1af5de333965d491e15ce05545e8eb4f04"
 
diff --git a/meta-raspberrypi/.github/workflows/yocto-builds.yml b/meta-raspberrypi/.github/workflows/yocto-builds.yml
index 824745c..3f21242 100644
--- a/meta-raspberrypi/.github/workflows/yocto-builds.yml
+++ b/meta-raspberrypi/.github/workflows/yocto-builds.yml
@@ -27,6 +27,8 @@
           - raspberrypi4
           - raspberrypi-cm3
           - raspberrypi-cm
+          - raspberrypi-armv7
+          - raspberrypi-armv8
         image: [rpi-test-image]
         distro: [poky]
     runs-on: [self-hosted, Linux]
diff --git a/meta-raspberrypi/classes/sdcard_image-rpi.bbclass b/meta-raspberrypi/classes/sdcard_image-rpi.bbclass
index a7b9ac8..178e4ef 100644
--- a/meta-raspberrypi/classes/sdcard_image-rpi.bbclass
+++ b/meta-raspberrypi/classes/sdcard_image-rpi.bbclass
@@ -25,11 +25,6 @@
 # This image depends on the rootfs image
 IMAGE_TYPEDEP:rpi-sdimg = "${SDIMG_ROOTFS_TYPE}"
 
-# Kernel image name
-SDIMG_KERNELIMAGE:raspberrypi  ?= "kernel.img"
-SDIMG_KERNELIMAGE:raspberrypi2 ?= "kernel7.img"
-SDIMG_KERNELIMAGE:raspberrypi3-64 ?= "kernel8.img"
-
 # Boot partition volume id
 # Shorten raspberrypi to just rpi to keep it under 11 characters
 # now enforced by mkfs.vfat from dosfstools-4.2
diff --git a/meta-raspberrypi/conf/layer.conf b/meta-raspberrypi/conf/layer.conf
index d7ad4ed..992ff18 100644
--- a/meta-raspberrypi/conf/layer.conf
+++ b/meta-raspberrypi/conf/layer.conf
@@ -9,7 +9,7 @@
 BBFILE_PATTERN_raspberrypi := "^${LAYERDIR}/"
 BBFILE_PRIORITY_raspberrypi = "9"
 
-LAYERSERIES_COMPAT_raspberrypi = "kirkstone"
+LAYERSERIES_COMPAT_raspberrypi = "kirkstone langdale"
 LAYERDEPENDS_raspberrypi = "core"
 
 # Additional license directories.
diff --git a/meta-raspberrypi/conf/machine/include/rpi-base.inc b/meta-raspberrypi/conf/machine/include/rpi-base.inc
index a2edf06..3e057ed 100644
--- a/meta-raspberrypi/conf/machine/include/rpi-base.inc
+++ b/meta-raspberrypi/conf/machine/include/rpi-base.inc
@@ -68,11 +68,13 @@
     bcm2710-rpi-2-b.dtb \
     bcm2710-rpi-3-b.dtb \
     bcm2710-rpi-3-b-plus.dtb \
+    bcm2710-rpi-zero-2.dtb \
     bcm2711-rpi-4-b.dtb \
     bcm2711-rpi-400.dtb \
     bcm2708-rpi-cm.dtb \
     bcm2710-rpi-cm3.dtb \
     bcm2711-rpi-cm4.dtb \
+    bcm2711-rpi-cm4s.dtb \
     "
 
 KERNEL_DEVICETREE ??= " \
@@ -133,12 +135,15 @@
 
     return ' '.join([transform(dtb) for dtb in alldtbs.split(' ') if dtb])
 
+RPI_EXTRA_IMAGE_BOOT_FILES ?= " \
+	${@bb.utils.contains('RPI_USE_U_BOOT', '1', \
+		'${KERNEL_IMAGETYPE} u-boot.bin;${SDIMG_KERNELIMAGE} boot.scr', \
+		'${KERNEL_IMAGETYPE};${SDIMG_KERNELIMAGE}', d)} \
+	"
 
 IMAGE_BOOT_FILES ?= "${BOOTFILES_DIR_NAME}/* \
                  ${@make_dtb_boot_files(d)} \
-                 ${@bb.utils.contains('RPI_USE_U_BOOT', '1', \
-                    '${KERNEL_IMAGETYPE} u-boot.bin;${SDIMG_KERNELIMAGE} boot.scr', \
-                    '${KERNEL_IMAGETYPE};${SDIMG_KERNELIMAGE}', d)} \
+                 ${RPI_EXTRA_IMAGE_BOOT_FILES} \
                  "
 do_image_wic[depends] += " \
     rpi-bootfiles:do_deploy \
diff --git a/meta-raspberrypi/conf/machine/include/rpi-default-providers.inc b/meta-raspberrypi/conf/machine/include/rpi-default-providers.inc
index c02d248..67f2bf5 100644
--- a/meta-raspberrypi/conf/machine/include/rpi-default-providers.inc
+++ b/meta-raspberrypi/conf/machine/include/rpi-default-providers.inc
@@ -7,6 +7,8 @@
 PREFERRED_PROVIDER_virtual/libgl ?= "${@bb.utils.contains("MACHINE_FEATURES", "vc4graphics", "mesa", "mesa-gl", d)}"
 PREFERRED_PROVIDER_virtual/mesa ?= "${@bb.utils.contains("MACHINE_FEATURES", "vc4graphics", "mesa", "mesa-gl", d)}"
 PREFERRED_PROVIDER_virtual/libgbm ?= "${@bb.utils.contains("MACHINE_FEATURES", "vc4graphics", "mesa", "mesa-gl", d)}"
+PREFERRED_PROVIDER_vlc ?= "rpidistro-vlc"
+PREFERRED_PROVIDER_ffmpeg ?= "rpidistro-ffmpeg"
 PREFERRED_PROVIDER_jpeg ?= "jpeg"
 
 PREFERRED_PROVIDER_virtual/libomxil ?= "userland"
diff --git a/meta-raspberrypi/conf/machine/include/rpi-default-versions.inc b/meta-raspberrypi/conf/machine/include/rpi-default-versions.inc
index 8ff2839..a29fd5e 100644
--- a/meta-raspberrypi/conf/machine/include/rpi-default-versions.inc
+++ b/meta-raspberrypi/conf/machine/include/rpi-default-versions.inc
@@ -1,3 +1,4 @@
 # RaspberryPi BSP default versions
 
 PREFERRED_VERSION_linux-raspberrypi ??= "5.15.%"
+PREFERRED_VERSION_linux-raspberrypi-v7 ??= "${PREFERRED_VERSION_linux-raspberrypi}"
diff --git a/meta-raspberrypi/conf/machine/raspberrypi-armv7.conf b/meta-raspberrypi/conf/machine/raspberrypi-armv7.conf
new file mode 100644
index 0000000..cb2e5a2
--- /dev/null
+++ b/meta-raspberrypi/conf/machine/raspberrypi-armv7.conf
@@ -0,0 +1,39 @@
+# SPDX-FileCopyrightText: Andrei Gherzan <andrei.gherzan@huawei.com>
+#
+# SPDX-License-Identifier: MIT
+
+#@TYPE: Machine
+#@NAME: RaspberryPi Development Boards (32bit)
+#@DESCRIPTION: Machine configuration for the RaspberryPi boards in 32 bit mode
+
+DEFAULTTUNE ?= "cortexa7thf-neon-vfpv4"
+require conf/machine/include/arm/armv7a/tune-cortexa7.inc
+include conf/machine/include/rpi-base.inc
+
+# This machine includes by default the kernel for v7l. We hook in support for
+# v7.
+RASPBERRYPI_v7_KERNEL = "linux-raspberrypi-v7"
+RASPBERRYPI_v7_KERNEL_PACKAGE_NAME = "kernel-v7"
+RASPBERRYPI_v7_KERNEL_FILE ?= "kernel7.img"
+# We don't need a lot for v7l because it is the default provider,
+# virtual/kernel.
+RASPBERRYPI_v7l_KERNEL_FILE ?= "kernel7l.img"
+
+MACHINE_FEATURES += "pci"
+MACHINE_EXTRA_RRECOMMENDS += "\
+    linux-firmware-rpidistro-bcm43430 \
+    linux-firmware-rpidistro-bcm43436 \
+    linux-firmware-rpidistro-bcm43436s \
+    linux-firmware-rpidistro-bcm43455 \
+    linux-firmware-rpidistro-bcm43456 \
+    bluez-firmware-rpidistro-bcm43430a1-hcd \
+    bluez-firmware-rpidistro-bcm43430b0-hcd \
+    bluez-firmware-rpidistro-bcm4345c0-hcd \
+    bluez-firmware-rpidistro-bcm4345c5-hcd \
+"
+
+# FIXME: This machine doesn't support u-boot (yet)
+RPI_EXTRA_IMAGE_BOOT_FILES = " \
+	${KERNEL_IMAGETYPE};${RASPBERRYPI_v7l_KERNEL_FILE} \
+	${RASPBERRYPI_v7_KERNEL_PACKAGE_NAME}/${KERNEL_IMAGETYPE};${RASPBERRYPI_v7_KERNEL_FILE} \
+"
diff --git a/meta-raspberrypi/conf/machine/raspberrypi-armv8.conf b/meta-raspberrypi/conf/machine/raspberrypi-armv8.conf
new file mode 100644
index 0000000..8d412ba
--- /dev/null
+++ b/meta-raspberrypi/conf/machine/raspberrypi-armv8.conf
@@ -0,0 +1,44 @@
+# SPDX-FileCopyrightText: Andrei Gherzan <andrei.gherzan@huawei.com>
+#
+# SPDX-License-Identifier: MIT
+
+#@TYPE: Machine
+#@NAME: RaspberryPi Development Boards (64bit)
+#@DESCRIPTION: Machine configuration for the RaspberryPi boards in 64 bit mode
+
+require conf/machine/include/arm/armv8a/tune-cortexa53.inc
+include conf/machine/include/rpi-base.inc
+
+MACHINE_FEATURES += "pci"
+MACHINE_EXTRA_RRECOMMENDS += "\
+    linux-firmware-rpidistro-bcm43430 \
+    linux-firmware-rpidistro-bcm43455 \
+    linux-firmware-rpidistro-bcm43456 \
+    linux-firmware-rpidistro-bcm43436 \
+    linux-firmware-rpidistro-bcm43436s \
+    bluez-firmware-rpidistro-bcm43430a1-hcd \
+    bluez-firmware-rpidistro-bcm43430b0-hcd \
+    bluez-firmware-rpidistro-bcm4345c0-hcd \
+    bluez-firmware-rpidistro-bcm4345c5-hcd \
+"
+
+RPI_KERNEL_DEVICETREE = " \
+    broadcom/bcm2710-rpi-3-b.dtb \
+    broadcom/bcm2710-rpi-3-b-plus.dtb \
+    broadcom/bcm2837-rpi-3-b.dtb \
+    broadcom/bcm2710-rpi-cm3.dtb \
+    broadcom/bcm2710-rpi-zero-2.dtb \
+    broadcom/bcm2711-rpi-4-b.dtb \
+    broadcom/bcm2711-rpi-400.dtb \
+    broadcom/bcm2711-rpi-cm4.dtb \
+    broadcom/bcm2711-rpi-cm4s.dtb \
+"
+
+SDIMG_KERNELIMAGE ?= "kernel8.img"
+KERNEL_IMAGETYPE_UBOOT ?= "Image"
+KERNEL_IMAGETYPE_DIRECT ?= "Image"
+KERNEL_BOOTCMD ?= "booti"
+UBOOT_MACHINE = "rpi_arm64_config"
+SERIAL_CONSOLES ?= "115200;ttyS0"
+
+VC4DTBO ?= "vc4-fkms-v3d"
diff --git a/meta-raspberrypi/conf/machine/raspberrypi.conf b/meta-raspberrypi/conf/machine/raspberrypi.conf
index b23687b..05263d7 100644
--- a/meta-raspberrypi/conf/machine/raspberrypi.conf
+++ b/meta-raspberrypi/conf/machine/raspberrypi.conf
@@ -7,8 +7,8 @@
 require conf/machine/include/tune-arm1176jzf-s.inc
 include conf/machine/include/rpi-base.inc
 
-SERIAL_CONSOLES ?= "115200;ttyAMA0"
-
+SDIMG_KERNELIMAGE  ?= "kernel.img"
 UBOOT_MACHINE = "rpi_config"
+SERIAL_CONSOLES ?= "115200;ttyAMA0"
 
 ARMSTUB ?= "armstub.bin"
diff --git a/meta-raspberrypi/conf/machine/raspberrypi0-2w.conf b/meta-raspberrypi/conf/machine/raspberrypi0-2w.conf
index c360d90..63c7bfb 100644
--- a/meta-raspberrypi/conf/machine/raspberrypi0-2w.conf
+++ b/meta-raspberrypi/conf/machine/raspberrypi0-2w.conf
@@ -11,7 +11,3 @@
     linux-firmware-rpidistro-bcm43436s \
     bluez-firmware-rpidistro-bcm43430b0-hcd \
 "
-
-RPI_KERNEL_DEVICETREE = " \
-    bcm2710-rpi-zero-2.dtb \
-    "
diff --git a/meta-raspberrypi/conf/machine/raspberrypi2.conf b/meta-raspberrypi/conf/machine/raspberrypi2.conf
index 403d15e..8cb859e 100644
--- a/meta-raspberrypi/conf/machine/raspberrypi2.conf
+++ b/meta-raspberrypi/conf/machine/raspberrypi2.conf
@@ -7,8 +7,8 @@
 require conf/machine/include/arm/armv7a/tune-cortexa7.inc
 include conf/machine/include/rpi-base.inc
 
+SDIMG_KERNELIMAGE ?= "kernel7.img"
 SERIAL_CONSOLES ?= "115200;ttyAMA0"
-
 UBOOT_MACHINE = "rpi_2_config"
 
 ARMSTUB ?= "armstub7.bin"
diff --git a/meta-raspberrypi/conf/machine/raspberrypi3-64.conf b/meta-raspberrypi/conf/machine/raspberrypi3-64.conf
index 95475f3..573b079 100644
--- a/meta-raspberrypi/conf/machine/raspberrypi3-64.conf
+++ b/meta-raspberrypi/conf/machine/raspberrypi3-64.conf
@@ -21,16 +21,15 @@
     broadcom/bcm2710-rpi-cm3.dtb \
     "
 
-SERIAL_CONSOLES ?= "115200;ttyS0"
-
-UBOOT_MACHINE = "rpi_arm64_config"
-
+SDIMG_KERNELIMAGE ?= "kernel8.img"
 # When u-boot is enabled we need to use the "Image" format and the "booti"
 # command to load the kernel
 KERNEL_IMAGETYPE_UBOOT ?= "Image"
 # "zImage" not supported on arm64 and ".gz" images not supported by bootloader yet
 KERNEL_IMAGETYPE_DIRECT ?= "Image"
 KERNEL_BOOTCMD ?= "booti"
+UBOOT_MACHINE = "rpi_arm64_config"
+SERIAL_CONSOLES ?= "115200;ttyS0"
 
 VC4DTBO ?= "vc4-fkms-v3d"
 ARMSTUB ?= "armstub8.bin"
diff --git a/meta-raspberrypi/conf/machine/raspberrypi4-64.conf b/meta-raspberrypi/conf/machine/raspberrypi4-64.conf
index 0cf7d51..d8b8ec0 100644
--- a/meta-raspberrypi/conf/machine/raspberrypi4-64.conf
+++ b/meta-raspberrypi/conf/machine/raspberrypi4-64.conf
@@ -21,6 +21,7 @@
     broadcom/bcm2711-rpi-4-b.dtb \
     broadcom/bcm2711-rpi-400.dtb \
     broadcom/bcm2711-rpi-cm4.dtb \
+    broadcom/bcm2711-rpi-cm4s.dtb \
 "
 
 SDIMG_KERNELIMAGE ?= "kernel8.img"
diff --git a/meta-raspberrypi/docs/extra-build-config.md b/meta-raspberrypi/docs/extra-build-config.md
index 252648a..11f7bde 100644
--- a/meta-raspberrypi/docs/extra-build-config.md
+++ b/meta-raspberrypi/docs/extra-build-config.md
@@ -430,3 +430,34 @@
 take advantage of your RTC device. You can do that by checking what is
 included/configured in the build system based on the inclusion of `rtc` in
 `MACHINE_FEATURES`.
+
+## Raspberry Pi Distro VLC
+
+To enable Raspberry Pi Distro VLC, the `meta-openembedded/meta-multimedia` layer must be
+included in your `bblayers.conf`.
+
+VLC does not support HW accelerated video decode through MMAL on a 64-bit OS.
+
+See:
+* <https://forums.raspberrypi.com/viewtopic.php?t=275370>
+* <https://forums.raspberrypi.com/viewtopic.php?t=325218#p1946169>
+
+MMAL is not enabled by default. To enable it add
+
+    DISABLE_VC4GRAPHICS = "1"
+
+to `local.conf`. Adding `vlc` to `IMAGE_INSTALL` will then default to building the Raspberry
+Pi's Distro implementation of VLC with HW accelerated video decode through MMAL into the system
+image. It also defaults to building VLC with Raspberry Pi's Distro implementation of ffmpeg. The
+oe-core implementation of ffmpeg and the meta-openembedded/meta-multimedia implementation of VLC
+can however be selected via:
+
+    PREFERRED_PROVIDER_ffmpeg = "ffmpeg"
+    PREFERRED_PROVIDER_vlc = "vlc"
+
+Usage example: Start VLC with mmal_vout plugin and without an active display server.
+
+    DISPLAYNUM=$(tvservice -l | tail -c 2)
+    MMAL_DISPLAY=$(expr $DISPLAYNUM + 1)
+    VLC_SETTINGS="-I dummy --vout=mmal_vout --mmal-resize --mmal-display hdmi-$MMAL_DISPLAY --no-dbus"
+    cvlc $VLC_SETTINGS <video/playlist>
diff --git a/meta-raspberrypi/docs/layer-contents.md b/meta-raspberrypi/docs/layer-contents.md
index d12cb88..3882339 100644
--- a/meta-raspberrypi/docs/layer-contents.md
+++ b/meta-raspberrypi/docs/layer-contents.md
@@ -16,6 +16,28 @@
 
 Note: The raspberrypi3 machines include support for Raspberry Pi 3B+.
 
+## Multi-board Machines
+
+This layer generally provides support for machines that are targeting a single
+Raspberry Pi board (or a small subset of them). This is so that the build
+infrastructure can tune and tweak the configuration with the flexibility to
+optimise for both runtime performance and disk storage.
+
+For use cases where compatibility with more boards is required, the layer provides
+machines that are targeting a wider range of Raspberry Pi boards.
+
+### raspberrypi-armv7
+
+This machine targets support for all the ARMv7-based Raspberry Pi boards. It
+will pull in the firmware and deploy the kernel image and kernel modules for
+all the relevant boards.
+
+### raspberrypi-armv8
+
+This machine targets support for all the ARMv8-based Raspberry Pi boards. It
+will pull in the firmware and deploy the kernel image and kernel modules for
+all the relevant boards.
+
 ## Images
 
 * rpi-test-image
diff --git a/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0001-configure-fix-linking-on-RISC-V-ISA.patch b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0001-configure-fix-linking-on-RISC-V-ISA.patch
new file mode 100644
index 0000000..ac96efa
--- /dev/null
+++ b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0001-configure-fix-linking-on-RISC-V-ISA.patch
@@ -0,0 +1,19 @@
+From: =?utf-8?q?R=C3=A9mi_Denis-Courmont?= <remi@remlab.net>
+Date: Sat, 16 Jun 2018 21:31:45 +0300
+Subject: configure: fix linking on RISC-V ISA
+
+Upstream-Status: Pending
+---
+ configure.ac | 1 +
+ 1 file changed, 1 insertion(+)
+
+--- a/configure.ac
++++ b/configure.ac
+@@ -113,6 +113,7 @@ case "${host_os}" in
+     ;;
+   linux*)
+     SYS=linux
++    test "${host_cpu}" = "riscv64" && CFLAGS="${CFLAGS} -pthread"
+     ;;
+   bsdi*)
+     SYS=bsdi
diff --git a/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0002-Revert-configure-Require-libmodplug-0.8.9.patch b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0002-Revert-configure-Require-libmodplug-0.8.9.patch
new file mode 100644
index 0000000..3dfcf85
--- /dev/null
+++ b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0002-Revert-configure-Require-libmodplug-0.8.9.patch
@@ -0,0 +1,22 @@
+From: Sebastian Ramacher <sramacher@debian.org>
+Date: Mon, 19 Aug 2019 21:08:26 +0200
+Subject: Revert "configure: Require libmodplug >= 0.8.9"
+
+This reverts commit 48f014768dc22ecad23d0e9f53c38805a3aff832.
+
+Upstream-Status: Pending
+---
+ configure.ac | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/configure.ac
++++ b/configure.ac
+@@ -2207,7 +2207,7 @@ AC_ARG_ENABLE(mod,
+   [AS_HELP_STRING([--disable-mod],
+     [do not use libmodplug (default auto)])])
+ if test "${enable_mod}" != "no" ; then
+-    PKG_CHECK_MODULES(LIBMODPLUG, [libmodplug >= 0.8.9.0], [
++    PKG_CHECK_MODULES(LIBMODPLUG, [libmodplug >= 0.8.4 libmodplug != 0.8.8], [
+           VLC_ADD_PLUGIN([mod])
+           VLC_ADD_CXXFLAGS([mod],[$LIBMODPLUG_CFLAGS])
+           VLC_ADD_CFLAGS([mod],[$LIBMODPLUG_CFLAGS]) #modules/demux/mod.c needs CFLAGS_mod, not CXXFLAGS_mod
diff --git a/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0003-mmal_20.patch b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0003-mmal_20.patch
new file mode 100644
index 0000000..6038d0e
--- /dev/null
+++ b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0003-mmal_20.patch
@@ -0,0 +1,13823 @@
+Upstream-Status: Pending
+
+--- a/configure.ac
++++ b/configure.ac
+@@ -3478,6 +3478,9 @@ dnl
+ AC_ARG_ENABLE(mmal,
+   AS_HELP_STRING([--enable-mmal],
+     [Multi-Media Abstraction Layer (MMAL) hardware plugin (default enable)]))
++AC_ARG_ENABLE(mmal_avcodec,
++  AS_HELP_STRING([--enable-mmal-avcodec],
++    [Use MMAL enabled avcodec libs (default disable)]))
+ if test "${enable_mmal}" != "no"; then
+   VLC_SAVE_FLAGS
+   LDFLAGS="${LDFLAGS} -L/opt/vc/lib -lvchostif"
+@@ -3488,7 +3491,7 @@ if test "${enable_mmal}" != "no"; then
+         VLC_ADD_PLUGIN([mmal])
+         VLC_ADD_LDFLAGS([mmal],[ -L/opt/vc/lib ])
+         VLC_ADD_CFLAGS([mmal],[ -isystem /opt/vc/include -isystem /opt/vc/include/interface/vcos/pthreads -isystem /opt/vc/include/interface/vmcs_host/linux ])
+-        VLC_ADD_LIBS([mmal],[ -lbcm_host -lmmal -lmmal_core -lmmal_components -lmmal_util -lvchostif ]) ], [
++        VLC_ADD_LIBS([mmal],[ -lbcm_host -lmmal -lmmal_core -lmmal_components -lmmal_util -lvchostif -lvchiq_arm -lvcsm ]) ], [
+           AS_IF([test "${enable_mmal}" = "yes"],
+             [ AC_MSG_ERROR([Cannot find bcm library...]) ],
+             [ AC_MSG_WARN([Cannot find bcm library...]) ])
+@@ -3500,6 +3503,7 @@ if test "${enable_mmal}" != "no"; then
+   VLC_RESTORE_FLAGS
+ fi
+ AM_CONDITIONAL([HAVE_MMAL], [test "${have_mmal}" = "yes"])
++AM_CONDITIONAL([HAVE_MMAL_AVCODEC], [test "${enable_mmal_avcodec}" = "yes"])
+
+ dnl
+ dnl evas plugin
+--- a/include/vlc_fourcc.h
++++ b/include/vlc_fourcc.h
+@@ -365,6 +365,11 @@
+
+ /* Broadcom MMAL opaque buffer type */
+ #define VLC_CODEC_MMAL_OPAQUE     VLC_FOURCC('M','M','A','L')
++#define VLC_CODEC_MMAL_ZC_SAND8   VLC_FOURCC('Z','S','D','8')
++#define VLC_CODEC_MMAL_ZC_SAND10  VLC_FOURCC('Z','S','D','0')
++#define VLC_CODEC_MMAL_ZC_SAND30  VLC_FOURCC('Z','S','D','3')
++#define VLC_CODEC_MMAL_ZC_I420    VLC_FOURCC('Z','4','2','0')
++#define VLC_CODEC_MMAL_ZC_RGB32   VLC_FOURCC('Z','R','G','B')
+
+ /* DXVA2 opaque video surface for use with D3D9 */
+ #define VLC_CODEC_D3D9_OPAQUE     VLC_FOURCC('D','X','A','9') /* 4:2:0  8 bpc */
+--- a/modules/hw/mmal/Makefile.am
++++ b/modules/hw/mmal/Makefile.am
+@@ -1,23 +1,57 @@
+ include $(top_srcdir)/modules/common.am
+ mmaldir = $(pluginsdir)/mmal
+
+-AM_CFLAGS += $(CFLAGS_mmal)
+-AM_LDFLAGS += -rpath '$(mmaldir)' $(LDFLAGS_mmal)
++AM_CFLAGS += -pthread $(CFLAGS_mmal)
++AM_LDFLAGS += -pthread -rpath '$(mmaldir)' $(LDFLAGS_mmal)
+
+-libmmal_vout_plugin_la_SOURCES = vout.c mmal_picture.c mmal_picture.h
++libmmal_vout_plugin_la_SOURCES = vout.c mmal_cma.c mmal_picture.c subpic.c\
++  mmal_cma.h mmal_picture.h subpic.h transform_ops.h\
++  mmal_piccpy_neon.S
+ libmmal_vout_plugin_la_CFLAGS = $(AM_CFLAGS)
+-libmmal_vout_plugin_la_LDFLAGS = $(AM_LDFLAGS) -lm
++libmmal_vout_plugin_la_LDFLAGS = $(AM_LDFLAGS) -lm -lX11 -lXrandr
+ libmmal_vout_plugin_la_LIBADD = $(LIBS_mmal)
+ mmal_LTLIBRARIES = libmmal_vout_plugin.la
+
+-libmmal_codec_plugin_la_SOURCES = codec.c
++libmmal_codec_plugin_la_SOURCES = codec.c mmal_cma.c mmal_picture.c subpic.c\
++  mmal_cma.h mmal_picture.h subpic.h transform_ops.h\
++  blend_rgba_neon.S mmal_piccpy_neon.S
+ libmmal_codec_plugin_la_CFLAGS = $(AM_CFLAGS)
+ libmmal_codec_plugin_la_LDFLAGS = $(AM_LDFLAGS)
+ libmmal_codec_plugin_la_LIBADD = $(LIBS_mmal)
+ mmal_LTLIBRARIES += libmmal_codec_plugin.la
+
+-libmmal_deinterlace_plugin_la_SOURCES = deinterlace.c mmal_picture.c
++libmmal_deinterlace_plugin_la_SOURCES = deinterlace.c mmal_picture.c mmal_cma.c\
++  mmal_cma.h mmal_picture.h transform_ops.h\
++  mmal_piccpy_neon.S
+ libmmal_deinterlace_plugin_la_CFLAGS = $(AM_CFLAGS)
+ libmmal_deinterlace_plugin_la_LDFLAGS = $(AM_LDFLAGS)
+ libmmal_deinterlace_plugin_la_LIBADD = $(LIBS_mmal)
+ mmal_LTLIBRARIES += libmmal_deinterlace_plugin.la
++
++libmmal_xsplitter_plugin_la_SOURCES = xsplitter.c mmal_picture.c mmal_cma.c\
++  mmal_cma.h mmal_picture.h transform_ops.h\
++  mmal_piccpy_neon.S
++libmmal_xsplitter_plugin_la_CFLAGS = $(AM_CFLAGS)
++libmmal_xsplitter_plugin_la_LDFLAGS = $(AM_LDFLAGS)
++libmmal_xsplitter_plugin_la_LIBADD = $(LIBS_mmal)
++mmal_LTLIBRARIES += libmmal_xsplitter_plugin.la
++
++libmmal_converter_plugin_la_SOURCES = converter_mmal.c mmal_cma.c mmal_picture.c\
++  mmal_cma.h mmal_picture.h transform_ops.h\
++  mmal_piccpy_neon.S
++libmmal_converter_plugin_la_CFLAGS = $(AM_CFLAGS)
++libmmal_converter_plugin_la_LDFLAGS = $(AM_LDFLAGS)
++libmmal_converter_plugin_la_LIBADD = $(LIBS_mmal)
++mmal_LTLIBRARIES += libmmal_converter_plugin.la
++
++if HAVE_MMAL_AVCODEC
++libmmal_avcodec_plugin_la_SOURCES = mmal_avcodec.c mmal_cma.c mmal_picture.c\
++  mmal_cma.h mmal_picture.h transform_ops.h\
++  mmal_piccpy_neon.S
++libmmal_avcodec_plugin_la_CFLAGS = $(AM_CFLAGS)
++libmmal_avcodec_plugin_la_LDFLAGS = $(AM_LDFLAGS)
++libmmal_avcodec_plugin_la_LIBADD = $(AVFORMAT_LIBS) $(AVUTIL_LIBS) $(LIBS_mmal)
++mmal_LTLIBRARIES += libmmal_avcodec_plugin.la
++endif
++
++
+--- /dev/null
++++ b/modules/hw/mmal/blend_rgba_neon.S
+@@ -0,0 +1,197 @@
++        .syntax unified
++        .arm
++//      .thumb
++        .text
++        .align 16
++        .arch armv7-a
++        .fpu neon-vfpv4
++
++@ blend_rgbx_rgba_neon
++
++@ Implements /255 as ((x * 257) + 0x8000) >> 16
++@ This generates something in the range [(x+126)/255, (x+127)/255] which is good enough
++
++@ There is an advantage to aligning src and/or dest - dest gives a bit more due to being used twice
++
++
++
++@ [r0] RGBx dest      loaded into d20-d23
++@ [r1] RGBA src merge loaded into d16-d19
++@ r2   plane alpha
++@ r3   count (pixels)
++
++.macro blend_main sR, sG, sB, sA, dR, dG, dB, dA
++
++        push      { r4, lr }
++
++        vdup.u8    d7,  r2
++
++        subs       r3,  #8
++        vmov.u8    d6,  #0xff
++
++        blt        2f
++
++        @ If < 16 bytes to move then don't bother trying to align
++        @ (a) This means the align doesn't need to worry about r3 underflow
++        @ (b) The overhead would be greater than any gain
++        cmp        r3,  #8
++        mov        r4,  r3
++        ble        1f
++
++        @ Align r0 (dest) on a 32 byte boundary
++        neg        r3,  r0
++        ubfx       r3,  r3,  #2,  #3
++
++        cmp        r3,  #0
++        blne       10f
++
++        sub        r3,  r4,  r3
++
++1:
++        vld4.8    {d16, d17, d18, d19}, [r1]
++
++1:
++        vmull.u8   q15, \sA, d7
++
++        vld4.8    {d20, d21, d22, d23}, [r0]
++
++        vsra.u16   q15, q15, #8
++        subs       r3,  #8
++        vrshrn.u16 d31, q15, #8
++        vsub.u8    d30, d6,  d31
++
++        vmull.u8   q12, \sR, d31
++        vmull.u8   q13, \sG, d31
++        vmull.u8   q14, \sB, d31
++        addge      r1,  #32
++
++        vmlal.u8   q12, \dR, d30
++        vmlal.u8   q13, \dG, d30
++        vmlal.u8   q14, \dB, d30
++        vld4.8    {d16, d17, d18, d19}, [r1]
++
++        vsra.u16   q12, q12, #8         @ * 257/256
++        vsra.u16   q13, q13, #8
++        vsra.u16   q14, q14, #8
++
++        vrshrn.u16 \dR, q12, #8
++        vrshrn.u16 \dG, q13, #8
++        vrshrn.u16 \dB, q14, #8
++        vmov.u8    \dA, #0xff
++
++        vst4.8    {d20, d21, d22, d23}, [r0]!
++        bge        1b
++        add        r1,  #32
++
++2:
++        cmp        r3,  #-8
++        blgt       10f
++
++        pop       { r4, pc }
++
++
++// Partial version
++// Align @ start & deal with tail
++10:
++        lsls       r2,  r3,  #30        @ b2 -> C, b1 -> N
++        mov        r2,  r0
++        bcc        1f
++        vld4.8    {d16[0], d17[0], d18[0], d19[0]}, [r1]!
++        vld4.8    {d20[0], d21[0], d22[0], d23[0]}, [r2]!
++        vld4.8    {d16[1], d17[1], d18[1], d19[1]}, [r1]!
++        vld4.8    {d20[1], d21[1], d22[1], d23[1]}, [r2]!
++        vld4.8    {d16[2], d17[2], d18[2], d19[2]}, [r1]!
++        vld4.8    {d20[2], d21[2], d22[2], d23[2]}, [r2]!
++        vld4.8    {d16[3], d17[3], d18[3], d19[3]}, [r1]!
++        vld4.8    {d20[3], d21[3], d22[3], d23[3]}, [r2]!
++1:
++        bpl        1f
++        vld4.8    {d16[4], d17[4], d18[4], d19[4]}, [r1]!
++        vld4.8    {d20[4], d21[4], d22[4], d23[4]}, [r2]!
++        vld4.8    {d16[5], d17[5], d18[5], d19[5]}, [r1]!
++        vld4.8    {d20[5], d21[5], d22[5], d23[5]}, [r2]!
++1:
++        tst        r3,  #1
++        beq        1f
++        vld4.8    {d16[6], d17[6], d18[6], d19[6]}, [r1]!
++        vld4.8    {d20[6], d21[6], d22[6], d23[6]}, [r2]!
++1:
++        @ Set conditions for later
++        lsls       r2,  r3,  #30        @ b2 -> C, b1 -> N
++
++        vmull.u8   q15, \sA, d7
++        vsra.u16   q15, q15, #8
++        vrshrn.u16 d31, q15, #8
++        vsub.u8    d30, d6,  d31
++
++        vmull.u8   q12, \sR, d31
++        vmull.u8   q13, \sG, d31
++        vmull.u8   q14, \sB, d31
++
++        vmlal.u8   q12, \dR, d30
++        vmlal.u8   q13, \dG, d30
++        vmlal.u8   q14, \dB, d30
++
++        vsra.u16   q12, q12, #8
++        vsra.u16   q13, q13, #8
++        vsra.u16   q14, q14, #8
++
++        vrshrn.u16 \dR, q12, #8
++        vrshrn.u16 \dG, q13, #8
++        vrshrn.u16 \dB, q14, #8
++        vmov.u8    \dA, #0xff
++
++        bcc        1f
++        vst4.8    {d20[0], d21[0], d22[0], d23[0]}, [r0]!
++        vst4.8    {d20[1], d21[1], d22[1], d23[1]}, [r0]!
++        vst4.8    {d20[2], d21[2], d22[2], d23[2]}, [r0]!
++        vst4.8    {d20[3], d21[3], d22[3], d23[3]}, [r0]!
++1:
++        bpl        1f
++        vst4.8    {d20[4], d21[4], d22[4], d23[4]}, [r0]!
++        vst4.8    {d20[5], d21[5], d22[5], d23[5]}, [r0]!
++1:
++        tst        r3,  #1
++        bxeq       lr
++        vst4.8    {d20[6], d21[6], d22[6], d23[6]}, [r0]!
++
++        bx         lr
++
++.endm
++
++
++@ [r0] RGBx dest      (Byte order: R, G, B, x)
++@ [r1] RGBA src merge (Byte order: R, G, B, A)
++@ r2   plane alpha
++@ r3   count (pixels)
++
++@ Whilst specified as RGBx+RGBA the only important part is the position of
++@ alpha, the other components are all treated the same
++
++@ [r0] RGBx dest      (Byte order: R, G, B, x)
++@ [r1] RGBA src merge (Byte order: R, G, B, A) - same as above
++@ r2   plane alpha
++@ r3   count (pixels)
++        .align  16
++        .global blend_rgbx_rgba_neon
++#ifdef __ELF__
++        .type   blend_rgbx_rgba_neon, %function
++#endif
++blend_rgbx_rgba_neon:
++        blend_main d16, d17, d18, d19, d20, d21, d22, d23
++
++
++@ [r0] RGBx dest      (Byte order: R, G, B, x)
++@ [r1] RGBA src merge (Byte order: B, G, R, A) - B / R swapped
++@ r2   plane alpha
++@ r3   count (pixels)
++        .align  16
++        .global blend_bgrx_rgba_neon
++#ifdef __ELF__
++        .type   blend_bgrx_rgba_neon, %function
++#endif
++blend_bgrx_rgba_neon:
++        blend_main d18, d17, d16, d19, d20, d21, d22, d23
++
++
++
+--- /dev/null
++++ b/modules/hw/mmal/blend_rgba_neon.h
+@@ -0,0 +1,17 @@
++#ifndef HW_MMAL_BLEND_RGBA_NEON_H
++#define HW_MMAL_BLEND_RGBA_NEON_H
++
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++typedef void blend_neon_fn(void * dest, const void * src, int alpha, unsigned int n);
++extern blend_neon_fn blend_rgbx_rgba_neon;
++extern blend_neon_fn blend_bgrx_rgba_neon;
++
++#ifdef __cplusplus
++}
++#endif
++
++#endif
++
+--- /dev/null
++++ b/modules/hw/mmal/blend_test.c
+@@ -0,0 +1,180 @@
++#include <stdio.h>
++#include <stdint.h>
++#include <memory.h>
++
++#include "blend_rgba_neon.h"
++
++#define RPI_PROFILE 1
++#define RPI_PROC_ALLOC 1
++#include "rpi_prof.h"
++
++static inline unsigned div255(unsigned v)
++{
++    // This models what we do in the asm for / 255
++    // It generates something in the range [(i+126)/255, (i+127)/255] which is good enough
++    return ((v * 257) + 0x8000) >> 16;
++}
++
++static inline unsigned int a_merge(unsigned int dst, unsigned src, unsigned f)
++{
++    return div255((255 - f) * (dst) + src * f);
++}
++
++
++static void merge_line(void * dest, const void * src, int alpha, unsigned int n)
++{
++    unsigned int i;
++    const uint8_t * s_data = src;
++    uint8_t * d_data = dest;
++
++    for (i = 0; i != n; ++i) {
++        const uint32_t s_pel = ((const uint32_t *)s_data)[i];
++        const uint32_t d_pel = ((const uint32_t *)d_data)[i];
++        const unsigned int a = div255(alpha * (s_pel >> 24));
++        ((uint32_t *)d_data)[i] = 0xff000000 |
++            (a_merge((d_pel >> 16) & 0xff, (s_pel >> 16) & 0xff, a) << 16) |
++            (a_merge((d_pel >> 8)  & 0xff, (s_pel >> 8)  & 0xff, a) << 8 ) |
++            (a_merge((d_pel >> 0)  & 0xff, (s_pel >> 0)  & 0xff, a) << 0 );
++    }
++}
++
++
++// Merge RGBA with BGRA
++static void merge_line2(void * dest, const void * src, int alpha, unsigned int n)
++{
++    unsigned int i;
++    const uint8_t * s_data = src;
++    uint8_t * d_data = dest;
++
++    for (i = 0; i != n; ++i) {
++        const uint32_t s_pel = ((const uint32_t *)s_data)[i];
++        const uint32_t d_pel = ((const uint32_t *)d_data)[i];
++        const unsigned int a = div255(alpha * (s_pel >> 24));
++        ((uint32_t *)d_data)[i] = 0xff000000 |
++            (a_merge((d_pel >> 0)  & 0xff, (s_pel >> 16) & 0xff, a) << 0 ) |
++            (a_merge((d_pel >> 8)  & 0xff, (s_pel >> 8)  & 0xff, a) << 8 ) |
++            (a_merge((d_pel >> 16) & 0xff, (s_pel >> 0)  & 0xff, a) << 16);
++    }
++}
++
++#define BUF_SIZE   256
++#define BUF_SLACK  16
++#define BUF_ALIGN  64
++#define BUF_ALLOC  (BUF_SIZE + 2*BUF_SLACK + BUF_ALIGN)
++
++static void test_line(const uint32_t * const dx, const unsigned int d_off,
++                      const uint32_t * const sx, const unsigned int s_off,
++                      const unsigned int alpha, const unsigned int len, const int prof_no)
++{
++    uint32_t d0_buf[BUF_ALLOC];
++    uint32_t d1_buf[BUF_ALLOC];
++    const uint32_t * const s0 = sx + s_off;
++
++    uint32_t * const d0 =  (uint32_t *)(((uintptr_t)d0_buf + (BUF_ALIGN - 1)) & ~(BUF_ALIGN - 1)) + d_off;
++    uint32_t * const d1 = (uint32_t *)(((uintptr_t)d1_buf + (BUF_ALIGN - 1)) & ~(BUF_ALIGN - 1)) + d_off;
++    unsigned int i;
++
++    memcpy(d0, dx, (BUF_SIZE + BUF_SLACK*2)*4);
++    memcpy(d1, dx, (BUF_SIZE + BUF_SLACK*2)*4);
++
++    merge_line(d0 + BUF_SLACK, s0 + BUF_SLACK, alpha, len);
++
++    PROFILE_START();
++    blend_rgbx_rgba_neon(d1 + BUF_SLACK, s0 + BUF_SLACK, alpha, len);
++    PROFILE_ACC_N(prof_no);
++
++    for (i = 0; i != BUF_SIZE + BUF_SLACK*2; ++i) {
++        if (d0[i] != d1[i]) {
++            printf("%3d: %08x + %08x * %02x: %08x / %08x: len=%d\n", (int)(i - BUF_SLACK), dx[i], s0[i], alpha, d0[i], d1[i], len);
++        }
++    }
++}
++
++static void test_line2(const uint32_t * const dx, const unsigned int d_off,
++                      const uint32_t * const sx, const unsigned int s_off,
++                      const unsigned int alpha, const unsigned int len, const int prof_no)
++{
++    uint32_t d0_buf[BUF_ALLOC];
++    uint32_t d1_buf[BUF_ALLOC];
++    const uint32_t * const s0 = sx + s_off;
++
++    uint32_t * const d0 =  (uint32_t *)(((uintptr_t)d0_buf + (BUF_ALIGN - 1)) & ~(BUF_ALIGN - 1)) + d_off;
++    uint32_t * const d1 = (uint32_t *)(((uintptr_t)d1_buf + (BUF_ALIGN - 1)) & ~(BUF_ALIGN - 1)) + d_off;
++    unsigned int i;
++
++    memcpy(d0, dx, (BUF_SIZE + BUF_SLACK*2)*4);
++    memcpy(d1, dx, (BUF_SIZE + BUF_SLACK*2)*4);
++
++    merge_line2(d0 + BUF_SLACK, s0 + BUF_SLACK, alpha, len);
++
++    PROFILE_START();
++    blend_bgrx_rgba_neon(d1 + BUF_SLACK, s0 + BUF_SLACK, alpha, len);
++    PROFILE_ACC_N(prof_no);
++
++    for (i = 0; i != BUF_SIZE + BUF_SLACK*2; ++i) {
++        if (d0[i] != d1[i]) {
++            printf("%3d: %08x + %08x * %02x: %08x / %08x: len=%d\n", (int)(i - BUF_SLACK), dx[i], s0[i], alpha, d0[i], d1[i], len);
++        }
++    }
++}
++
++
++
++int main(int argc, char *argv[])
++{
++    unsigned int i, j;
++    uint32_t d0_buf[BUF_ALLOC];
++    uint32_t s0_buf[BUF_ALLOC];
++
++    uint32_t * const d0 = (uint32_t *)(((uintptr_t)d0_buf + 63) & ~63) + 0;
++    uint32_t * const s0 = (uint32_t *)(((uintptr_t)s0_buf + 63) & ~63) + 0;
++
++    PROFILE_INIT();
++
++    for (i = 0; i != 255*255; ++i) {
++        unsigned int a = div255(i);
++        unsigned int b = (i + 127)/255;
++        unsigned int c = (i + 126)/255;
++        if (a != b && a != c)
++            printf("%d/255: %d != %d/%d\n", i, a, b, c);
++    }
++
++    for (i = 0; i != BUF_ALLOC; ++i) {
++        d0_buf[i] = 0xff00 | i;
++        s0_buf[i] = (i << 24) | 0x40ffc0;
++    }
++
++    for (i = 0; i != 256; ++i) {
++        test_line(d0, 0, s0, 0, i, 256, -1);
++    }
++    for (i = 0; i != 256; ++i) {
++        test_line(d0, 0, s0, 0, 128, i, -1);
++    }
++
++    for (j = 0; j != 16; ++j) {
++        for (i = 0; i != 256; ++i) {
++            test_line(d0, j & 3, s0, j >> 2, i, 256, j);
++        }
++        PROFILE_PRINTF_N(j);
++        PROFILE_CLEAR_N(j);
++    }
++    printf("Done 1\n");
++
++    for (i = 0; i != 256; ++i) {
++        test_line2(d0, 0, s0, 0, i, 256, -1);
++    }
++    for (i = 0; i != 256; ++i) {
++        test_line2(d0, 0, s0, 0, 128, i, -1);
++    }
++
++    for (j = 0; j != 16; ++j) {
++        for (i = 0; i != 256; ++i) {
++            test_line2(d0, j & 3, s0, j >> 2, i, 256, j);
++        }
++        PROFILE_PRINTF_N(j);
++    }
++    printf("Done 2\n");
++
++    return 0;
++}
++
+--- a/modules/hw/mmal/codec.c
++++ b/modules/hw/mmal/codec.c
+@@ -26,267 +26,443 @@
+ #include "config.h"
+ #endif
+
++#include <stdatomic.h>
++
+ #include <vlc_common.h>
+-#include <vlc_atomic.h>
+ #include <vlc_plugin.h>
+ #include <vlc_codec.h>
++#include <vlc_filter.h>
+ #include <vlc_threads.h>
+
+-#include <bcm_host.h>
+ #include <interface/mmal/mmal.h>
+ #include <interface/mmal/util/mmal_util.h>
+ #include <interface/mmal/util/mmal_default_components.h>
+
++#include <interface/vcsm/user-vcsm.h>
++
++#include "mmal_cma.h"
+ #include "mmal_picture.h"
+
++#include "subpic.h"
++#include "blend_rgba_neon.h"
++
++#define TRACE_ALL 0
++
++#define OPT_TO_FROM_ZC 0
++
+ /*
+  * This seems to be a bit high, but reducing it causes instabilities
+  */
+ #define NUM_EXTRA_BUFFERS 5
++//#define NUM_EXTRA_BUFFERS 10
+ #define NUM_DECODER_BUFFER_HEADERS 30
+
+-#define MIN_NUM_BUFFERS_IN_TRANSIT 2
++#define CONVERTER_BUFFERS 4  // Buffers on the output of the converter
++
++#define MMAL_SLICE_HEIGHT 16
++#define MMAL_ALIGN_W      32
++#define MMAL_ALIGN_H      16
+
+ #define MMAL_OPAQUE_NAME "mmal-opaque"
+ #define MMAL_OPAQUE_TEXT N_("Decode frames directly into RPI VideoCore instead of host memory.")
+ #define MMAL_OPAQUE_LONGTEXT N_("Decode frames directly into RPI VideoCore instead of host memory. This option must only be used with the MMAL video output plugin.")
+
+-static int OpenDecoder(decoder_t *dec);
+-static void CloseDecoder(decoder_t *dec);
+-
+-vlc_module_begin()
+-    set_shortname(N_("MMAL decoder"))
+-    set_description(N_("MMAL-based decoder plugin for Raspberry Pi"))
+-    set_capability("video decoder", 90)
+-    add_shortcut("mmal_decoder")
+-    add_bool(MMAL_OPAQUE_NAME, true, MMAL_OPAQUE_TEXT, MMAL_OPAQUE_LONGTEXT, false)
+-    set_callbacks(OpenDecoder, CloseDecoder)
+-vlc_module_end()
++#define MMAL_RESIZE_NAME "mmal-resize"
++#define MMAL_RESIZE_TEXT N_("Use mmal resizer rather than hvs.")
++#define MMAL_RESIZE_LONGTEXT N_("Use mmal resizer rather than isp. This uses less gpu memory than the ISP but is slower.")
++
++#define MMAL_ISP_NAME "mmal-isp"
++#define MMAL_ISP_TEXT N_("Use mmal isp rather than hvs.")
++#define MMAL_ISP_LONGTEXT N_("Use mmal isp rather than hvs. This may be faster but has no blend.")
+
+-struct decoder_sys_t {
+-    bool opaque;
++typedef struct decoder_sys_t
++{
+     MMAL_COMPONENT_T *component;
+     MMAL_PORT_T *input;
+     MMAL_POOL_T *input_pool;
+     MMAL_PORT_T *output;
+-    MMAL_POOL_T *output_pool; /* only used for non-opaque mode */
++    hw_mmal_port_pool_ref_t *ppr;
+     MMAL_ES_FORMAT_T *output_format;
+-    vlc_sem_t sem;
+
++    MMAL_STATUS_T err_stream;
+     bool b_top_field_first;
+     bool b_progressive;
+
++    bool b_flushed;
++
++    vcsm_init_type_t vcsm_init_type;
++
++    // Lock to avoid pic update & allocate happening simultaneously
++    // * We should be able to arrange life s.t. this isn't needed
++    //   but while we are confused apply belt & braces
++    vlc_mutex_t pic_lock;
++
+     /* statistics */
+-    int output_in_transit;
+-    int input_in_transit;
+     atomic_bool started;
+-};
++} decoder_sys_t;
+
+-/* Utilities */
+-static int change_output_format(decoder_t *dec);
+-static int send_output_buffer(decoder_t *dec);
+-static void fill_output_port(decoder_t *dec);
+-
+-/* VLC decoder callback */
+-static int decode(decoder_t *dec, block_t *block);
+-static void flush_decoder(decoder_t *dec);
+-
+-/* MMAL callbacks */
+-static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
+-static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
+-static void output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
+
+-static int OpenDecoder(decoder_t *dec)
+-{
+-    int ret = VLC_SUCCESS;
+-    decoder_sys_t *sys;
+-    MMAL_PARAMETER_UINT32_T extra_buffers;
+-    MMAL_STATUS_T status;
++typedef struct supported_mmal_enc_s {
++    struct {
++       MMAL_PARAMETER_HEADER_T header;
++       MMAL_FOURCC_T encodings[64];
++    } supported;
++    int n;
++} supported_mmal_enc_t;
++
++#define SUPPORTED_MMAL_ENC_INIT \
++{ \
++    {{MMAL_PARAMETER_SUPPORTED_ENCODINGS, sizeof(((supported_mmal_enc_t *)0)->supported)}, {0}}, \
++    -1 \
++}
+
+-    if (dec->fmt_in.i_codec != VLC_CODEC_MPGV &&
+-            dec->fmt_in.i_codec != VLC_CODEC_H264)
+-        return VLC_EGENERIC;
++static supported_mmal_enc_t supported_decode_in_enc = SUPPORTED_MMAL_ENC_INIT;
+
+-    sys = calloc(1, sizeof(decoder_sys_t));
+-    if (!sys) {
+-        ret = VLC_ENOMEM;
+-        goto out;
++static bool is_enc_supported(supported_mmal_enc_t * const support, const MMAL_FOURCC_T fcc)  // True if fcc is in support's list, or if the list has not been fetched yet
++{
++    int i;
++
++    if (fcc == 0)  // 0 means "no encoding" - never supported
++        return false;
++    if (support->n == -1)
++        return true;  // List not fetched yet (unknown) - say OK
++    for (i = 0; i < support->n; ++i) {  // linear scan over the n cached fourccs
++        if (support->supported.encodings[i] == fcc)
++            return true;
+     }
+-    dec->p_sys = sys;
++    return false;
++}
+
+-    sys->opaque = var_InheritBool(dec, MMAL_OPAQUE_NAME);
+-    bcm_host_init();
++static bool set_and_test_enc_supported(supported_mmal_enc_t * const support, MMAL_PORT_T * port, const MMAL_FOURCC_T fcc)  // Fetch port's encoding list on first use, then test fcc against it
++{
++    if (support->n >= 0)
++        /* already done */;
++    else if (mmal_port_parameter_get(port, (MMAL_PARAMETER_HEADER_T *)&support->supported) != MMAL_SUCCESS)
++        support->n = 0;  // query failed: cache an empty list, so every fcc now tests as unsupported
++    else
++        support->n = (support->supported.header.size - sizeof(support->supported.header)) /
++          sizeof(support->supported.encodings[0]);  // payload bytes after the header / bytes per fourcc
+
+-    status = mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_DECODER, &sys->component);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(dec, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
+-                MMAL_COMPONENT_DEFAULT_VIDEO_DECODER, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
+-    }
++    return is_enc_supported(support, fcc);
++}
+
+-    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)dec;
+-    status = mmal_port_enable(sys->component->control, control_port_cb);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(dec, "Failed to enable control port %s (status=%"PRIx32" %s)",
+-                sys->component->control->name, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
++static MMAL_FOURCC_T vlc_to_mmal_es_fourcc(const unsigned int fcc)  // Map a VLC codec fourcc to the MMAL encoding; 0 if there is no mapping
++{
++    switch (fcc){
++    case VLC_CODEC_MJPG:
++        return MMAL_ENCODING_MJPEG;
++    case VLC_CODEC_MP1V:
++        return MMAL_ENCODING_MP1V;
++    case VLC_CODEC_MPGV:  // falls through: handled as MPEG-2
++    case VLC_CODEC_MP2V:
++        return MMAL_ENCODING_MP2V;
++    case VLC_CODEC_H263:
++        return MMAL_ENCODING_H263;
++    case VLC_CODEC_MP4V:
++        return MMAL_ENCODING_MP4V;
++    case VLC_CODEC_H264:
++        return MMAL_ENCODING_H264;
++    case VLC_CODEC_VP6:
++        return MMAL_ENCODING_VP6;
++    case VLC_CODEC_VP8:
++        return MMAL_ENCODING_VP8;
++    case VLC_CODEC_WMV1:
++        return MMAL_ENCODING_WMV1;
++    case VLC_CODEC_WMV2:
++        return MMAL_ENCODING_WMV2;
++    case VLC_CODEC_WMV3:
++        return MMAL_ENCODING_WMV3;
++    case VLC_CODEC_VC1:
++        return MMAL_ENCODING_WVC1;
++    case VLC_CODEC_THEORA:
++        return MMAL_ENCODING_THEORA;
++    default:
++        break;
+     }
++    return 0;  // no MMAL equivalent listed above
++}
+
+-    sys->input = sys->component->input[0];
+-    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)dec;
+-    if (dec->fmt_in.i_codec == VLC_CODEC_MPGV)
+-        sys->input->format->encoding = MMAL_ENCODING_MP2V;
+-    else
+-        sys->input->format->encoding = MMAL_ENCODING_H264;
++static MMAL_FOURCC_T pic_to_slice_mmal_fourcc(const MMAL_FOURCC_T fcc)  // Map a picture encoding to its *_SLICE counterpart; 0 if there is none
++{
++    switch (fcc){
++    case MMAL_ENCODING_I420:
++        return MMAL_ENCODING_I420_SLICE;
++    case MMAL_ENCODING_I422:
++        return MMAL_ENCODING_I422_SLICE;
++    case MMAL_ENCODING_ARGB:
++        return MMAL_ENCODING_ARGB_SLICE;
++    case MMAL_ENCODING_RGBA:
++        return MMAL_ENCODING_RGBA_SLICE;
++    case MMAL_ENCODING_ABGR:
++        return MMAL_ENCODING_ABGR_SLICE;
++    case MMAL_ENCODING_BGRA:
++        return MMAL_ENCODING_BGRA_SLICE;
++    case MMAL_ENCODING_RGB16:
++        return MMAL_ENCODING_RGB16_SLICE;
++    case MMAL_ENCODING_RGB24:
++        return MMAL_ENCODING_RGB24_SLICE;
++    case MMAL_ENCODING_RGB32:
++        return MMAL_ENCODING_RGB32_SLICE;
++    case MMAL_ENCODING_BGR16:
++        return MMAL_ENCODING_BGR16_SLICE;
++    case MMAL_ENCODING_BGR24:
++        return MMAL_ENCODING_BGR24_SLICE;
++    case MMAL_ENCODING_BGR32:
++        return MMAL_ENCODING_BGR32_SLICE;
++    default:
++        break;
++    }
++    return 0;  // no slice variant listed above
++}
+
+-    if (dec->fmt_in.i_codec == VLC_CODEC_H264) {
+-        if (dec->fmt_in.i_extra > 0) {
+-            status = mmal_format_extradata_alloc(sys->input->format,
+-                    dec->fmt_in.i_extra);
+-            if (status == MMAL_SUCCESS) {
+-                memcpy(sys->input->format->extradata, dec->fmt_in.p_extra,
+-                        dec->fmt_in.i_extra);
+-                sys->input->format->extradata_size = dec->fmt_in.i_extra;
+-            } else {
+-                msg_Err(dec, "Failed to allocate extra format data on input port %s (status=%"PRIx32" %s)",
+-                        sys->input->name, status, mmal_status_to_string(status));
+-            }
++#define DEBUG_SQUARES 0  // set non-zero to compile in draw_square()
++#if DEBUG_SQUARES
++static void draw_square(void * pic_buf, size_t pic_stride, unsigned int x, unsigned int y, unsigned int w, unsigned int h, uint32_t val)  // Debug aid: fill a w x h rectangle at (x, y) with val
++{
++    uint32_t * p = (uint32_t *)pic_buf + y * pic_stride + x;  // pic_stride is counted in 32-bit pixels here, not bytes
++    unsigned int i;
++    for (i = 0; i != h; ++i) {
++        unsigned int j;
++        for (j = 0; j != w; ++j) {
++            p[j] = val;
+         }
++        p += pic_stride;  // step to the same x on the next row
+     }
++}
++#endif
+
+-    status = mmal_port_format_commit(sys->input);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(dec, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
+-                sys->input->name, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
++#if 0  // disabled debug helpers: draw_line() and draw_corners()
++static inline void draw_line(void * pic_buf, size_t pic_stride, unsigned int x, unsigned int y, unsigned int len, int inc)  // Write len pixels of ~0U starting at (x, y), advancing inc pixels each step
++{
++    uint32_t * p = (uint32_t *)pic_buf + y * pic_stride + x;  // pic_stride is counted in 32-bit pixels here, not bytes
++    while (len-- != 0) {
++        *p = ~0U;
++        p += inc;  // inc may be negative (leftwards / upwards)
+     }
+-    sys->input->buffer_size = sys->input->buffer_size_recommended;
+-    sys->input->buffer_num = sys->input->buffer_num_recommended;
++}
+
+-    status = mmal_port_enable(sys->input, input_port_cb);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(dec, "Failed to enable input port %s (status=%"PRIx32" %s)",
+-                sys->input->name, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
+-    }
+
+-    sys->output = sys->component->output[0];
+-    sys->output->userdata = (struct MMAL_PORT_USERDATA_T *)dec;
++static void draw_corners(void * pic_buf, size_t pic_stride, unsigned int x, unsigned int y, unsigned int w, unsigned int h)  // Draw two len-pixel marks (one horizontal, one vertical) at each corner of the rectangle
++{
++    const unsigned int len = 20;  // length of each corner mark, in pixels
++    draw_line(pic_buf, pic_stride, x, y, len, 1);
++    draw_line(pic_buf, pic_stride, x, y, len, pic_stride);
++    draw_line(pic_buf, pic_stride, x + w - 1, y, len, -1);
++    draw_line(pic_buf, pic_stride, x + w - 1, y, len, pic_stride);
++    draw_line(pic_buf, pic_stride, x + w - 1, y + h - 1, len, -1);
++    draw_line(pic_buf, pic_stride, x + w - 1, y + h - 1, len, -(int)pic_stride);
++    draw_line(pic_buf, pic_stride, x, y + h - 1, len, 1);
++    draw_line(pic_buf, pic_stride, x, y + h - 1, len, -(int)pic_stride);
++}
++#endif
+
+-    if (sys->opaque) {
+-        extra_buffers.hdr.id = MMAL_PARAMETER_EXTRA_BUFFERS;
+-        extra_buffers.hdr.size = sizeof(MMAL_PARAMETER_UINT32_T);
+-        extra_buffers.value = NUM_EXTRA_BUFFERS;
+-        status = mmal_port_parameter_set(sys->output, &extra_buffers.hdr);
+-        if (status != MMAL_SUCCESS) {
+-            msg_Err(dec, "Failed to set MMAL_PARAMETER_EXTRA_BUFFERS on output port (status=%"PRIx32" %s)",
+-                    status, mmal_status_to_string(status));
+-            ret = VLC_EGENERIC;
+-            goto out;
+-        }
++static MMAL_RATIONAL_T
++rationalize_sar(unsigned int num, unsigned int den)  // Reduce num/den by dividing out common small prime factors
++{
++    static const unsigned int primes[] = {2, 3, 5, 7, 11, 13, 17, 19, 23, 0};  // trial divisors, 0-terminated; common factors above 23 are left in
++    const unsigned int * p = primes;
+
+-        msg_Dbg(dec, "Activate zero-copy for output port");
+-        MMAL_PARAMETER_BOOLEAN_T zero_copy = {
+-            { MMAL_PARAMETER_ZERO_COPY, sizeof(MMAL_PARAMETER_BOOLEAN_T) },
+-            1
+-        };
++    // If either num or den is 0 then return a well formed "unknown" (0/0)
++    if (num == 0 || den == 0) {
++        return (MMAL_RATIONAL_T){.num = 0, .den = 0};
++    }
+
+-        status = mmal_port_parameter_set(sys->output, &zero_copy.hdr);
+-        if (status != MMAL_SUCCESS) {
+-           msg_Err(dec, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
+-                    sys->output->name, status, mmal_status_to_string(status));
+-           goto out;
++    while (*p != 0 && num >= *p && den >= *p) {  // stop at end of table, or once the current prime exceeds num or den
++        if (num % *p != 0 || den % *p != 0)
++            ++p;  // not a common factor (any more) - try the next prime
++        else {  // common factor: divide it out and retry the same prime
++            num /= *p;
++            den /= *p;
+         }
+     }
++    return (MMAL_RATIONAL_T){.num = num, .den = den};
++}
+
+-    status = mmal_port_enable(sys->output, output_port_cb);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(dec, "Failed to enable output port %s (status=%"PRIx32" %s)",
+-                sys->output->name, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
+-    }
++// Get a new picture for buf: buf ends up attached to the returned pic, or is recycled to the pool when NULL is returned
++static picture_t * alloc_opaque_pic(decoder_t * const dec, MMAL_BUFFER_HEADER_T * const buf)
++{
++    decoder_sys_t *const dec_sys = dec->p_sys;
+
+-    status = mmal_component_enable(sys->component);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(dec, "Failed to enable component %s (status=%"PRIx32" %s)",
+-                sys->component->name, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
++    vlc_mutex_lock(&dec_sys->pic_lock);  // pic_lock keeps picture allocation from overlapping a format update made under the same lock
++    picture_t * const pic = decoder_NewPicture(dec);
++    vlc_mutex_unlock(&dec_sys->pic_lock);
++
++    if (pic == NULL)
++        goto fail1;
++
++    if (buf->length == 0) {
++        msg_Err(dec, "%s: Empty buffer", __func__);
++        goto fail2;
+     }
+
+-    sys->input_pool = mmal_pool_create(sys->input->buffer_num, 0);
++    if ((pic->context = hw_mmal_gen_context(buf, dec_sys->ppr)) == NULL)  // the picture context is what ties buf to pic
++        goto fail2;
+
+-    if (sys->opaque) {
+-        dec->fmt_out.i_codec = VLC_CODEC_MMAL_OPAQUE;
+-        dec->fmt_out.video.i_chroma = VLC_CODEC_MMAL_OPAQUE;
+-    } else {
+-        dec->fmt_out.i_codec = VLC_CODEC_I420;
+-        dec->fmt_out.video.i_chroma = VLC_CODEC_I420;
++    buf_to_pic_copy_props(pic, buf);
++
++#if TRACE_ALL
++    msg_Dbg(dec, "pic: prog=%d, tff=%d, date=%lld", pic->b_progressive, pic->b_top_field_first, (long long)pic->date);
++#endif
++
++    return pic;
++
++fail2:  // a picture was obtained but cannot be used: drop it, then fall through
++    picture_Release(pic);
++fail1:  // no usable picture: give the buffer back
++    // Recycle rather than release to avoid buffer starvation if NewPic fails
++    hw_mmal_port_pool_ref_recycle(dec_sys->ppr, buf);
++    return NULL;
++}
++
++static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)  // Control port callback: record any MMAL error event, then release the buffer
++{
++    decoder_t *dec = (decoder_t *)port->userdata;
++    MMAL_STATUS_T status;
++
++#if TRACE_ALL
++    msg_Dbg(dec, "<<< %s: cmd=%d, data=%p", __func__, buffer->cmd, buffer->data);
++#endif
++
++    if (buffer->cmd == MMAL_EVENT_ERROR) {
++        status = *(uint32_t *)buffer->data;  // the event payload is read as the status code
++        dec->p_sys->err_stream = status;  // decode() checks this and then fails with VLCDEC_ECRITICAL
++        msg_Err(dec, "MMAL error %"PRIx32" \"%s\"", status,
++                mmal_status_to_string(status));
+     }
+
+-    dec->pf_decode = decode;
+-    dec->pf_flush  = flush_decoder;
++    mmal_buffer_header_release(buffer);  // released for every event, error or not
++}
+
+-    vlc_sem_init(&sys->sem, 0);
++static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)  // Input port callback: hand back the buffer header and free the block attached to it, if any
++{
++    block_t * const block = (block_t *)buffer->user_data;  // read before the header is reset; decode() sets it only on the buffer carrying a block's final bytes
+
+-out:
+-    if (ret != VLC_SUCCESS)
+-        CloseDecoder(dec);
++    (void)port;  // Unused unless TRACE_ALL is set
+
+-    return ret;
++#if TRACE_ALL
++    msg_Dbg((decoder_t *)port->userdata, "<<< %s: cmd=%d, data=%p, len=%d/%d, pts=%lld", __func__,
++            buffer->cmd, buffer->data, buffer->length, buffer->alloc_size, (long long)buffer->pts);
++#endif
++
++    mmal_buffer_header_reset(buffer);
++    mmal_buffer_header_release(buffer);
++
++    if (block != NULL)
++        block_Release(block);  // the input data has been consumed, so the VLC block can go
+ }
+
+-static void CloseDecoder(decoder_t *dec)
++static void decoder_output_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)  // Output port callback: queue decoded frames, capture format-change events, release everything else
+ {
+-    decoder_sys_t *sys = dec->p_sys;
+-    MMAL_BUFFER_HEADER_T *buffer;
++    decoder_t * const dec = (decoder_t *)port->userdata;
+
+-    if (!sys)
++    if (buffer->cmd == 0 && buffer->length != 0)  // a data buffer with content, i.e. a decoded frame
++    {
++#if TRACE_ALL
++        msg_Dbg(dec, "<<< %s: cmd=%d, data=%p, len=%d/%d, pts=%lld", __func__,
++                buffer->cmd, buffer->data, buffer->length, buffer->alloc_size, (long long)buffer->pts);
++#endif
++
++        picture_t *pic = alloc_opaque_pic(dec, buffer);
++#if TRACE_ALL
++        msg_Dbg(dec, "flags=%#x, video flags=%#x", buffer->flags, buffer->type->video.flags);
++#endif
++        if (pic == NULL)
++            msg_Err(dec, "Failed to allocate new picture");
++        else
++            decoder_QueueVideo(dec, pic);
++        // Buffer recycled or attached to pic by alloc_opaque_pic - do not release again
+         return;
++    }
+
+-    if (sys->component && sys->component->control->is_enabled)
+-        mmal_port_disable(sys->component->control);
++    if (buffer->cmd == MMAL_EVENT_FORMAT_CHANGED)
++    {
++        decoder_sys_t * const sys = dec->p_sys;
++        MMAL_EVENT_FORMAT_CHANGED_T * const fmt = mmal_event_format_changed_get(buffer);
++        MMAL_ES_FORMAT_T * const format = mmal_format_alloc();  // private copy that outlives the event buffer
+
+-    if (sys->input && sys->input->is_enabled)
+-        mmal_port_disable(sys->input);
++        if (format == NULL)
++            msg_Err(dec, "Failed to allocate new format");
++        else
++        {
++            mmal_format_full_copy(format, fmt->format);
++            format->encoding = MMAL_ENCODING_OPAQUE;  // output is always requested as opaque, whatever the event said
+
+-    if (sys->output && sys->output->is_enabled)
+-        mmal_port_disable(sys->output);
++            // If no PAR in the stream - see if we've got one from the demux
++            if (format->es->video.par.den <= 0 || format->es->video.par.num <= 0) {
++                unsigned int n = dec->fmt_in.video.i_sar_num;
++                unsigned int d = dec->fmt_in.video.i_sar_den;
++
++                if (n == 0 || d == 0) {
++                    // Guesswork required
++                    const unsigned int w = format->es->video.width;
++                    const unsigned int h = format->es->video.height;
++                    if ((w == 704 || w == 720) && (h == 480 || h == 576)) {
++                        // Very likely SD 4:3: DAR = (w * n) / (h * d) = 4/3, so n/d = (4 * h) / (3 * w)
++                        n = h * 4;
++                        d = w * 3;
++                    }
++                    else
++                    {
++                        // Otherwise guess SAR 1:1
++                        n = 1;
++                        d = 1;
++                    }
++                }
+
+-    if (sys->component && sys->component->is_enabled)
+-        mmal_component_disable(sys->component);
++                format->es->video.par = rationalize_sar(n, d);
++            }
+
+-    if (sys->input_pool)
+-        mmal_pool_destroy(sys->input_pool);
++            if (sys->output_format != NULL)  // a pending format not yet applied is superseded by this one
++                mmal_format_free(sys->output_format);
+
+-    if (sys->output_format)
+-        mmal_format_free(sys->output_format);
++            sys->output_format = format;  // left for change_output_format() to apply and free
++        }
++    }
++    else if (buffer->cmd != 0) {
++        char buf0[5];
++        msg_Warn(dec, "Unexpected output cb event: %s", str_fourcc(buf0, buffer->cmd));
++    }
+
+-    if (sys->output_pool)
+-        mmal_pool_destroy(sys->output_pool);
++    // If we get here then we were flushing (cmd == 0 && len == 0) or
++    // that was an EVENT - in either case we want to release the buffer
++    // back to its pool rather than recycle it.
++    mmal_buffer_header_reset(buffer);
++    buffer->user_data = NULL;
++    mmal_buffer_header_release(buffer);
++}
+
+-    if (sys->component)
+-        mmal_component_release(sys->component);
+
+-    vlc_sem_destroy(&sys->sem);
+-    free(sys);
+
+-    bcm_host_deinit();
++static void fill_output_port(decoder_t *dec)  // Refill the output port from the pool, unless decoder_UpdateVideoFormat() returns non-zero
++{
++    decoder_sys_t *sys = dec->p_sys;
++
++    if (decoder_UpdateVideoFormat(dec) != 0)
++    {
++        // If we have a new format don't bother stuffing the buffer
++        // We should get a reset real soon now
++#if TRACE_ALL
++        msg_Dbg(dec, "%s: Updated", __func__);
++#endif
++
++        return;
++    }
++
++    hw_mmal_port_pool_ref_fill(sys->ppr);
++    return;
+ }
+
+ static int change_output_format(decoder_t *dec)
+ {
+     MMAL_PARAMETER_VIDEO_INTERLACE_TYPE_T interlace_type;
+-    decoder_sys_t *sys = dec->p_sys;
++    decoder_sys_t * const sys = dec->p_sys;
+     MMAL_STATUS_T status;
+-    int pool_size;
+     int ret = 0;
+
++#if TRACE_ALL
++    msg_Dbg(dec, "%s: <<<", __func__);
++#endif
++
+     if (atomic_load(&sys->started)) {
+         mmal_format_full_copy(sys->output->format, sys->output_format);
+         status = mmal_port_format_commit(sys->output);
+@@ -300,7 +476,9 @@ static int change_output_format(decoder_
+     }
+
+ port_reset:
++#if TRACE_ALL
+     msg_Dbg(dec, "%s: Do full port reset", __func__);
++#endif
+     status = mmal_port_disable(sys->output);
+     if (status != MMAL_SUCCESS) {
+         msg_Err(dec, "Failed to disable output port (status=%"PRIx32" %s)",
+@@ -310,6 +488,7 @@ port_reset:
+     }
+
+     mmal_format_full_copy(sys->output->format, sys->output_format);
++
+     status = mmal_port_format_commit(sys->output);
+     if (status != MMAL_SUCCESS) {
+         msg_Err(dec, "Failed to commit output format (status=%"PRIx32" %s)",
+@@ -318,18 +497,10 @@ port_reset:
+         goto out;
+     }
+
+-    if (sys->opaque) {
+-        sys->output->buffer_num = NUM_DECODER_BUFFER_HEADERS;
+-        pool_size = NUM_DECODER_BUFFER_HEADERS;
+-    } else {
+-        sys->output->buffer_num = __MAX(sys->output->buffer_num_recommended,
+-                MIN_NUM_BUFFERS_IN_TRANSIT);
+-        pool_size = sys->output->buffer_num;
+-    }
+-
++    sys->output->buffer_num = NUM_DECODER_BUFFER_HEADERS;
+     sys->output->buffer_size = sys->output->buffer_size_recommended;
+
+-    status = mmal_port_enable(sys->output, output_port_cb);
++    status = mmal_port_enable(sys->output, decoder_output_cb);
+     if (status != MMAL_SUCCESS) {
+         msg_Err(dec, "Failed to enable output port (status=%"PRIx32" %s)",
+                 status, mmal_status_to_string(status));
+@@ -338,25 +509,14 @@ port_reset:
+     }
+
+     if (!atomic_load(&sys->started)) {
+-        if (!sys->opaque) {
+-            sys->output_pool = mmal_port_pool_create(sys->output, pool_size, 0);
+-            msg_Dbg(dec, "Created output pool with %d pictures", sys->output_pool->headers_num);
+-        }
+-
+         atomic_store(&sys->started, true);
+
+         /* we need one picture from vout for each buffer header on the output
+          * port */
+-        dec->i_extra_picture_buffers = pool_size;
+-
+-        /* remove what VLC core reserves as it is part of the pool_size
+-         * already */
+-        if (dec->fmt_in.i_codec == VLC_CODEC_H264)
+-            dec->i_extra_picture_buffers -= 19;
+-        else
+-            dec->i_extra_picture_buffers -= 3;
+-
++        dec->i_extra_picture_buffers = 10;
++#if TRACE_ALL
+         msg_Dbg(dec, "Request %d extra pictures", dec->i_extra_picture_buffers);
++#endif
+     }
+
+ apply_fmt:
+@@ -366,8 +526,8 @@ apply_fmt:
+     dec->fmt_out.video.i_y_offset = sys->output->format->es->video.crop.y;
+     dec->fmt_out.video.i_visible_width = sys->output->format->es->video.crop.width;
+     dec->fmt_out.video.i_visible_height = sys->output->format->es->video.crop.height;
+-    dec->fmt_out.video.i_sar_num = sys->output->format->es->video.par.num;
+-    dec->fmt_out.video.i_sar_den = sys->output->format->es->video.par.den;
++    dec->fmt_out.video.i_sar_num = sys->output_format->es->video.par.num;  // SAR can be killed by commit
++    dec->fmt_out.video.i_sar_den = sys->output_format->es->video.par.den;
+     dec->fmt_out.video.i_frame_rate = sys->output->format->es->video.frame_rate.num;
+     dec->fmt_out.video.i_frame_rate_base = sys->output->format->es->video.frame_rate.den;
+
+@@ -382,12 +542,19 @@ apply_fmt:
+         sys->b_progressive = (interlace_type.eMode == MMAL_InterlaceProgressive);
+         sys->b_top_field_first = sys->b_progressive ? true :
+             (interlace_type.eMode == MMAL_InterlaceFieldsInterleavedUpperFirst);
++#if TRACE_ALL
+         msg_Dbg(dec, "Detected %s%s video (%d)",
+                 sys->b_progressive ? "progressive" : "interlaced",
+                 sys->b_progressive ? "" : (sys->b_top_field_first ? " tff" : " bff"),
+                 interlace_type.eMode);
++#endif
+     }
+
++    // Tell the rest of the world we have changed format
++    vlc_mutex_lock(&sys->pic_lock);
++    ret = decoder_UpdateVideoFormat(dec);
++    vlc_mutex_unlock(&sys->pic_lock);
++
+ out:
+     mmal_format_free(sys->output_format);
+     sys->output_format = NULL;
+@@ -395,144 +562,85 @@ out:
+     return ret;
+ }
+
+-static int send_output_buffer(decoder_t *dec)
++static MMAL_STATUS_T
++set_extradata_and_commit(decoder_t * const dec, decoder_sys_t * const sys)  // Commit the input port format and return the status; despite the name, no extradata is set here
+ {
+-    decoder_sys_t *sys = dec->p_sys;
+-    MMAL_BUFFER_HEADER_T *buffer;
+-    picture_sys_t *p_sys;
+-    picture_t *picture = NULL;
+     MMAL_STATUS_T status;
+-    unsigned buffer_size = 0;
+-    int ret = 0;
+
+-    if (!sys->output->is_enabled)
+-        return VLC_EGENERIC;
+-
+-    /* If local output pool is allocated, use it - this is only the case for
+-     * non-opaque modes */
+-    if (sys->output_pool) {
+-        buffer = mmal_queue_get(sys->output_pool->queue);
+-        if (!buffer) {
+-            msg_Warn(dec, "Failed to get new buffer");
+-            return VLC_EGENERIC;
+-        }
+-    }
+-
+-    if (!decoder_UpdateVideoFormat(dec))
+-        picture = decoder_NewPicture(dec);
+-    if (!picture) {
+-        msg_Warn(dec, "Failed to get new picture");
+-        ret = -1;
+-        goto err;
+-    }
+-
+-    p_sys = picture->p_sys;
+-    for (int i = 0; i < picture->i_planes; i++)
+-        buffer_size += picture->p[i].i_lines * picture->p[i].i_pitch;
+-
+-    if (sys->output_pool) {
+-        mmal_buffer_header_reset(buffer);
+-        buffer->alloc_size = sys->output->buffer_size;
+-        if (buffer_size < sys->output->buffer_size) {
+-            msg_Err(dec, "Retrieved picture with too small data block (%d < %d)",
+-                    buffer_size, sys->output->buffer_size);
+-            ret = VLC_EGENERIC;
+-            goto err;
+-        }
+-
+-        if (!sys->opaque)
+-            buffer->data = picture->p[0].p_pixels;
+-    } else {
+-        buffer = p_sys->buffer;
+-        if (!buffer) {
+-            msg_Warn(dec, "Picture has no buffer attached");
+-            picture_Release(picture);
+-            return VLC_EGENERIC;
+-        }
+-        buffer->data = p_sys->buffer->data;
+-    }
+-    buffer->user_data = picture;
+-    buffer->cmd = 0;
+-
+-    status = mmal_port_send_buffer(sys->output, buffer);
++    status = mmal_port_format_commit(sys->input);
+     if (status != MMAL_SUCCESS) {
+-        msg_Err(dec, "Failed to send buffer to output port (status=%"PRIx32" %s)",
+-                status, mmal_status_to_string(status));
+-        ret = -1;
+-        goto err;
+-    }
+-    atomic_fetch_add(&sys->output_in_transit, 1);
+-
+-    return ret;
+-
+-err:
+-    if (picture)
+-        picture_Release(picture);
+-    if (sys->output_pool && buffer) {
+-        buffer->data = NULL;
+-        mmal_buffer_header_release(buffer);
++        msg_Err(dec, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
++                sys->input->name, status, mmal_status_to_string(status));
+     }
+-    return ret;
++    return status;  // failure is logged above and also passed back to the caller
+ }
+
+-static void fill_output_port(decoder_t *dec)
++static MMAL_STATUS_T decoder_send_extradata(decoder_t * const dec, decoder_sys_t *const sys)  // For H264 with extradata, send it to the input port as a CONFIG buffer; otherwise do nothing
+ {
+-    decoder_sys_t *sys = dec->p_sys;
+-
+-    unsigned max_buffers_in_transit = 0;
+-    int buffers_available = 0;
+-    int buffers_to_send = 0;
+-    int i;
++    if (dec->fmt_in.i_codec == VLC_CODEC_H264 &&
++        dec->fmt_in.i_extra > 0)
++    {
++        MMAL_BUFFER_HEADER_T * const buf = mmal_queue_wait(sys->input_pool->queue);  // result is used without a NULL check
++        MMAL_STATUS_T status;
++
++        mmal_buffer_header_reset(buf);
++        buf->cmd = 0;
++        buf->user_data = NULL;  // no block for input_port_cb() to release
++        buf->alloc_size = sys->input->buffer_size;
++        buf->length = dec->fmt_in.i_extra;
++        buf->data = dec->fmt_in.p_extra;  // points straight at the decoder's extradata; nothing is copied
++        buf->flags = MMAL_BUFFER_HEADER_FLAG_CONFIG;
+
+-    if (sys->output_pool) {
+-        max_buffers_in_transit = __MAX(sys->output_pool->headers_num,
+-                MIN_NUM_BUFFERS_IN_TRANSIT);
+-        buffers_available = mmal_queue_length(sys->output_pool->queue);
+-    } else {
+-        max_buffers_in_transit = NUM_DECODER_BUFFER_HEADERS;
+-        buffers_available = NUM_DECODER_BUFFER_HEADERS - atomic_load(&sys->output_in_transit);
++        status = mmal_port_send_buffer(sys->input, buf);
++        if (status != MMAL_SUCCESS) {
++            msg_Err(dec, "Failed to send extradata buffer to input port (status=%"PRIx32" %s)",
++                    status, mmal_status_to_string(status));
++            return status;  // NOTE(review): buf is not handed back to input_pool on this path - confirm whether a header leaks
++        }
+     }
+-    buffers_to_send = max_buffers_in_transit - atomic_load(&sys->output_in_transit);
+
+-    if (buffers_to_send > buffers_available)
+-        buffers_to_send = buffers_available;
+-
+-#ifndef NDEBUG
+-    msg_Dbg(dec, "Send %d buffers to output port (available: %d, "
+-                    "in_transit: %d, buffer_num: %d)",
+-                    buffers_to_send, buffers_available,
+-                    atomic_load(&sys->output_in_transit),
+-                    sys->output->buffer_num);
+-#endif
+-    for (i = 0; i < buffers_to_send; ++i)
+-        if (send_output_buffer(dec) < 0)
+-            break;
++    return MMAL_SUCCESS;  // also the result when there was nothing to send
+ }
+
+ static void flush_decoder(decoder_t *dec)
+ {
+-    decoder_sys_t *sys = dec->p_sys;
+-    MMAL_BUFFER_HEADER_T *buffer;
+-    MMAL_STATUS_T status;
++    decoder_sys_t *const sys = dec->p_sys;
+
+-    msg_Dbg(dec, "Flushing decoder ports...");
+-    mmal_port_flush(sys->output);
+-    mmal_port_flush(sys->input);
+-
+-    while (atomic_load(&sys->output_in_transit) ||
+-           atomic_load(&sys->input_in_transit))
+-        vlc_sem_wait(&sys->sem);
++#if TRACE_ALL
++    msg_Dbg(dec, "%s: <<<", __func__);
++#endif
++
++    if (!sys->b_flushed) {  // flush only once; decode() clears b_flushed after it next sends a buffer
++        mmal_port_disable(sys->input);  // flushing is done by disabling the ports rather than calling mmal_port_flush()
++        mmal_port_disable(sys->output);
++        // We can leave the input disabled, but we want the output enabled
++        // in order to sink any buffers returning from other modules
++        mmal_port_enable(sys->output, decoder_output_cb);  // return values of the disable/enable calls are not checked
++        sys->b_flushed = true;
++    }
++#if TRACE_ALL
++    msg_Dbg(dec, "%s: >>>", __func__);
++#endif
+ }
+
+ static int decode(decoder_t *dec, block_t *block)
+ {
+     decoder_sys_t *sys = dec->p_sys;
+     MMAL_BUFFER_HEADER_T *buffer;
+-    bool need_flush = false;
+     uint32_t len;
+-    uint32_t flags = 0;
++    uint32_t flags = MMAL_BUFFER_HEADER_FLAG_FRAME_START;
+     MMAL_STATUS_T status;
+
++#if TRACE_ALL
++    msg_Dbg(dec, "<<< %s: %lld/%lld", __func__, block == NULL ? -1LL : block->i_dts, block == NULL ? -1LL : block->i_pts);
++#endif
++
++    if (sys->err_stream != MMAL_SUCCESS) {
++        msg_Err(dec, "MMAL error reported by ctrl");
++        flush_decoder(dec);
++        return VLCDEC_ECRITICAL;  /// I think they are all fatal
++    }
++
+     /*
+      * Configure output port if necessary
+      */
+@@ -541,18 +649,50 @@ static int decode(decoder_t *dec, block_
+             msg_Err(dec, "Failed to change output port format");
+     }
+
+-    if (!block)
+-        goto out;
++    if (block == NULL)
++        return VLCDEC_SUCCESS;
+
+     /*
+      * Check whether full flush is required
+      */
+-    if (block && block->i_flags & BLOCK_FLAG_DISCONTINUITY) {
++    if (block->i_flags & BLOCK_FLAG_DISCONTINUITY) {
++#if TRACE_ALL
++        msg_Dbg(dec, "%s: >>> Discontinuity", __func__);
++#endif
+         flush_decoder(dec);
++    }
++
++    if (block->i_buffer == 0)
++    {
+         block_Release(block);
+         return VLCDEC_SUCCESS;
+     }
+
++    // Reenable stuff if the last thing we did was flush
++    if (!sys->output->is_enabled &&
++        (status = mmal_port_enable(sys->output, decoder_output_cb)) != MMAL_SUCCESS)
++    {
++        msg_Err(dec, "Output port enable failed");
++        goto fail;
++    }
++
++    if (!sys->input->is_enabled)
++    {
++        if ((status = set_extradata_and_commit(dec, sys)) != MMAL_SUCCESS)
++            goto fail;
++
++        if ((status = mmal_port_enable(sys->input, input_port_cb)) != MMAL_SUCCESS)
++        {
++            msg_Err(dec, "Input port enable failed");
++            goto fail;
++        }
++
++        if ((status = decoder_send_extradata(dec, sys)) != MMAL_SUCCESS)
++            goto fail;
++    }
++
++    // *** We cannot get a picture to put the result in 'till we have
++    // reported the size & the output stages have been set up
+     if (atomic_load(&sys->started))
+         fill_output_port(dec);
+
+@@ -563,18 +703,21 @@ static int decode(decoder_t *dec, block_
+     if (block->i_flags & BLOCK_FLAG_CORRUPTED)
+         flags |= MMAL_BUFFER_HEADER_FLAG_CORRUPTED;
+
+-    while (block && block->i_buffer > 0) {
+-        buffer = mmal_queue_timedwait(sys->input_pool->queue, 100);
++    while (block != NULL)
++    {
++        buffer = mmal_queue_wait(sys->input_pool->queue);
+         if (!buffer) {
+             msg_Err(dec, "Failed to retrieve buffer header for input data");
+-            need_flush = true;
+-            break;
++            goto fail;
+         }
++
+         mmal_buffer_header_reset(buffer);
+         buffer->cmd = 0;
+-        buffer->pts = block->i_pts != 0 ? block->i_pts : block->i_dts;
++        buffer->pts = block->i_pts != VLC_TICK_INVALID ? block->i_pts :
++            block->i_dts != VLC_TICK_INVALID ? block->i_dts : MMAL_TIME_UNKNOWN;
+         buffer->dts = block->i_dts;
+         buffer->alloc_size = sys->input->buffer_size;
++        buffer->user_data = NULL;
+
+         len = block->i_buffer;
+         if (len > buffer->alloc_size)
+@@ -585,94 +728,1808 @@ static int decode(decoder_t *dec, block_
+         block->i_buffer -= len;
+         buffer->length = len;
+         if (block->i_buffer == 0) {
++            flags |= MMAL_BUFFER_HEADER_FLAG_FRAME_END;
++            if (block->i_flags & BLOCK_FLAG_END_OF_SEQUENCE) {
++                msg_Dbg(dec, "EOS sent");
++                flags |= MMAL_BUFFER_HEADER_FLAG_EOS;
++            }
+             buffer->user_data = block;
+             block = NULL;
+         }
+         buffer->flags = flags;
+
++#if TRACE_ALL
++        msg_Dbg(dec, "%s: -- Send buffer: cmd=%d, data=%p, size=%d, len=%d, offset=%d, flags=%#x, pts=%lld, dts=%lld", __func__,\
++                buffer->cmd, buffer->data, buffer->alloc_size, buffer->length, buffer->offset,
++                buffer->flags, (long long)buffer->pts, (long long)buffer->dts);
++#endif
+         status = mmal_port_send_buffer(sys->input, buffer);
+         if (status != MMAL_SUCCESS) {
+             msg_Err(dec, "Failed to send buffer to input port (status=%"PRIx32" %s)",
+                     status, mmal_status_to_string(status));
+-            break;
++            goto fail;
+         }
+-        atomic_fetch_add(&sys->input_in_transit, 1);
++
++        // Reset flushed flag once we have sent a buf
++        sys->b_flushed = false;
++        flags &= ~MMAL_BUFFER_HEADER_FLAG_FRAME_START;
+     }
++    return VLCDEC_SUCCESS;
+
+-out:
+-    if (need_flush)
+-        flush_decoder(dec);
++fail:
++    flush_decoder(dec);
++    return VLCDEC_ECRITICAL;
+
+-    return VLCDEC_SUCCESS;
+ }
+
+-static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
++
++static void CloseDecoder(decoder_t *dec)  // Tear down the decoder: disable ports and component, free pool/format/pool-ref, then free sys
+ {
+-    decoder_t *dec = (decoder_t *)port->userdata;
++    decoder_sys_t *sys = dec->p_sys;
++
++#if TRACE_ALL
++    msg_Dbg(dec, "%s: <<<", __func__);
++#endif
++
++    if (!sys)  // nothing was allocated, so nothing to undo
++        return;
++
++    if (sys->component != NULL) {
++        if (sys->input->is_enabled)  // NOTE(review): assumes sys->input and sys->output are valid whenever component is set - confirm against OpenDecoder
++            mmal_port_disable(sys->input);
++
++        if (sys->output->is_enabled)
++            mmal_port_disable(sys->output);
++
++        if (sys->component->control->is_enabled)
++            mmal_port_disable(sys->component->control);
++
++        if (sys->component->is_enabled)
++            mmal_component_disable(sys->component);
++
++        mmal_component_release(sys->component);
++    }
++
++    if (sys->input_pool != NULL)
++        mmal_pool_destroy(sys->input_pool);
++
++    if (sys->output_format != NULL)  // a format change that was received but never applied
++        mmal_format_free(sys->output_format);
++
++    hw_mmal_port_pool_ref_release(sys->ppr, false);  // called unconditionally, even if ppr was never set
++
++    cma_vcsm_exit(sys->vcsm_init_type);
++
++    vlc_mutex_destroy(&sys->pic_lock);
++    free(sys);
++}
++
++static int OpenDecoder(decoder_t *dec)
++{
++    int ret = VLC_EGENERIC;  // Default failure code; only overridden with VLC_ENOMEM below
++    decoder_sys_t *sys;
+     MMAL_STATUS_T status;
++    const MMAL_FOURCC_T in_fcc = vlc_to_mmal_es_fourcc(dec->fmt_in.i_codec);
++
++#if TRACE_ALL || 1
++    {  // NB: the "|| 1" above keeps this format dump enabled even when TRACE_ALL is 0
++        char buf1[5], buf2[5], buf2a[5];
++        char buf3[5], buf4[5];
++        MMAL_RATIONAL_T r = rationalize_sar(dec->fmt_in.video.i_sar_num, dec->fmt_in.video.i_sar_den);
++
++        msg_Dbg(dec, "%s: <<< (%s/%s)[%s] %dx%d %d/%d=%d/%d o:%#x -> (%s/%s) %dx%d %d/%d o:%#x", __func__,
++                str_fourcc(buf1, dec->fmt_in.i_codec),
++                str_fourcc(buf2, dec->fmt_in.video.i_chroma),
++                str_fourcc(buf2a, in_fcc),
++                dec->fmt_in.video.i_width, dec->fmt_in.video.i_height,
++                dec->fmt_in.video.i_sar_num, dec->fmt_in.video.i_sar_den,
++                r.num, r.den,
++                (int)dec->fmt_in.video.orientation,
++                str_fourcc(buf3, dec->fmt_out.i_codec),
++                str_fourcc(buf4, dec->fmt_out.video.i_chroma),
++                dec->fmt_out.video.i_width, dec->fmt_out.video.i_height,
++                dec->fmt_out.video.i_sar_num, dec->fmt_out.video.i_sar_den,
++                (int)dec->fmt_out.video.orientation);
++    }
++#endif
++
++    if (!is_enc_supported(&supported_decode_in_enc, in_fcc))  // Reject before allocating anything
++        return VLC_EGENERIC;
++
++    sys = calloc(1, sizeof(decoder_sys_t));
++    if (!sys) {
++        ret = VLC_ENOMEM;
++        goto fail;
++    }
++    dec->p_sys = sys;  // Set before any further failure so the fail path's CloseDecoder(dec) can clean up
++    vlc_mutex_init(&sys->pic_lock);
++
++    if ((sys->vcsm_init_type = cma_vcsm_init()) == VCSM_INIT_NONE) {
++        msg_Err(dec, "VCSM init failed");
++        goto fail;
++    }
++    msg_Info(dec, "VCSM init succeeded: %s", cma_vcsm_init_str(sys->vcsm_init_type));
++
++    sys->err_stream = MMAL_SUCCESS;
++
++    status = mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_DECODER, &sys->component);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(dec, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
++                MMAL_COMPONENT_DEFAULT_VIDEO_DECODER, status, mmal_status_to_string(status));
++        goto fail;
++    }
++
++    sys->input = sys->component->input[0];
++    sys->output = sys->component->output[0];
++
++    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)dec;
++    sys->input->format->encoding = in_fcc;
++
++    if (!set_and_test_enc_supported(&supported_decode_in_enc, sys->input, in_fcc)) {
++#if TRACE_ALL
++        char cbuf[5];
++        msg_Dbg(dec, "Format not supported: %s", str_fourcc(cbuf, in_fcc));
++#endif
++        goto fail;
++    }
++
++    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)dec;
++    status = mmal_port_enable(sys->component->control, control_port_cb);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(dec, "Failed to enable control port %s (status=%"PRIx32" %s)",
++                sys->component->control->name, status, mmal_status_to_string(status));
++        goto fail;
++    }
++
++    if ((status = set_extradata_and_commit(dec, sys)) != MMAL_SUCCESS)
++        goto fail;
++
++    sys->input->buffer_size = sys->input->buffer_size_recommended;
++    sys->input->buffer_num = sys->input->buffer_num_recommended;
++
++    status = mmal_port_enable(sys->input, input_port_cb);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(dec, "Failed to enable input port %s (status=%"PRIx32" %s)",
++                sys->input->name, status, mmal_status_to_string(status));
++        goto fail;
++    }
++
++    // Set vanishingly unlikely shape (or at least crop)
++    // to ensure that we get a resolution changed event
++    // Small wxh are rejected (128x128 is rejected) so pick a
++    // plausible size.
++    // Crop doesn't seem to be checked for being constrained by wxh
++    // so we could place it outside the pic to be sure that it is
++    // never matched but stick with something legal in case it is ever
++    // actually checked
++    sys->output->format->es->video.height = 256;
++    sys->output->format->es->video.width = 256;
++    sys->output->format->es->video.crop.height = 4;
++    sys->output->format->es->video.crop.width = 2;
++    sys->output->format->es->video.crop.x = 66;
++    sys->output->format->es->video.crop.y = 88;
++
++    if ((status = hw_mmal_opaque_output(VLC_OBJECT(dec), &sys->ppr,
++                                        sys->output, NUM_EXTRA_BUFFERS, decoder_output_cb)) != MMAL_SUCCESS)
++        goto fail;
++
++    status = mmal_component_enable(sys->component);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(dec, "Failed to enable component %s (status=%"PRIx32" %s)",
++                sys->component->name, status, mmal_status_to_string(status));
++        goto fail;
++    }
++
++    if ((sys->input_pool = mmal_pool_create(sys->input->buffer_num, 0)) == NULL)  // Payload size 0 - presumably headers only
++    {
++        msg_Err(dec, "Failed to create input pool");
++        goto fail;
++    }
++
++    sys->b_flushed = true;
++
++    if ((status = decoder_send_extradata(dec, sys)) != MMAL_SUCCESS)
++        goto fail;
++
++    // Given no better ideas at this point copy input format to output
++    // This also copies container stuff (such as orientation) that we do not
++    // decode from the ES but may be important to display
++    video_format_Copy(&dec->fmt_out.video, &dec->fmt_in.video);
++    dec->fmt_out.i_codec = VLC_CODEC_MMAL_OPAQUE;
++    dec->fmt_out.video.i_chroma = VLC_CODEC_MMAL_OPAQUE;
++
++
++    dec->pf_decode = decode;
++    dec->pf_flush  = flush_decoder;
++
++#if TRACE_ALL
++    msg_Dbg(dec, ">>> %s: ok", __func__);
++#endif
++    return 0;
++
++fail:
++    CloseDecoder(dec);  // Undoes whatever subset of the setup above completed
++#if TRACE_ALL
++msg_Dbg(dec, ">>> %s: FAIL: ret=%d", __func__, ret);
++#endif
++    return ret;  // VLC_EGENERIC for every failure except the calloc one
++}
++
++// ----------------------------
++
++#define CONV_MAX_LATENCY 1  // In frames
++
++typedef struct pic_fifo_s {
++    picture_t * head;  // First picture (linked via p_next), or NULL when the FIFO is empty
++    picture_t * tail;  // Last picture; only read while head != NULL
++} pic_fifo_t;
++
++static inline picture_t * pic_fifo_get(pic_fifo_t * const pf)
++{
++    picture_t * const first = pf->head;
++    if (first == NULL)
++        return NULL;  // Empty FIFO
++    pf->head = first->p_next;  // Successor (possibly NULL) becomes the new head
++    first->p_next = NULL;  // Returned picture is detached from the chain
++    return first;
++}
++
++static inline picture_t * pic_fifo_get_all(pic_fifo_t * const pf)
++{
++    picture_t * const chain = pf->head;  // Whole list, still linked via p_next
++    pf->head = NULL;  // FIFO is now empty; tail is left untouched
++    return chain;
++}
++
++static inline void pic_fifo_release_all(pic_fifo_t * const pf)
++{
++    // Pop and release pictures one at a time until the FIFO is empty
++    picture_t * p = pic_fifo_get(pf);
++    for (; p != NULL; p = pic_fifo_get(pf))
++        picture_Release(p);
++}
++
++static inline void pic_fifo_init(pic_fifo_t * const pf)
++{
++    pf->tail = NULL;  // Not strictly needed
++    pf->head = NULL;  // NULL head marks the FIFO as empty
++}
++
++static inline void pic_fifo_put(pic_fifo_t * const pf, picture_t * pic)
++{
++    // Slot that must point at pic: the head when empty, else the old tail's p_next
++    picture_t ** const link = (pf->head == NULL) ? &pf->head : &pf->tail->p_next;
++
++    pic->p_next = NULL;
++    *link = pic;
++    pf->tail = pic;
++}
++
++#define SUBS_MAX 3
++
++typedef enum filter_resizer_e {
++    FILTER_RESIZER_RESIZER,
++    FILTER_RESIZER_ISP,
++    FILTER_RESIZER_HVS  // Only type given subpic update/flush, and input format changes without a port disable
++} filter_resizer_t;
++
++typedef struct conv_frame_stash_s
++{
++    mtime_t pts;  // Copied back into the picture date by conv_stash_fixup(); MMAL_TIME_UNKNOWN after flush
++    MMAL_BUFFER_HEADER_T * sub_bufs[SUBS_MAX];  // Acquired subpic buffer refs, NULL when unused
++} conv_frame_stash_t;
++
++typedef struct filter_sys_t {
++    filter_resizer_t resizer_type;
++    MMAL_COMPONENT_T *component;
++    MMAL_PORT_T *input;
++    MMAL_PORT_T *output;
++    MMAL_POOL_T *out_pool;  // Free output buffers
++    MMAL_POOL_T *in_pool;   // Input pool to get BH for replication
++
++    cma_buf_pool_t * cma_in_pool;
++    cma_buf_pool_t * cma_out_pool;  // Created lazily in conv_enable_out() when is_cma is set
++
++    subpic_reg_stash_t subs[SUBS_MAX];
++
++    pic_fifo_t ret_pics;  // Converted pictures awaiting return; accessed under lock
++
++    unsigned int pic_n;
++    vlc_sem_t sem;  // Posted once per picture queued on ret_pics, and on slice errors
++    vlc_mutex_t lock;
++
++    MMAL_STATUS_T err_stream;  // Latched by callbacks on error; cleared by conv_flush()
++
++    bool needs_copy_in;
++    bool is_cma;
++    bool is_sliced;
++    bool out_fmt_set;  // Output port/pool are configured on the first conv_filter() call
++    const char * component_name;
++    MMAL_PORT_BH_CB_T in_port_cb_fn;
++    MMAL_PORT_BH_CB_T out_port_cb_fn;
++
++    uint64_t frame_seq;
++    conv_frame_stash_t stash[16];  // Indexed by (frame_seq & 0xf)
++
++    // Slice specific tracking stuff
++    struct {
++        pic_fifo_t pics;  // Output pictures waiting to be filled; head is the one in progress
++        unsigned int line;  // Lines filled
++    } slice;
++
++    vcsm_init_type_t vcsm_init_type;
++} filter_sys_t;
++
++
++static MMAL_STATUS_T pic_to_format(MMAL_ES_FORMAT_T * const es_fmt, const picture_t * const pic)
++{
++    MMAL_VIDEO_FORMAT_T * const vid = &es_fmt->es->video;
++    const unsigned int bytes_pp = (pic->format.i_bits_per_pixel + 7) >> 3;  // Rounded up to whole bytes
++
++    es_fmt->type = MMAL_ES_TYPE_VIDEO;
++    es_fmt->encoding = vlc_to_mmal_video_fourcc(&pic->format);
++    es_fmt->encoding_variant = 0;
++
++    // Start from the VLC format (crop etc.)
++    hw_mmal_vlc_fmt_to_mmal_fmt(es_fmt, &pic->format);
++    if (bytes_pp == 0)
++        return MMAL_SUCCESS;  // No bit depth declared: keep the dimensions set above
++    // Replace width / height with the allocated geometry of plane 0
++    vid->width = pic->p[0].i_pitch / bytes_pp;
++    vid->height = pic->p[0].i_lines;
++    return MMAL_SUCCESS;
++}
++
++
++static MMAL_STATUS_T conv_enable_in(filter_t * const p_filter, filter_sys_t * const sys)
++{
++    MMAL_STATUS_T status;
++    if (sys->input->is_enabled)
++        return MMAL_SUCCESS;  // Already running - nothing to do
++
++    if ((status = mmal_port_enable(sys->input, sys->in_port_cb_fn)) != MMAL_SUCCESS) {
++        msg_Err(p_filter, "Failed to enable input port %s (status=%"PRIx32" %s)",
++                sys->input->name, status, mmal_status_to_string(status));
++    }
++    return status;
++}
++
++static MMAL_STATUS_T conv_enable_out(filter_t * const p_filter, filter_sys_t * const sys)
++{
++    MMAL_STATUS_T status;
++
++    if (!sys->is_cma)
++    {
++        cma_buf_pool_deletez(&sys->cma_out_pool);  // Not CMA output: drop any existing pool
++    }
++    else if (sys->cma_out_pool == NULL)  // CMA output and no pool yet: create it
++    {
++        sys->cma_out_pool = cma_buf_pool_new(CONVERTER_BUFFERS, CONVERTER_BUFFERS, true, "mmal_resizer");
++        if (sys->cma_out_pool == NULL) {
++            msg_Err(p_filter, "Failed to alloc cma buf pool");
++            return MMAL_ENOMEM;
++        }
++    }
++
++    if (sys->output->is_enabled)
++        return MMAL_SUCCESS;  // Already running
++
++    if ((status = mmal_port_enable(sys->output, sys->out_port_cb_fn)) != MMAL_SUCCESS) {
++        msg_Err(p_filter, "Failed to enable output port %s (status=%"PRIx32" %s)",
++                sys->output->name, status, mmal_status_to_string(status));
++    }
++    return status;
++}
++
++static void conv_control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
++{
++    filter_t * const p_filter = (filter_t *)port->userdata;
++
++#if TRACE_ALL
++    msg_Dbg(p_filter, "%s: <<< cmd=%d, data=%p, pic=%p", __func__, buffer->cmd, buffer->data, buffer->user_data);
++#endif
+
+     if (buffer->cmd == MMAL_EVENT_ERROR) {
+-        status = *(uint32_t *)buffer->data;
+-        msg_Err(dec, "MMAL error %"PRIx32" \"%s\"", status,
++        MMAL_STATUS_T status = *(uint32_t *)buffer->data;
++
++        p_filter->p_sys->err_stream = status;  // Latch the error; conv_filter() checks err_stream on entry
++
++        msg_Err(p_filter, "MMAL error %"PRIx32" \"%s\"", status,
+                 mmal_status_to_string(status));
+     }
+
+     mmal_buffer_header_release(buffer);
+ }
+
+-static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
++static void conv_input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)
+ {
+-    block_t *block = (block_t *)buffer->user_data;
+-    decoder_t *dec = (decoder_t *)port->userdata;
+-    decoder_sys_t *sys = dec->p_sys;
+-    buffer->user_data = NULL;
++#if TRACE_ALL
++    picture_context_t * ctx = buf->user_data;
++//    filter_sys_t *const sys = ((filter_t *)port->userdata)->p_sys;
++
++    msg_Dbg((filter_t *)port->userdata, "<<< %s cmd=%d, ctx=%p, buf=%p, flags=%#x, len=%d/%d, pts=%lld",
++            __func__, buf->cmd, ctx, buf, buf->flags, buf->length, buf->alloc_size, (long long)buf->pts);
++#else
++    VLC_UNUSED(port);  // port is only referenced by the trace output
++#endif
++
++    mmal_buffer_header_release(buf);  // The only non-trace action: drop this callback's reference to the header
++
++#if TRACE_ALL
++    msg_Dbg((filter_t *)port->userdata, ">>> %s", __func__);
++#endif
++}
++
++static void conv_out_q_pic(filter_sys_t * const sys, picture_t * const pic)
++{
++    pic->p_next = NULL;  // pic_fifo_put() clears this as well
++
++    vlc_mutex_lock(&sys->lock);
++    pic_fifo_put(&sys->ret_pics, pic);  // Queue the finished picture for conv_get_out_pics()
++    vlc_mutex_unlock(&sys->lock);
+
+-    mmal_buffer_header_release(buffer);
+-    if (block)
+-        block_Release(block);
+-    atomic_fetch_sub(&sys->input_in_transit, 1);
+     vlc_sem_post(&sys->sem);
+ }
+
+-static void output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
++static void conv_output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)
+ {
+-    decoder_t *dec = (decoder_t *)port->userdata;
+-    decoder_sys_t *sys = dec->p_sys;
+-    picture_t *picture;
+-    MMAL_EVENT_FORMAT_CHANGED_T *fmt;
+-    MMAL_ES_FORMAT_T *format;
+-
+-    if (buffer->cmd == 0) {
+-        picture = (picture_t *)buffer->user_data;
+-        if (buffer->length > 0) {
+-            picture->date = buffer->pts;
+-            picture->b_progressive = sys->b_progressive;
+-            picture->b_top_field_first = sys->b_top_field_first;
+-            decoder_QueueVideo(dec, picture);
+-        } else {
+-            picture_Release(picture);
+-            if (sys->output_pool) {
+-                buffer->user_data = NULL;
+-                buffer->alloc_size = 0;
+-                buffer->data = NULL;
+-                mmal_buffer_header_release(buffer);
+-            }
+-        }
+-        atomic_fetch_sub(&sys->output_in_transit, 1);
+-        vlc_sem_post(&sys->sem);
+-    } else if (buffer->cmd == MMAL_EVENT_FORMAT_CHANGED) {
+-        fmt = mmal_event_format_changed_get(buffer);
++    filter_t * const p_filter = (filter_t *)port->userdata;
++    filter_sys_t * const sys = p_filter->p_sys;
+
+-        format = mmal_format_alloc();
+-        mmal_format_full_copy(format, fmt->format);
++#if TRACE_ALL
++    msg_Dbg(p_filter, "<<< %s: cmd=%d, flags=%#x, pic=%p, data=%p, len=%d/%d, pts=%lld/%lld", __func__,
++            buf->cmd, buf->flags, buf->user_data, buf->data, buf->length, buf->alloc_size,
++            (long long)buf->pts, (long long)sys->stash[(unsigned int)(buf->pts & 0xf)].pts);
++#endif
++    if (buf->cmd == 0) {  // Data buffer (not an event)
++        picture_t * const pic = (picture_t *)buf->user_data;
+
+-        if (sys->opaque)
+-            format->encoding = MMAL_ENCODING_OPAQUE;
++        if (pic == NULL) {
++            msg_Err(p_filter, "%s: Buffer has no attached picture", __func__);
++        }
++        else if (buf->data == NULL || buf->length == 0)
++        {
++#if TRACE_ALL
++            msg_Dbg(p_filter, "%s: Buffer has no data", __func__);
++#endif
++        }
++        else
++        {
++            buf_to_pic_copy_props(pic, buf);
++
++            // Set pic data pointers from buf aux info now it has it
++            if (sys->is_cma) {
++                if (cma_pic_set_data(pic, sys->output->format, buf) != VLC_SUCCESS)
++                    msg_Err(p_filter, "Failed to set data");
++            }
++
++//            draw_corners(pic->p[0].p_pixels, pic->p[0].i_pitch / 4, 0, 0, pic->p[0].i_visible_pitch / 4, pic->p[0].i_visible_lines);
++#if DEBUG_SQUARES
++            draw_square(pic->p[0].p_pixels, pic->p[0].i_pitch / 4,  0, 0, 32, 32, 0xffff0000);
++            draw_square(pic->p[0].p_pixels, pic->p[0].i_pitch / 4, 32, 0, 32, 32, 0xff00ff00);
++            draw_square(pic->p[0].p_pixels, pic->p[0].i_pitch / 4, 64, 0, 32, 32, 0xff0000ff);
++#endif
++
++            buf->user_data = NULL;  // Responsibility for this pic no longer with buffer
++            conv_out_q_pic(sys, pic);
++        }
++    }
++
++    mmal_buffer_header_release(buf);  // If user_data is still set here the pre-release callback frees the pic
++}
++
++
++static void slice_output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)
++{
++    filter_t * const p_filter = (filter_t *)port->userdata;
++    filter_sys_t * const sys = p_filter->p_sys;
++    // Sliced-output callback: copies each returned slice into the picture at the head of slice.pics
++#if TRACE_ALL
++    msg_Dbg(p_filter, "<<< %s: cmd=%d, flags=%#x, pic=%p, data=%p, len=%d/%d, pts=%lld", __func__,
++            buf->cmd, buf->flags, buf->user_data, buf->data, buf->length, buf->alloc_size, (long long)buf->pts);
++#endif
++
++    if (buf->cmd != 0)
++    {
++        mmal_buffer_header_release(buf);
++        return;
++    }
++
++    if (buf->data == NULL || buf->length == 0)
++    {
++#if TRACE_ALL
++        msg_Dbg(p_filter, "%s: Buffer has no data", __func__);
++#endif
++    }
++    else
++    {
++        // Got slice
++        picture_t *pic = sys->slice.pics.head;
++        const unsigned int scale_lines = sys->output->format->es->video.height;  // Expected lines of callback
++
++        if (pic == NULL) {
++            msg_Err(p_filter, "No output picture");
++            goto fail;
++        }
++
++        // Copy lines
++        // * single plane only - fix for I420
++        {
++            const unsigned int scale_n = __MIN(scale_lines - sys->slice.line, MMAL_SLICE_HEIGHT);
++            const unsigned int pic_lines = pic->p[0].i_lines;
++            const unsigned int copy_n = sys->slice.line + scale_n <= pic_lines ? scale_n :
++                sys->slice.line >= pic_lines ? 0 :
++                    pic_lines - sys->slice.line;
++
++            const unsigned int src_stride = buf->type->video.pitch[0];
++            const unsigned int dst_stride = pic->p[0].i_pitch;
++            uint8_t *dst = pic->p[0].p_pixels + sys->slice.line * dst_stride;
++            const uint8_t *src = buf->data + buf->type->video.offset[0];
++
++            if (src_stride == dst_stride) {
++                if (copy_n != 0)
++                    memcpy(dst, src, src_stride * copy_n);
++            }
++            else {
++                unsigned int i;
++                for (i = 0; i != copy_n; ++i) {
++                    memcpy(dst, src, __MIN(dst_stride, src_stride));
++                    dst += dst_stride;
++                    src += src_stride;
++                }
++            }
++            sys->slice.line += scale_n;
++        }
++
++        if ((buf->flags & MMAL_BUFFER_HEADER_FLAG_FRAME_END) != 0 || sys->slice.line >= scale_lines) {
++            if ((buf->flags & MMAL_BUFFER_HEADER_FLAG_FRAME_END) == 0 || sys->slice.line != scale_lines) {
++                // Stuff doesn't add up...
++                msg_Err(p_filter, "Line count (%d/%d) & EOF disagree (flags=%#x)", sys->slice.line, scale_lines, buf->flags);
++                goto fail;
++            }
++            else {
++                sys->slice.line = 0;
++
++                vlc_mutex_lock(&sys->lock);
++                pic_fifo_get(&sys->slice.pics);  // Remove head from Q
++                vlc_mutex_unlock(&sys->lock);
++
++                buf_to_pic_copy_props(pic, buf);
++                conv_out_q_pic(sys, pic);
++            }
++        }
++    }
++
++    // Put back
++    buf->user_data = NULL; // Zap here to make sure we can't reuse later
++    mmal_buffer_header_reset(buf);
++
++    if (mmal_port_send_buffer(sys->output, buf) != MMAL_SUCCESS) {
++        mmal_buffer_header_release(buf);
++    }
++    return;
++
++fail:
++    mmal_buffer_header_release(buf);  // Not resent on this path - release it so the header is not leaked
++    sys->err_stream = MMAL_EIO;
++    vlc_sem_post(&sys->sem);  // If we were waiting then break us out - the flush should fix sem values
++}
++
++
++static void conv_flush(filter_t * p_filter)
++{
++    filter_sys_t * const sys = p_filter->p_sys;
++    unsigned int i;
++
++#if TRACE_ALL
++    msg_Dbg(p_filter, "<<< %s", __func__);
++#endif
++
++    if (sys->resizer_type == FILTER_RESIZER_HVS)  // Subpic state is only flushed for the HVS resizer
++    {
++        for (i = 0; i != SUBS_MAX; ++i) {
++            hw_mmal_subpic_flush(VLC_OBJECT(p_filter), sys->subs + i);
++        }
++    }
++
++    if (sys->input != NULL && sys->input->is_enabled)
++        mmal_port_disable(sys->input);
++
++    if (sys->output != NULL && sys->output->is_enabled)
++        mmal_port_disable(sys->output);
++
++//    cma_buf_pool_deletez(&sys->cma_out_pool);
++
++    // Free up anything we may have already lying around
++    // Don't need lock as the above disables should have prevented anything
++    // happening in the background
++
++    for (i = 0; i != 16; ++i) {  // 16 == number of entries in sys->stash
++        conv_frame_stash_t *const stash = sys->stash + i;
++        unsigned int sub_no;
++
++        stash->pts = MMAL_TIME_UNKNOWN;
++        for (sub_no = 0; sub_no != SUBS_MAX; ++sub_no) {
++            if (stash->sub_bufs[sub_no] != NULL) {
++                mmal_buffer_header_release(stash->sub_bufs[sub_no]);
++                stash->sub_bufs[sub_no] = NULL;
++            }
++        }
++    }
++
++    pic_fifo_release_all(&sys->slice.pics);  // Output pictures still waiting to be filled
++    pic_fifo_release_all(&sys->ret_pics);  // Converted pictures never collected
++
++    // Reset sem values - easiest & most reliable way is to just kill & re-init
++    vlc_sem_destroy(&sys->sem);
++    vlc_sem_init(&sys->sem, 0);
++    sys->pic_n = 0;
++
++    // Reset error status
++    sys->err_stream = MMAL_SUCCESS;
++
++#if TRACE_ALL
++    msg_Dbg(p_filter, ">>> %s", __func__);
++#endif
++}
++
++static void conv_stash_fixup(filter_t * const p_filter, filter_sys_t * const sys, picture_t * const p_pic)
++{
++    // The low 4 bits of the picture date select the stash slot
++    conv_frame_stash_t * const slot = &sys->stash[p_pic->date & 0xf];
++    MMAL_BUFFER_HEADER_T ** sub;
++    VLC_UNUSED(p_filter);
++
++    p_pic->date = slot->pts;  // Put the stashed pts back on the picture
++    for (sub = slot->sub_bufs; sub != slot->sub_bufs + SUBS_MAX; ++sub) {
++        if (*sub == NULL)
++            continue;
++        // **** Do stashed blend
++        // **** Aaargh, bother... need to rescale subs too
++        mmal_buffer_header_release(*sub);
++        *sub = NULL;
++    }
++}
++
++// Output buffers may contain a pic ref on error or flush
++// Free it
++static MMAL_BOOL_T out_buffer_pre_release_cb(MMAL_BUFFER_HEADER_T *header, void *userdata)
++{
++    picture_t * const attached = header->user_data;
++    VLC_UNUSED(userdata);
++
++    // Detach before releasing so the header no longer refers to the picture
++    header->user_data = NULL;
++    if (attached != NULL)
++        picture_Release(attached);
++
++    return MMAL_FALSE;
++}
++
++static MMAL_STATUS_T conv_set_output(filter_t * const p_filter, filter_sys_t * const sys, picture_t * const pic)
++{
++    MMAL_STATUS_T status;
++
++    sys->output->userdata = (struct MMAL_PORT_USERDATA_T *)p_filter;  // Read back by the output port callbacks
++    sys->output->format->type = MMAL_ES_TYPE_VIDEO;
++    sys->output->format->encoding = vlc_to_mmal_video_fourcc(&p_filter->fmt_out.video);
++    sys->output->format->encoding_variant = 0;
++    hw_mmal_vlc_fmt_to_mmal_fmt(sys->output->format, &p_filter->fmt_out.video);
++
++    if (pic != NULL)  // pic may be NULL (sliced path): the fmt_out defaults above are then kept
++    {
++        // Override default format width/height if we have a pic we need to match
++        if ((status = pic_to_format(sys->output->format, pic)) != MMAL_SUCCESS)
++        {
++            char cbuf[5];
++            msg_Err(p_filter, "Bad format desc: %s, pic=%p, bits=%d", str_fourcc(cbuf, pic->format.i_chroma), pic, pic->format.i_bits_per_pixel);
++            return status;
++        }
++
++        MMAL_VIDEO_FORMAT_T *fmt = &sys->output->format->es->video;
++        msg_Dbg(p_filter, "%s: %dx%d [(0,0) %dx%d]", __func__, fmt->width, fmt->height, fmt->crop.width, fmt->crop.height);
++    }
++
++    if (sys->is_sliced) {
++        // Override height for slice
++        sys->output->format->es->video.height = MMAL_SLICE_HEIGHT;
++    }
++
++    mmal_log_dump_format(sys->output->format);
++
++    status = mmal_port_format_commit(sys->output);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(p_filter, "Failed to commit format for output port %s (status=%"PRIx32" %s)",
++                sys->output->name, status, mmal_status_to_string(status));
++        return status;
++    }
++
++    sys->output->buffer_num = __MAX(sys->is_sliced ? 16 : 2, sys->output->buffer_num_recommended);  // At least 16 (sliced) or 2 buffers
++    sys->output->buffer_size = sys->output->buffer_size_recommended;
++
++    if ((status = conv_enable_out(p_filter, sys)) != MMAL_SUCCESS)
++        return status;
++
++    return MMAL_SUCCESS;
++}
++
++
++static picture_t *conv_get_out_pics(filter_sys_t * const sys)
++{
++    picture_t * pic;
++
++    // Block until the semaphore is posted (picture queued, or slice error)
++    vlc_sem_wait(&sys->sem);
++
++    // Pop at most one picture; NULL if nothing is queued
++    vlc_mutex_lock(&sys->lock);
++    pic = pic_fifo_get(&sys->ret_pics);
++    vlc_mutex_unlock(&sys->lock);
++    return pic;
++}
++
++static picture_t *conv_filter(filter_t *p_filter, picture_t *p_pic)
++{
++    filter_sys_t * const sys = p_filter->p_sys;
++    picture_t * ret_pics = NULL;
++    MMAL_STATUS_T err;
++    const uint64_t frame_seq = ++sys->frame_seq;
++    conv_frame_stash_t * const stash = sys->stash + (frame_seq & 0xf);
++    MMAL_BUFFER_HEADER_T * out_buf = NULL;
++
++#if TRACE_ALL
++    {
++        char dbuf0[5], dbuf1[5];
++        msg_Dbg(p_filter, "<<< %s: %s,%dx%d [(%d,%d) %d/%d] sar:%d/%d->%s,%dx%d [(%d,%d) %dx%d] sar:%d/%d", __func__,
++                str_fourcc(dbuf0, p_filter->fmt_in.video.i_chroma), p_filter->fmt_in.video.i_width, p_filter->fmt_in.video.i_height,
++                p_filter->fmt_in.video.i_x_offset, p_filter->fmt_in.video.i_y_offset,
++                p_filter->fmt_in.video.i_visible_width, p_filter->fmt_in.video.i_visible_height,
++                p_filter->fmt_in.video.i_sar_num, p_filter->fmt_in.video.i_sar_den,
++                str_fourcc(dbuf1, p_filter->fmt_out.video.i_chroma), p_filter->fmt_out.video.i_width, p_filter->fmt_out.video.i_height,
++                p_filter->fmt_out.video.i_x_offset, p_filter->fmt_out.video.i_y_offset,
++                p_filter->fmt_out.video.i_visible_width, p_filter->fmt_out.video.i_visible_height,
++                p_filter->fmt_out.video.i_sar_num, p_filter->fmt_out.video.i_sar_den);
++    }
++#endif
++
++    if (sys->err_stream != MMAL_SUCCESS) {
++        goto stream_fail;
++    }
++
++    // Check pic fmt corresponds to what we have set up
++    if (hw_mmal_vlc_pic_to_mmal_fmt_update(sys->input->format, p_pic))
++    {
++        msg_Dbg(p_filter, "Reset input port format");
++
++        // HVS can take new formats without disable, others need it
++        if (sys->resizer_type != FILTER_RESIZER_HVS) {
++            // Extract any pending pic
++            if (sys->pic_n >= 2) {
++                ret_pics = conv_get_out_pics(sys);
++                // If pic_n == 1 then we return without trying to get stuff
++                sys->pic_n = 1;
++            }
++            if (sys->input->is_enabled) {
++                if ((err = mmal_port_disable(sys->input)) != MMAL_SUCCESS)
++                    msg_Warn(p_filter, "Format update disable failed: %s", mmal_status_to_string(err));
++            }
++        }
++
++//        mmal_log_dump_port(sys->input);
++        if ((err = mmal_port_format_commit(sys->input)) != MMAL_SUCCESS)
++            msg_Warn(p_filter, "Format update commit failed: %s", mmal_status_to_string(err));
++
++        // (Re)enable if required will be done later
++    }
++
++    if (p_pic->context == NULL) {
++        // Can't have stashed subpics if not one of our pics
++        if (!sys->needs_copy_in)
++            msg_Dbg(p_filter, "%s: No context", __func__);
++    }
++    else if (sys->resizer_type == FILTER_RESIZER_HVS)
++    {
++        unsigned int sub_no = 0;
++
++        for (sub_no = 0; sub_no != SUBS_MAX; ++sub_no) {
++            int rv;
++            if ((rv = hw_mmal_subpic_update(VLC_OBJECT(p_filter),
++                                            hw_mmal_pic_sub_buf_get(p_pic, sub_no),
++                                            sys->subs + sub_no,
++                                            &p_pic->format,
++                                            &sys->output->format->es->video.crop,
++                                            MMAL_DISPLAY_ROT0,
++                                            frame_seq)) == 0)
++                break;
++            else if (rv < 0)
++                goto fail;
++        }
++    }
++    else
++    {
++        unsigned int sub_no = 0;
++        for (sub_no = 0; sub_no != SUBS_MAX; ++sub_no) {
++            if ((stash->sub_bufs[sub_no] = hw_mmal_pic_sub_buf_get(p_pic, sub_no)) != NULL) {
++                mmal_buffer_header_acquire(stash->sub_bufs[sub_no]);
++            }
++        }
++    }
++
++    if (!sys->out_fmt_set) {
++        sys->out_fmt_set = true;
++
++        if (sys->is_sliced) {
++            // If zc then we will do stride conversion when we copy to arm side
++            // so no need to worry about actual pic dimensions here
++            if ((err = conv_set_output(p_filter, sys, NULL)) != MMAL_SUCCESS)
++                goto fail;
++
++            sys->out_pool = mmal_port_pool_create(sys->output, sys->output->buffer_num, sys->output->buffer_size);
++        }
++        else {
++            picture_t *pic = filter_NewPicture(p_filter);
++            err = conv_set_output(p_filter, sys, pic);
++            picture_Release(pic);
++            if (err != MMAL_SUCCESS)
++                goto fail;
++
++            sys->out_pool = mmal_pool_create(sys->output->buffer_num, 0);
++        }
++
++        if (sys->out_pool == NULL) {
++            msg_Err(p_filter, "Failed to create output pool");
++            goto fail;
++        }
++    }
++
++    // Reenable stuff if the last thing we did was flush
++    if ((err = conv_enable_out(p_filter, sys)) != MMAL_SUCCESS ||
++        (err = conv_enable_in(p_filter, sys)) != MMAL_SUCCESS)
++        goto fail;
++
++    // We attach pic to buf before stuffing the output port
++    // We could attach the pic on output for cma, but it is a lot easier to keep
++    // the code common.
++    {
++        picture_t * const out_pic = filter_NewPicture(p_filter);
++
++        if (out_pic == NULL)
++        {
++            msg_Err(p_filter, "Failed to alloc required filter output pic");
++            goto fail;
++        }
++
++        out_pic->format.i_sar_den = p_filter->fmt_out.video.i_sar_den;
++        out_pic->format.i_sar_num = p_filter->fmt_out.video.i_sar_num;
++
++        if (sys->is_sliced) {
++            vlc_mutex_lock(&sys->lock);
++            pic_fifo_put(&sys->slice.pics, out_pic);
++            vlc_mutex_unlock(&sys->lock);
++
++            // Poke any returned pic buffers into output
++            // In general this should only happen immediately after enable
++            while ((out_buf = mmal_queue_get(sys->out_pool->queue)) != NULL)
++                mmal_port_send_buffer(sys->output, out_buf);
++        }
++        else
++        {
++            // 1 in - 1 out
++            if ((out_buf = mmal_queue_wait(sys->out_pool->queue)) == NULL)
++            {
++                msg_Err(p_filter, "Failed to get output buffer");
++                picture_Release(out_pic);
++                goto fail;
++            }
++            mmal_buffer_header_reset(out_buf);
++
++            // Attach out_pic to the buffer & ensure it is freed when the buffer is released
++            // On a good send callback the pic will be extracted to avoid this
++            out_buf->user_data = out_pic;
++            mmal_buffer_header_pre_release_cb_set(out_buf, out_buffer_pre_release_cb, NULL);
++
++#if 0
++            {
++                char dbuf0[5];
++                msg_Dbg(p_filter, "out_pic %s,%dx%d [(%d,%d) %d/%d] sar:%d/%d",
++                        str_fourcc(dbuf0, out_pic->format.i_chroma),
++                        out_pic->format.i_width, out_pic->format.i_height,
++                        out_pic->format.i_x_offset, out_pic->format.i_y_offset,
++                        out_pic->format.i_visible_width, out_pic->format.i_visible_height,
++                        out_pic->format.i_sar_num, out_pic->format.i_sar_den);
++            }
++#endif
++
++            if (sys->is_cma) {
++                int rv;
++
++                cma_buf_t * const cb = cma_buf_pool_alloc_buf(sys->cma_out_pool, sys->output->buffer_size);
++                if (cb == NULL) {
++                    char dbuf0[5];
++                    msg_Err(p_filter, "Failed to alloc CMA buf: fmt=%s, size=%d",
++                            str_fourcc(dbuf0, out_pic->format.i_chroma),
++                            sys->output->buffer_size);
++                    goto fail;
++                }
++                const unsigned int vc_h = cma_buf_vc_handle(cb);  // Cannot coerce without going via variable
++                out_buf->data = (uint8_t *)vc_h;
++                out_buf->alloc_size = sys->output->buffer_size;
++
++                if ((rv = cma_buf_pic_attach(cb, out_pic)) != VLC_SUCCESS)
++                {
++                    char dbuf0[5];
++                    msg_Err(p_filter, "Failed to attach CMA to pic: fmt=%s err=%d",
++                            str_fourcc(dbuf0, out_pic->format.i_chroma),
++                            rv);
++                    cma_buf_unref(cb);
++                    goto fail;
++                }
++            }
++            else {
++                out_buf->data = out_pic->p[0].p_pixels;
++                out_buf->alloc_size = out_pic->p[0].i_pitch * out_pic->p[0].i_lines;
++                //**** stride ????
++            }
++
++#if TRACE_ALL
++            msg_Dbg(p_filter, "Out buf send: pic=%p, data=%p, user=%p, flags=%#x, len=%d/%d, pts=%lld",
++                    p_pic, out_buf->data, out_buf->user_data, out_buf->flags,
++                    out_buf->length, out_buf->alloc_size, (long long)out_buf->pts);
++#endif
++
++            if ((err = mmal_port_send_buffer(sys->output, out_buf)) != MMAL_SUCCESS)
++            {
++                msg_Err(p_filter, "Send buffer to output failed");
++                goto fail;
++            }
++            out_buf = NULL;
++        }
++    }
++
++
++    // Stuff into input
++    // We assume the BH is already set up with values reflecting pic date etc.
++    stash->pts = p_pic->date;
++    {
++        MMAL_BUFFER_HEADER_T *const pic_buf = sys->needs_copy_in ?
++            hw_mmal_pic_buf_copied(p_pic, sys->in_pool, sys->input, sys->cma_in_pool) :
++            hw_mmal_pic_buf_replicated(p_pic, sys->in_pool);
++
++        // Whether or not we extracted the pic_buf we are done with the picture
++        picture_Release(p_pic);
++        p_pic = NULL;
++
++        if (pic_buf == NULL) {
++            msg_Err(p_filter, "Pic has no attached buffer");
++            goto fail;
++        }
++
++        pic_buf->pts = frame_seq;
++
++#if TRACE_ALL
++            msg_Dbg(p_filter, "In buf send: pic=%p, data=%p, user=%p, flags=%#x, len=%d/%d/%d, pts=%lld",
++                    p_pic, pic_buf->data, pic_buf->user_data, pic_buf->flags,
++                    pic_buf->length, pic_buf->alloc_size, sys->input->buffer_size, (long long)pic_buf->pts);
++#endif
++
++        if ((err = mmal_port_send_buffer(sys->input, pic_buf)) != MMAL_SUCCESS)
++        {
++            msg_Err(p_filter, "Send buffer to input failed");
++            mmal_buffer_header_release(pic_buf);
++            goto fail;
++        }
++    }
++
++    // We have a 1 pic latency for everything except the 1st pic which we
++    // wait for.
++    // This means we get a single static pic out
++    if (sys->pic_n++ == 1) {
++#if TRACE_ALL
++        msg_Dbg(p_filter, ">>> %s: Pic1=%p", __func__, ret_pics);
++#endif
++        return ret_pics;
++    }
++
++    ret_pics = conv_get_out_pics(sys);
++
++    if (sys->err_stream != MMAL_SUCCESS)
++        goto stream_fail;
++
++    conv_stash_fixup(p_filter, sys, ret_pics);
++
++#if TRACE_ALL
++    msg_Dbg(p_filter, ">>> %s: pic=%p", __func__, ret_pics);
++#endif
++
++    return ret_pics;
++
++stream_fail:
++    msg_Err(p_filter, "MMAL error reported by callback");
++fail:
++#if TRACE_ALL
++    msg_Err(p_filter, ">>> %s: FAIL", __func__);
++#endif
++    if (ret_pics != NULL)
++        picture_Release(ret_pics);
++    if (out_buf != NULL)
++        mmal_buffer_header_release(out_buf);
++    if (p_pic != NULL)
++        picture_Release(p_pic);
++    conv_flush(p_filter);
++    return NULL;
++}
++
++static void CloseConverter(vlc_object_t * obj)
++{
++    filter_t * const p_filter = (filter_t *)obj;
++    filter_sys_t * const sys = p_filter->p_sys;
++    unsigned int i;
++    // Converter teardown; also OpenConverter's failure path, so any member may still be unset
++#if TRACE_ALL
++    msg_Dbg(obj, "<<< %s", __func__);
++#endif
++
++    if (sys == NULL)  // Never allocated, or already closed (p_sys is cleared below)
++        return;
++
++    // Disables input & output ports
++    conv_flush(p_filter);
++
++    cma_buf_pool_deletez(&sys->cma_in_pool);
++    cma_buf_pool_deletez(&sys->cma_out_pool);
++
++    if (sys->component && sys->component->control->is_enabled)
++        mmal_port_disable(sys->component->control);
++
++    if (sys->component && sys->component->is_enabled)
++        mmal_component_disable(sys->component);
++
++    if (sys->resizer_type == FILTER_RESIZER_HVS)  // Subpic ports are only opened for HVS
++    {
++        for (i = 0; i != SUBS_MAX; ++i) {
++            hw_mmal_subpic_close(VLC_OBJECT(p_filter), sys->subs + i);
++        }
++    }
++
++    if (sys->out_pool)
++    {
++        if (sys->is_sliced)  // Sliced pool is destroyed via its port (presumably port-allocated - confirm)
++            mmal_port_pool_destroy(sys->output, sys->out_pool);
++        else
++            mmal_pool_destroy(sys->out_pool);
++    }
++
++    if (sys->in_pool != NULL)
++        mmal_pool_destroy(sys->in_pool);
++
++    if (sys->component)
++        mmal_component_release(sys->component);
++
++    cma_vcsm_exit(sys->vcsm_init_type);  // Pairs with cma_vcsm_init() in OpenConverter
++
++    vlc_sem_destroy(&sys->sem);  // sem & lock are initialised right after allocation in OpenConverter
++    vlc_mutex_destroy(&sys->lock);
++
++    p_filter->p_sys = NULL;  // Makes a repeated Close a no-op
++    free(sys);
++}
++
++
++static inline MMAL_FOURCC_T filter_enc_in(const video_format_t * const fmt)
++{
++    // MMAL chromas convert directly; planar I420 (8-bit or 10-bit LE) is
++    // presented as MMAL I420; 0 tells the caller the input is unsupported
++    const vlc_fourcc_t chroma = fmt->i_chroma;
++
++    if (hw_mmal_chroma_is_mmal(chroma))
++        return vlc_to_mmal_video_fourcc(fmt);
++
++    return (chroma == VLC_CODEC_I420 || chroma == VLC_CODEC_I420_10L) ? MMAL_ENCODING_I420 : 0;
++}
++
++static inline MMAL_FOURCC_T filter_enc_out(const video_format_t * const fmt)
++{
++    // Non-MMAL I420 is rejected with 0: only single plane copy-out is implemented (could be fixed)
++    const MMAL_FOURCC_T fourcc = vlc_to_mmal_video_fourcc(fmt);
++    return (fourcc == MMAL_ENCODING_I420 && !hw_mmal_chroma_is_mmal(fmt->i_chroma)) ? 0 : fourcc;
++}
++
++
++static int OpenConverter(vlc_object_t * obj)
++{
++    filter_t * const p_filter = (filter_t *)obj;
++    int ret = VLC_EGENERIC;
++    filter_sys_t *sys;
++    MMAL_STATUS_T status = MMAL_SUCCESS;  // Tested at fail: even on paths that made no MMAL call
++    MMAL_FOURCC_T enc_out = filter_enc_out(&p_filter->fmt_out.video);
++    const MMAL_FOURCC_T enc_in = filter_enc_in(&p_filter->fmt_in.video);
++    bool use_resizer;
++    bool use_isp;
++    int gpu_mem;
++    // Picks resizer/ISP/HVS, then brings the component up; retries HVS->ISP on create failure and ->resizer on MMAL_ENOMEM
++    // At least in principle we should deal with any mmal format as input
++    if (enc_in == 0 || enc_out == 0)
++        return VLC_EGENERIC;
++
++    // Can't transform
++    if (p_filter->fmt_in.video.orientation != p_filter->fmt_out.video.orientation)
++        return VLC_EGENERIC;
++
++    use_resizer = var_InheritBool(p_filter, MMAL_RESIZE_NAME);
++    use_isp = var_InheritBool(p_filter, MMAL_ISP_NAME);
++
++retry:
++    // ** Make more generic by checking supported encs
++    //
++    // Must use ISP - HVS can't do this, nor can resizer
++    if (enc_in == MMAL_ENCODING_YUVUV64_10) {
++        // If resizer selected then just give up
++        if (use_resizer)
++            return VLC_EGENERIC;
++        // otherwise downgrade HVS to ISP
++        use_isp = true;
++    }
++    // HVS can't do I420
++    if (enc_out == MMAL_ENCODING_I420) {
++        use_isp = true;
++    }
++    // Only HVS can deal with SAND30
++    if (enc_in == MMAL_ENCODING_YUV10_COL) {
++        if (use_isp || use_resizer)
++            return VLC_EGENERIC;
++    }
+
+-        sys->output_format = format;
+
+-        mmal_buffer_header_release(buffer);
++    if (use_resizer) {
++        // use resizer overrides use_isp
++        use_isp = false;
++    }
++
++    // Check we have a sliced version of the fourcc if we want the resizer
++    if (use_resizer &&
++        (enc_out = pic_to_slice_mmal_fourcc(enc_out)) == 0) {
++        return VLC_EGENERIC;
++    }
++
++    gpu_mem = hw_mmal_get_gpu_mem();
++
++    {
++        char dbuf0[5], dbuf1[5], dbuf2[5], dbuf3[5];
++        msg_Dbg(p_filter, "%s: (%s) %s/%s,%dx%d [(%d,%d) %d/%d] sar:%d/%d->%s/%s,%dx%d [(%d,%d) %dx%d] rgb:%#x:%#x:%#x sar:%d/%d (gpu=%d)", __func__,
++                use_resizer ? "resize" : use_isp ? "isp" : "hvs",
++                str_fourcc(dbuf0, p_filter->fmt_in.video.i_chroma), str_fourcc(dbuf2, enc_in),
++                p_filter->fmt_in.video.i_width, p_filter->fmt_in.video.i_height,
++                p_filter->fmt_in.video.i_x_offset, p_filter->fmt_in.video.i_y_offset,
++                p_filter->fmt_in.video.i_visible_width, p_filter->fmt_in.video.i_visible_height,
++                p_filter->fmt_in.video.i_sar_num, p_filter->fmt_in.video.i_sar_den,
++                str_fourcc(dbuf1, p_filter->fmt_out.video.i_chroma), str_fourcc(dbuf3, enc_out),
++                p_filter->fmt_out.video.i_width, p_filter->fmt_out.video.i_height,
++                p_filter->fmt_out.video.i_x_offset, p_filter->fmt_out.video.i_y_offset,
++                p_filter->fmt_out.video.i_visible_width, p_filter->fmt_out.video.i_visible_height,
++                p_filter->fmt_out.video.i_rmask, p_filter->fmt_out.video.i_gmask, p_filter->fmt_out.video.i_bmask,
++                p_filter->fmt_out.video.i_sar_num, p_filter->fmt_out.video.i_sar_den,
++                gpu_mem);
++    }
++
++    sys = calloc(1, sizeof(filter_sys_t));
++    if (!sys) {
++        ret = VLC_ENOMEM;
++        goto fail;
++    }
++    p_filter->p_sys = sys;
++
++    // Init stuff that we destroy unconditionally in Close first
++    vlc_mutex_init(&sys->lock);
++    vlc_sem_init(&sys->sem, 0);
++    sys->err_stream = MMAL_SUCCESS;
++    pic_fifo_init(&sys->ret_pics);
++    pic_fifo_init(&sys->slice.pics);
++
++    sys->needs_copy_in = !hw_mmal_chroma_is_mmal(p_filter->fmt_in.video.i_chroma);
++    sys->in_port_cb_fn = conv_input_port_cb;
++
++    if ((sys->vcsm_init_type = cma_vcsm_init()) == VCSM_INIT_NONE) {
++        msg_Err(p_filter, "VCSM init failed");
++        goto fail;
++    }
++
++    if (use_resizer) {
++        sys->resizer_type = FILTER_RESIZER_RESIZER;
++        sys->is_sliced = true;
++        sys->component_name = MMAL_COMPONENT_DEFAULT_RESIZER;
++        sys->out_port_cb_fn = slice_output_port_cb;
++    }
++    else if (use_isp) {
++        sys->resizer_type = FILTER_RESIZER_ISP;
++        sys->is_sliced = false;  // Copy directly into filter picture
++        sys->component_name = MMAL_COMPONENT_ISP_RESIZER;
++        sys->out_port_cb_fn = conv_output_port_cb;
+     } else {
+-        mmal_buffer_header_release(buffer);
++        sys->resizer_type = FILTER_RESIZER_HVS;
++        sys->is_sliced = false;  // Copy directly into filter picture
++        sys->component_name = MMAL_COMPONENT_HVS;
++        sys->out_port_cb_fn = conv_output_port_cb;
++    }
++    sys->is_cma = is_cma_buf_pic_chroma(p_filter->fmt_out.video.i_chroma);
++
++    status = mmal_component_create(sys->component_name, &sys->component);
++    if (status != MMAL_SUCCESS) {
++        if (!use_isp && !use_resizer) {
++            msg_Warn(p_filter, "Failed to create HVS resizer - retrying with ISP");
++            CloseConverter(obj);
++            use_isp = true;
++            goto retry;
++        }
++        msg_Err(p_filter, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
++                sys->component_name, status, mmal_status_to_string(status));
++        goto fail;
+     }
++    sys->output = sys->component->output[0];
++    sys->input  = sys->component->input[0];
++
++    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)p_filter;
++    status = mmal_port_enable(sys->component->control, conv_control_port_cb);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(p_filter, "Failed to enable control port %s (status=%"PRIx32" %s)",
++                sys->component->control->name, status, mmal_status_to_string(status));
++        goto fail;
++    }
++
++    if (sys->needs_copy_in &&
++        (sys->cma_in_pool = cma_buf_pool_new(2, 2, true, "conv-copy-in")) == NULL)
++    {
++        msg_Err(p_filter, "Failed to allocate input CMA pool");
++        goto fail;
++    }
++
++    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)p_filter;
++    sys->input->format->type = MMAL_ES_TYPE_VIDEO;
++    sys->input->format->encoding = enc_in;
++    sys->input->format->encoding_variant = MMAL_ENCODING_I420;
++    hw_mmal_vlc_fmt_to_mmal_fmt(sys->input->format, &p_filter->fmt_in.video);
++    port_parameter_set_bool(sys->input, MMAL_PARAMETER_ZERO_COPY, 1);
++
++    mmal_log_dump_format(sys->input->format);
++
++    status = mmal_port_format_commit(sys->input);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(p_filter, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
++                sys->input->name, status, mmal_status_to_string(status));
++        goto fail;
++    }
++    sys->input->buffer_size = sys->input->buffer_size_recommended;
++    sys->input->buffer_num = NUM_DECODER_BUFFER_HEADERS;
++
++    if ((status = conv_enable_in(p_filter, sys)) != MMAL_SUCCESS)
++        goto fail;
++
++    port_parameter_set_bool(sys->output, MMAL_PARAMETER_ZERO_COPY, sys->is_sliced || sys->is_cma);
++
++    status = mmal_component_enable(sys->component);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(p_filter, "Failed to enable component %s (status=%"PRIx32" %s)",
++                sys->component->name, status, mmal_status_to_string(status));
++        goto fail;
++    }
++
++    if ((sys->in_pool = mmal_pool_create(sys->input->buffer_num, 0)) == NULL)
++    {
++        msg_Err(p_filter, "Failed to create input pool");
++        goto fail;
++    }
++
++    if (sys->resizer_type == FILTER_RESIZER_HVS)
++    {
++        unsigned int i;
++        for (i = 0; i != SUBS_MAX; ++i) {
++            if (hw_mmal_subpic_open(VLC_OBJECT(p_filter), sys->subs + i, sys->component->input[i + 1], -1, i + 1) != MMAL_SUCCESS)
++            {
++                msg_Err(p_filter, "Failed to open subpic %d", i);
++                goto fail;
++            }
++        }
++    }
++
++    p_filter->pf_video_filter = conv_filter;
++    p_filter->pf_flush = conv_flush;
++    // video_drain NIF in filter structure
++
++#if TRACE_ALL
++    msg_Dbg(p_filter, ">>> %s: ok", __func__);
++#endif
++
++    return VLC_SUCCESS;
++
++fail:
++    CloseConverter(obj);
++
++    if (!use_resizer && status == MMAL_ENOMEM) {
++        use_resizer = true;
++        msg_Warn(p_filter, "Lack of memory to use HVS/ISP: trying resizer");
++        goto retry;
++    }
++
++#if TRACE_ALL
++    msg_Dbg(p_filter, ">>> %s: FAIL: %d", __func__, ret);
++#endif
++    return ret;
++}
++
++#if OPT_TO_FROM_ZC
++//----------------------------------------------------------------------------
++//
++// Simple copy in to ZC
++
++typedef struct to_zc_sys_s {
++    vcsm_init_type_t vcsm_init_type;  // From cma_vcsm_init(); handed back to cma_vcsm_exit() on close
++    cma_buf_pool_t * cma_out_pool;    // Pool the per-picture output CMA buffers are allocated from
++} to_zc_sys_t;
++
++
++static size_t buf_alloc_size(const vlc_fourcc_t i_chroma, const unsigned int width, const unsigned int height)
++{
++    // Bytes needed for one width x height frame of the given zero-copy chroma;
++    // 0 means the chroma is not one this helper knows how to size
++    const unsigned int n_pixels = width * height;
++
++    // 32-bit RGB: 4 bytes per pixel
++    if (i_chroma == VLC_CODEC_MMAL_ZC_RGB32)
++        return n_pixels * 4;
++
++    // I420: full-size luma plus two quarter-size chroma planes
++    if (i_chroma == VLC_CODEC_MMAL_ZC_I420)
++        return n_pixels * 3 / 2;
++    return 0;
++}
++
++
++static picture_t *
++to_zc_filter(filter_t *p_filter, picture_t *in_pic)
++{
++    to_zc_sys_t * const sys = (to_zc_sys_t *)p_filter->p_sys;
++#if TRACE_ALL
++    msg_Dbg(p_filter, "<<< %s", __func__);
++#endif
++    // Copies in_pic into a new CMA-backed output picture; in_pic is released on every path; NULL on failure
++    assert(p_filter->fmt_out.video.i_chroma == VLC_CODEC_MMAL_ZC_I420);
++
++    picture_t * const out_pic = filter_NewPicture(p_filter);
++    if (out_pic == NULL)
++        goto fail0;
++
++    MMAL_ES_SPECIFIC_FORMAT_T mm_vfmt = {.video={0}};
++    MMAL_ES_FORMAT_T mm_esfmt = {
++        .encoding = vlc_to_mmal_video_fourcc(&p_filter->fmt_out.video),
++        .es = &mm_vfmt};
++
++    hw_mmal_vlc_fmt_to_mmal_fmt(&mm_esfmt, &p_filter->fmt_out.video);  // Fills mm_vfmt via mm_esfmt.es
++
++    const size_t buf_alloc = buf_alloc_size(p_filter->fmt_out.video.i_chroma,
++                                            mm_vfmt.video.width, mm_vfmt.video.height);
++    if (buf_alloc == 0)  // Chroma that buf_alloc_size() cannot size
++        goto fail1;
++    cma_buf_t *const cb = cma_buf_pool_alloc_buf(sys->cma_out_pool, buf_alloc);
++    if (cb == NULL)
++        goto fail1;
++
++    if (cma_buf_pic_attach(cb, out_pic) != VLC_SUCCESS)
++        goto fail2;  // Attach failed, so our ref on cb is still ours to drop
++    cma_pic_set_data(out_pic, &mm_esfmt, NULL);
++
++    hw_mmal_copy_pic_to_buf(cma_buf_addr(cb), NULL, &mm_esfmt, in_pic);
++
++    // Copy pic properties
++    out_pic->date              = in_pic->date;
++    out_pic->b_force           = in_pic->b_force;
++    out_pic->b_progressive     = in_pic->b_progressive;
++    out_pic->b_top_field_first = in_pic->b_top_field_first;
++    out_pic->i_nb_fields       = in_pic->i_nb_fields;
++
++    picture_Release(in_pic);
++
++    return out_pic;
++
++fail2:  // Labels fall through: each undoes one more step of the setup above
++    cma_buf_unref(cb);
++fail1:
++    picture_Release(out_pic);
++fail0:
++    picture_Release(in_pic);
++    return NULL;
++}
++
++static void to_zc_flush(filter_t * p_filter)
++{
++    VLC_UNUSED(p_filter);  // Nothing to flush: to_zc_filter() finishes each picture before returning
+ }
++
++static void CloseConverterToZc(vlc_object_t * obj)
++{
++    // Releases everything OpenConverterToZc set up; a no-op when no state was allocated
++    filter_t * const filt = (filter_t *)obj;
++    to_zc_sys_t * const zc = (to_zc_sys_t *)filt->p_sys;
++
++    if (zc != NULL)
++    {
++        // Detach first so a repeat call finds nothing to free
++        filt->p_sys = NULL;
++        cma_buf_pool_deletez(&zc->cma_out_pool);
++        cma_vcsm_exit(zc->vcsm_init_type);
++        free(zc);
++    }
++}
++
++static bool to_zc_validate_fmt(const video_format_t * const f_in, const video_format_t * const f_out)
++{
++    // Supported conversion: planar I420 (8-bit or 10-bit LE) in, MMAL ZC I420 out,
++    // with identical width and height on both sides
++    const bool in_chroma_ok =
++        f_in->i_chroma == VLC_CODEC_I420 ||
++        f_in->i_chroma == VLC_CODEC_I420_10L;
++    const bool out_chroma_ok =
++        f_out->i_chroma == VLC_CODEC_MMAL_ZC_I420;
++    const bool same_size =
++        f_in->i_height == f_out->i_height &&
++        f_in->i_width  == f_out->i_width;
++
++    return in_chroma_ok && out_chroma_ok && same_size;
++}
++
++static int OpenConverterToZc(vlc_object_t * obj)
++{
++    int ret = VLC_EGENERIC;
++    filter_t * const p_filter = (filter_t *)obj;
++    // Opens only for formats to_zc_validate_fmt() accepts; any failure is unwound by CloseConverterToZc
++    if (!to_zc_validate_fmt(&p_filter->fmt_in.video, &p_filter->fmt_out.video))
++        goto fail;
++
++    {
++        char dbuf0[5], dbuf1[5];
++        msg_Dbg(p_filter, "%s: %s,%dx%d [(%d,%d) %d/%d] sar:%d/%d->%s,%dx%d [(%d,%d) %dx%d] rgb:%#x:%#x:%#x sar:%d/%d", __func__,
++                str_fourcc(dbuf0, p_filter->fmt_in.video.i_chroma),
++                p_filter->fmt_in.video.i_width, p_filter->fmt_in.video.i_height,
++                p_filter->fmt_in.video.i_x_offset, p_filter->fmt_in.video.i_y_offset,
++                p_filter->fmt_in.video.i_visible_width, p_filter->fmt_in.video.i_visible_height,
++                p_filter->fmt_in.video.i_sar_num, p_filter->fmt_in.video.i_sar_den,
++                str_fourcc(dbuf1, p_filter->fmt_out.video.i_chroma),
++                p_filter->fmt_out.video.i_width, p_filter->fmt_out.video.i_height,
++                p_filter->fmt_out.video.i_x_offset, p_filter->fmt_out.video.i_y_offset,
++                p_filter->fmt_out.video.i_visible_width, p_filter->fmt_out.video.i_visible_height,
++                p_filter->fmt_out.video.i_rmask, p_filter->fmt_out.video.i_gmask, p_filter->fmt_out.video.i_bmask,
++                p_filter->fmt_out.video.i_sar_num, p_filter->fmt_out.video.i_sar_den);
++    }
++
++    to_zc_sys_t * const sys = calloc(1, sizeof(*sys));
++    if (!sys) {
++        ret = VLC_ENOMEM;
++        goto fail;
++    }
++    p_filter->p_sys = (filter_sys_t *)sys;  // Set before further init so the fail path can free it
++
++    if ((sys->vcsm_init_type = cma_vcsm_init()) == VCSM_INIT_NONE) {
++        msg_Err(p_filter, "VCSM init failed");
++        goto fail;
++    }
++
++    if ((sys->cma_out_pool = cma_buf_pool_new(5, 5, true, "conv-to-zc")) == NULL)
++    {
++        msg_Err(p_filter, "Failed to allocate output CMA pool");
++        goto fail;
++    }
++
++    p_filter->pf_video_filter = to_zc_filter;
++    p_filter->pf_flush = to_zc_flush;
++    return VLC_SUCCESS;
++
++fail:
++    CloseConverterToZc(obj);
++    return ret;
++}
++
++//----------------------------------------------------------------------------
++//
++// Simple "copy" from ZC
++
++static void CloseConverterFromZc(vlc_object_t * obj)
++{
++    VLC_UNUSED(obj);  // Nothing to release: OpenConverterFromZc never succeeds or allocates
++}
++
++static int OpenConverterFromZc(vlc_object_t * obj)
++{
++    return VLC_EGENERIC;  // Placeholder: conversion from ZC is not implemented, always refuse
++}
++#endif
++//----------------------------------------------------------------------------
++
++typedef struct blend_sys_s {
++    vzc_pool_ctl_t * vzc;  // From hw_mmal_vzc_pool_new(); supplies the subpic buffers
++    const picture_t * last_dst;  // Not a ref, just a hint that we have a new pic
++    vcsm_init_type_t vcsm_init_type;  // From cma_vcsm_init(); handed back to cma_vcsm_exit()
++} blend_sys_t;
++
++static void FilterBlendMmal(filter_t *p_filter,
++                  picture_t *dst, const picture_t * src,
++                  int x_offset, int y_offset, int alpha)
++{
++    blend_sys_t * const bsys = (blend_sys_t *)p_filter->p_sys;
++#if TRACE_ALL
++    msg_Dbg(p_filter, "%s (%d,%d:%d) pic=%p, pts=%lld, force=%d", __func__, x_offset, y_offset, alpha, src, src->date, src->b_force);
++#endif
++    // Fully transparent or empty subpictures contribute nothing
++    if (alpha == 0 ||
++        src->format.i_visible_height == 0 ||
++        src->format.i_visible_width == 0)
++        return;
++
++    // dst must carry a context; report and give up if it does not
++    if (dst->context == NULL) {
++        msg_Err(p_filter, "MMAL pic missing context");
++        return;
++    }
++
++    // dst differs from the one last blended onto, or has no sub-buffers yet: treat it as a new pic
++    const bool is_new_pic = dst != bsys->last_dst || !hw_mmal_pic_has_sub_bufs(dst);
++
++    // cast away src const so we can ref it
++    MMAL_BUFFER_HEADER_T * const sub_buf = hw_mmal_vzc_buf_from_pic(bsys->vzc, (picture_t *)src,
++                                                                    vis_mmal_rect(&dst->format),
++                                                                    x_offset, y_offset, alpha, is_new_pic);
++    if (sub_buf == NULL) {
++        msg_Err(p_filter, "Failed to allocate vzc buffer for subpic");
++        return;
++    }
++
++    hw_mmal_pic_sub_buf_add(dst, sub_buf);
++
++    bsys->last_dst = dst;
++}
++
++static void FlushBlendMmal(filter_t * p_filter)
++{
++    blend_sys_t * const sys = (blend_sys_t *)p_filter->p_sys;
++    sys->last_dst = NULL;  // Drop the hint so the next blend target counts as a new picture
++    hw_mmal_vzc_pool_flush(sys->vzc);
++}
++
++static void CloseBlendMmal(vlc_object_t *object)
++{
++    filter_t * const filt = (filter_t *)object;
++    blend_sys_t * const bsys = (blend_sys_t *)filt->p_sys;
++
++    // Nothing to do when no state was allocated (or it has already been freed)
++    if (bsys == NULL)
++        return;
++    filt->p_sys = NULL;
++    hw_mmal_vzc_pool_release(bsys->vzc);
++    cma_vcsm_exit(bsys->vcsm_init_type);
++    free(bsys);
++}
++
++static int OpenBlendMmal(vlc_object_t *object)
++{
++    filter_t * const p_filter = (filter_t *)object;
++    const vlc_fourcc_t vfcc_dst = p_filter->fmt_out.video.i_chroma;
++    // Blender for MMAL destination pictures: needs an MMAL dst chroma and a subpic format the vzc code accepts
++    if (!hw_mmal_chroma_is_mmal(vfcc_dst) ||
++        !hw_mmal_vzc_subpic_fmt_valid(&p_filter->fmt_in.video))
++    {
++        return VLC_EGENERIC;
++    }
++
++    {
++        char dbuf0[5], dbuf1[5];
++        msg_Dbg(p_filter, "%s: (%s) %s,%dx%d [(%d,%d) %dx%d]->%s,%dx%d [(%d,%d) %dx%d]", __func__,
++                "blend",
++                str_fourcc(dbuf0, p_filter->fmt_in.video.i_chroma), p_filter->fmt_in.video.i_width, p_filter->fmt_in.video.i_height,
++                p_filter->fmt_in.video.i_x_offset, p_filter->fmt_in.video.i_y_offset,
++                p_filter->fmt_in.video.i_visible_width, p_filter->fmt_in.video.i_visible_height,
++                str_fourcc(dbuf1, p_filter->fmt_out.video.i_chroma), p_filter->fmt_out.video.i_width, p_filter->fmt_out.video.i_height,
++                p_filter->fmt_out.video.i_x_offset, p_filter->fmt_out.video.i_y_offset,
++                p_filter->fmt_out.video.i_visible_width, p_filter->fmt_out.video.i_visible_height);
++    }
++
++    {
++        blend_sys_t * const sys = calloc(1, sizeof (*sys));
++        if (sys == NULL)
++            return VLC_ENOMEM;
++
++        p_filter->p_sys = (filter_sys_t *)sys;  // Set first so the fail path below can free it
++
++        if ((sys->vcsm_init_type = cma_vcsm_init()) == VCSM_INIT_NONE) {
++            msg_Err(p_filter, "VCSM init failed");
++            goto fail;
++        }
++
++        if ((sys->vzc = hw_mmal_vzc_pool_new()) == NULL)
++            goto fail;
++    }
++
++    p_filter->pf_video_blend = FilterBlendMmal;
++    p_filter->pf_flush = FlushBlendMmal;
++
++    return VLC_SUCCESS;
++
++fail:
++    CloseBlendMmal(VLC_OBJECT(p_filter));
++    return VLC_ENOMEM;  // NOTE(review): also returned when VCSM init failed, not only for allocation failure
++}
++
++// ---------------------------------------------------------------------------
++
++static void FilterBlendNeon(filter_t *p_filter,
++                  picture_t *dst_pic, const picture_t * src_pic,
++                  int x_offset, int y_offset, int alpha)
++{
++    // Blend the visible area of src_pic onto dst_pic at (x_offset, y_offset), which is relative to
++    // dst_pic's visible area; the subpicture is clipped to that area on all four sides
++    const uint8_t * s_data;
++    uint8_t * d_data;
++    // L/U overrun: src columns/rows that would land before dst's visible area are skipped
++    const int skip_x = x_offset < 0 ? -x_offset : 0;
++    const int skip_y = y_offset < 0 ? -y_offset : 0;
++    int width = (int)src_pic->format.i_visible_width - skip_x;
++    int height = (int)src_pic->format.i_visible_height - skip_y;
++    blend_neon_fn *const blend_fn = (blend_neon_fn * )p_filter->p_sys;
++
++#if TRACE_ALL
++    msg_Dbg(p_filter, "%s (%d,%d:%d) pic=%p, pts=%lld, force=%d", __func__, x_offset, y_offset, alpha, src_pic, src_pic->date, src_pic->b_force);
++#endif
++
++    if (alpha == 0 ||
++        src_pic->format.i_visible_height == 0 ||
++        src_pic->format.i_visible_width == 0)
++        return;
++
++    x_offset += dst_pic->format.i_x_offset + skip_x;
++    y_offset += dst_pic->format.i_y_offset + skip_y;
++
++    // Deal with R/B overrun
++    if (x_offset + width >= (int)(dst_pic->format.i_x_offset + dst_pic->format.i_visible_width))
++        width = dst_pic->format.i_x_offset + dst_pic->format.i_visible_width - x_offset;
++    if (y_offset + height >= (int)(dst_pic->format.i_y_offset + dst_pic->format.i_visible_height))
++        height = dst_pic->format.i_y_offset + dst_pic->format.i_visible_height - y_offset;
++
++    if (width <= 0 || height <= 0) {
++        return;
++    }
++
++    s_data = src_pic->p[0].p_pixels +
++        src_pic->p[0].i_pixel_pitch * (src_pic->format.i_x_offset + skip_x) +
++        src_pic->p[0].i_pitch * (src_pic->format.i_y_offset + skip_y);
++    d_data = dst_pic->p[0].p_pixels +
++        dst_pic->p[0].i_pixel_pitch * x_offset +
++        dst_pic->p[0].i_pitch * y_offset;
++
++    do {
++        blend_fn(d_data, s_data, alpha, width);
++        s_data += src_pic->p[0].i_pitch;
++        d_data += dst_pic->p[0].i_pitch;
++    } while (--height > 0);
++}
++
++static void CloseBlendNeon(vlc_object_t *object)
++{
++    VLC_UNUSED(object);  // Nothing to free: OpenBlendNeon only stores a function pointer in p_sys
++}
++
++static int OpenBlendNeon(vlc_object_t *object)
++{
++    filter_t * const p_filter = (filter_t *)object;
++    const vlc_fourcc_t vfcc_dst = p_filter->fmt_out.video.i_chroma;
++    MMAL_FOURCC_T mfcc_src = vlc_to_mmal_video_fourcc(&p_filter->fmt_in.video);
++    MMAL_FOURCC_T mfcc_dst = vlc_to_mmal_video_fourcc(&p_filter->fmt_out.video);
++    blend_neon_fn * blend_fn = (blend_neon_fn *)0;
++    // Selects the NEON routine matching the src/dst channel order; refuses to open if none applies
++    // Non-alpha RGB only for dest
++    if (vfcc_dst != VLC_CODEC_RGB32)
++        return VLC_EGENERIC;
++
++    // Check we have appropriate blend fn (mmal doesn't have a non-alpha RGB32)
++    switch (mfcc_src) {
++    case MMAL_ENCODING_RGBA:
++        if (mfcc_dst == MMAL_ENCODING_RGBA)
++            blend_fn = blend_rgbx_rgba_neon;  // Same channel order on both sides
++        else if (mfcc_dst == MMAL_ENCODING_BGRA)
++            blend_fn = blend_bgrx_rgba_neon;  // R/B swapped between src and dst
++        break;
++
++    case MMAL_ENCODING_BGRA:
++        if (mfcc_dst == MMAL_ENCODING_BGRA)
++            blend_fn = blend_rgbx_rgba_neon;  // Same channel order on both sides
++        else if (mfcc_dst == MMAL_ENCODING_RGBA)
++            blend_fn = blend_bgrx_rgba_neon;  // R/B swapped between src and dst
++        break;
++
++    default:
++        break;
++    }
++
++    if (blend_fn == (blend_neon_fn *)0)
++    {
++        return VLC_EGENERIC;
++    }
++
++    p_filter->p_sys = (void *)blend_fn;  // No state struct: FilterBlendNeon casts p_sys back to the function
++    p_filter->pf_video_blend = FilterBlendNeon;
++
++    {
++        char dbuf0[5], dbuf1[5];
++        char dbuf0a[5], dbuf1a[5];
++        msg_Dbg(p_filter, "%s: (%s) %s/%s,%dx%d [(%d,%d) %dx%d]->%s/%s,%dx%d [(%d,%d) %dx%d]", __func__,
++                "blend",
++                str_fourcc(dbuf0, p_filter->fmt_in.video.i_chroma),
++                str_fourcc(dbuf0a, mfcc_src),
++                p_filter->fmt_in.video.i_width, p_filter->fmt_in.video.i_height,
++                p_filter->fmt_in.video.i_x_offset, p_filter->fmt_in.video.i_y_offset,
++                p_filter->fmt_in.video.i_visible_width, p_filter->fmt_in.video.i_visible_height,
++                str_fourcc(dbuf1, p_filter->fmt_out.video.i_chroma),
++                str_fourcc(dbuf1a, mfcc_dst),
++                p_filter->fmt_out.video.i_width, p_filter->fmt_out.video.i_height,
++                p_filter->fmt_out.video.i_x_offset, p_filter->fmt_out.video.i_y_offset,
++                p_filter->fmt_out.video.i_visible_width, p_filter->fmt_out.video.i_visible_height);
++    }
++
++    return VLC_SUCCESS;
++}
++
++vlc_module_begin()  // Main module: the MMAL video decoder
++    set_category( CAT_INPUT )
++    set_subcategory( SUBCAT_INPUT_VCODEC )
++    set_shortname(N_("MMAL decoder"))
++    set_description(N_("MMAL-based decoder plugin for Raspberry Pi"))
++    set_capability("video decoder", 90)
++    add_shortcut("mmal_decoder")
++    add_bool(MMAL_OPAQUE_NAME, true, MMAL_OPAQUE_TEXT, MMAL_OPAQUE_LONGTEXT, false)
++    set_callbacks(OpenDecoder, CloseDecoder)
++
++    add_submodule()  // Resizing / format converter (resizer, ISP or HVS chosen in OpenConverter)
++    set_category( CAT_VIDEO )
++    set_subcategory( SUBCAT_VIDEO_VFILTER )
++    set_shortname(N_("MMAL resizer"))
++    set_description(N_("MMAL resizing conversion filter"))
++    add_shortcut("mmal_converter")
++    set_capability( "video converter", 900 )
++    add_bool(MMAL_RESIZE_NAME, /* default */ false, MMAL_RESIZE_TEXT, MMAL_RESIZE_LONGTEXT, /* advanced option */ false)
++    add_bool(MMAL_ISP_NAME, /* default */ false, MMAL_ISP_TEXT, MMAL_ISP_LONGTEXT, /* advanced option */ false)
++    set_callbacks(OpenConverter, CloseConverter)
++
++#if OPT_TO_FROM_ZC
++    add_submodule()  // Plain copy of I420 into a zero-copy (CMA) picture
++    set_category( CAT_VIDEO )
++    set_subcategory( SUBCAT_VIDEO_VFILTER )
++    set_shortname(N_("MMAL to ZC"))
++    set_description(N_("MMAL conversion to ZC filter"))
++    add_shortcut("mmal_to_zc")
++    set_capability( "video converter", 901 )
++    set_callbacks(OpenConverterToZc, CloseConverterToZc)
++
++    add_submodule()  // Registered but unimplemented: OpenConverterFromZc always fails
++    set_category( CAT_VIDEO )
++    set_subcategory( SUBCAT_VIDEO_VFILTER )
++    set_shortname(N_("MMAL from ZC"))
++    set_description(N_("MMAL conversion from ZC filter"))
++    add_shortcut("mmal_from_zc")
++    set_capability( "video converter", 902 )
++    set_callbacks(OpenConverterFromZc, CloseConverterFromZc)
++#endif
++
++    add_submodule()  // Subpicture blending onto MMAL pictures
++    set_category( CAT_VIDEO )
++    set_subcategory( SUBCAT_VIDEO_VFILTER )
++    set_description(N_("Video pictures blending for MMAL"))
++    add_shortcut("mmal_blend")
++    set_capability("video blending", 120)
++    set_callbacks(OpenBlendMmal, CloseBlendMmal)
++
++    add_submodule()  // CPU (NEON) blending onto RGB32 pictures
++    set_category( CAT_VIDEO )
++    set_subcategory( SUBCAT_VIDEO_VFILTER )
++    set_description(N_("Video pictures blending for neon"))
++    add_shortcut("neon_blend")
++    set_capability("video blending", 110)
++    set_callbacks(OpenBlendNeon, CloseBlendNeon)
++
++vlc_module_end()
++
++
+--- /dev/null
++++ b/modules/hw/mmal/converter_mmal.c
+@@ -0,0 +1,479 @@
++#ifdef HAVE_CONFIG_H
++# include "config.h"
++#endif
++
++#include <unistd.h>
++#include <fcntl.h>
++#include <sys/ioctl.h>
++#include <sys/mman.h>
++
++#include <interface/vcsm/user-vcsm.h>
++
++#include <vlc_common.h>
++#include <vlc_picture.h>
++
++#include <libdrm/drm_fourcc.h>
++#include <EGL/egl.h>
++#include <EGL/eglext.h>
++#include <GLES2/gl2.h>
++#include <GLES2/gl2ext.h>
++
++#include "mmal_cma.h"
++
++#include "../../video_output/opengl/converter.h"
++
++#include "mmal_picture.h"
++
++#include <assert.h>
++
++#define TRACE_ALL 0
++
++typedef struct mmal_gl_converter_s  // Private state of the converter, stored in tc->priv
++{
++    EGLint drm_fourcc;  // Fourcc passed to EGL as EGL_LINUX_DRM_FOURCC_EXT when importing a buffer
++    vcsm_init_type_t vcsm_init_type;  // Result of cma_vcsm_init(); handed back to cma_vcsm_exit() on close
++    cma_buf_t * last_cb;  // Reference held on the cma buf of the most recently updated picture
++
++    PFNGLEGLIMAGETARGETTEXTURE2DOESPROC glEGLImageTargetTexture2DOES;  // Resolved with vlc_gl_GetProcAddress() at open
++} mmal_gl_converter_t;
++
++// Map a VLC video format to the fourcc given to EGL as EGL_LINUX_DRM_FOURCC_EXT; returns 0 if there is no mapping
++static EGLint vlc_to_gl_fourcc(const video_format_t * const fmt)
++{
++    // Converting to mmal selects the right RGB32 variant
++    switch(vlc_to_mmal_video_fourcc(fmt))
++    {
++       case MMAL_ENCODING_I420:
++          return MMAL_FOURCC('Y','U','1','2');
++       case MMAL_ENCODING_YV12:
++          return MMAL_FOURCC('Y','V','1','2');
++       case MMAL_ENCODING_I422:
++          return MMAL_FOURCC('Y','U','1','6');
++//       case MMAL_ENCODING_YUVUV128:  // Doesn't actually work yet
++       case MMAL_ENCODING_NV12:
++          return MMAL_FOURCC('N','V','1','2');
++       case MMAL_ENCODING_NV21:
++          return MMAL_FOURCC('N','V','2','1');
++       case MMAL_ENCODING_RGB16:
++          return MMAL_FOURCC('R','G','1','6');
++       case MMAL_ENCODING_RGB24:  // NOTE(review): R/B look swapped vs the MMAL name in these two cases - presumably DRM fourcc byte-order naming; confirm
++          return MMAL_FOURCC('B','G','2','4');
++       case MMAL_ENCODING_BGR24:
++          return MMAL_FOURCC('R','G','2','4');
++       case MMAL_ENCODING_BGR32:
++       case MMAL_ENCODING_BGRA:
++          return MMAL_FOURCC('X','R','2','4');
++       case MMAL_ENCODING_RGB32:
++       case MMAL_ENCODING_RGBA:
++          return MMAL_FOURCC('X','B','2','4');
++       default:
++          break;
++    }
++    return 0;  // No mapping for this encoding
++}
++
++typedef struct tex_context_s {  // GL texture cached on a cma buf (attached with cma_buf_add_context2)
++    picture_context_t cmn;  // Must remain the first member: pointers are cast between picture_context_t * and tex_context_t *
++    GLuint texture;  // GL texture name; 0 until GenTextures has run
++
++    PFNGLDELETETEXTURESPROC DeleteTextures;  // Copy fn pointer so we don't need tc on delete
++} tex_context_t;
++
++static void tex_context_delete(tex_context_t * const tex)  // Delete the GL texture, then free the context itself
++{
++    tex->DeleteTextures(1, &tex->texture);  // texture may still be 0 when called from get_tex_context's fail path
++    free(tex);
++}
++
++static void tex_context_destroy(picture_context_t * pic_ctx)  // picture_context_t destroy hook
++{
++    tex_context_delete((tex_context_t *)pic_ctx);  // Cast relies on cmn being the first member of tex_context_t
++}
++
++static picture_context_t * tex_context_copy(picture_context_t * pic_ctx)  // picture_context_t copy hook
++{
++    return pic_ctx;  // Hands back the same object: no duplicate, no refcount - NOTE(review): confirm a copy is never destroyed separately
++}
++
++static tex_context_t * get_tex_context(const opengl_tex_converter_t * const tc, picture_t * const pic, cma_buf_t * const cb)
++{  // Return the GL texture context cached on cb, creating it on first use by importing the cma buf as an EGLImage; NULL on failure
++    mmal_gl_converter_t * const sys = tc->priv;
++    tex_context_t * tex = (tex_context_t *)cma_buf_context2(cb);  // Reuse the texture if this buffer was imported before
++    if (tex != NULL)
++        return tex;
++
++    if ((tex = malloc(sizeof(*tex))) == NULL)
++        return NULL;
++
++    *tex = (tex_context_t){
++        .cmn = {
++            .destroy = tex_context_destroy,
++            .copy = tex_context_copy
++        },
++        .texture = 0,  // Stays 0 until GenTextures so the fail path can call tex_context_delete safely
++        .DeleteTextures = tc->vt->DeleteTextures
++    };
++
++    {
++        EGLint attribs[30];  // Worst case: 6 fixed entries + 2 planes * 10 (sand) or 3 planes * 6 (others) + EGL_NONE
++        EGLint * a = attribs;
++        const int fd = cma_buf_fd(cb);
++        uint8_t * base_addr = cma_buf_addr(cb);
++        // Sand attributes are only tabulated for 2 planes and attribs[] is sized to match, so cap sand at 2 planes
++        if (pic->i_planes <= 0 || pic->i_planes > (pic->format.i_chroma == VLC_CODEC_MMAL_ZC_SAND8 ? 2 : 3))
++        {
++            msg_Err(tc, "%s: Bad planes: %d", __func__, pic->i_planes);
++            goto fail;
++        }
++
++        *a++ = EGL_WIDTH;
++        *a++ = pic->format.i_visible_width;
++        *a++ = EGL_HEIGHT;
++        *a++ = pic->format.i_visible_height;
++        *a++ = EGL_LINUX_DRM_FOURCC_EXT;
++        *a++ = sys->drm_fourcc;
++
++        if (pic->format.i_chroma == VLC_CODEC_MMAL_ZC_SAND8)
++        {
++            // Sand is its own very special bunny :-(
++            static const EGLint attnames[] = {
++                EGL_DMA_BUF_PLANE0_FD_EXT,
++                EGL_DMA_BUF_PLANE0_OFFSET_EXT,
++                EGL_DMA_BUF_PLANE0_PITCH_EXT,
++                EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
++                EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
++                EGL_DMA_BUF_PLANE1_FD_EXT,
++                EGL_DMA_BUF_PLANE1_OFFSET_EXT,
++                EGL_DMA_BUF_PLANE1_PITCH_EXT,
++                EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT,
++                EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT
++            };
++
++            const EGLint * n = attnames;
++
++            for (int i = 0; i < pic->i_planes; ++i)
++            {
++                const uint64_t mod = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(pic->p[i].i_pitch >> 7);  // assumes i_pitch encodes the sand column height * 128 - TODO confirm
++
++                *a++ = *n++;
++                *a++ = fd;
++                *a++ = *n++;
++                *a++ = pic->p[i].p_pixels - base_addr;  // Plane offset within the buffer
++                *a++ = *n++;
++                *a++ = pic->format.i_width;  // The PITCH attribute gets the picture width for sand, not p[i].i_pitch
++                *a++ = *n++;
++                *a++ = (EGLint)(mod >> 32);
++                *a++ = *n++;
++                *a++ = (EGLint)(mod & 0xffffffff);
++            }
++        }
++        else
++        {
++            static const EGLint attnames[] = {
++                EGL_DMA_BUF_PLANE0_FD_EXT,
++                EGL_DMA_BUF_PLANE0_OFFSET_EXT,
++                EGL_DMA_BUF_PLANE0_PITCH_EXT,
++                EGL_DMA_BUF_PLANE1_FD_EXT,
++                EGL_DMA_BUF_PLANE1_OFFSET_EXT,
++                EGL_DMA_BUF_PLANE1_PITCH_EXT,
++                EGL_DMA_BUF_PLANE2_FD_EXT,
++                EGL_DMA_BUF_PLANE2_OFFSET_EXT,
++                EGL_DMA_BUF_PLANE2_PITCH_EXT,
++                EGL_DMA_BUF_PLANE3_FD_EXT,
++                EGL_DMA_BUF_PLANE3_OFFSET_EXT,
++                EGL_DMA_BUF_PLANE3_PITCH_EXT
++            };
++
++            const EGLint * n = attnames;
++
++            for (int i = 0; i < pic->i_planes; ++i)
++            {
++                *a++ = *n++;
++                *a++ = fd;
++                *a++ = *n++;
++                *a++ = pic->p[i].p_pixels - base_addr;  // Plane offset within the buffer
++                *a++ = *n++;
++                *a++ = pic->p[i].i_pitch;
++            }
++        }
++
++        *a = EGL_NONE;  // Terminate the attribute list
++
++        const EGLImage image = tc->gl->egl.createImageKHR(tc->gl, EGL_LINUX_DMA_BUF_EXT, NULL, attribs);
++        if (!image) {
++           msg_Err(tc, "Failed to import fd %d: Err=%#x", fd, tc->vt->GetError());
++           goto fail;
++        }
++
++        // ** ?? tc->tex_target
++        tc->vt->GenTextures(1, &tex->texture);
++        tc->vt->BindTexture(GL_TEXTURE_EXTERNAL_OES, tex->texture);
++        tc->vt->TexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
++        tc->vt->TexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
++        sys->glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image);
++
++        tc->gl->egl.destroyImageKHR(tc->gl, image);  // The image handle is dropped as soon as it has been bound to the texture
++    }
++
++    if (cma_buf_add_context2(cb, &tex->cmn) != VLC_SUCCESS)  // Cache on the buffer so the lookup at the top finds it next time
++    {
++        msg_Err(tc, "%s: add_context2 failed", __func__);
++        goto fail;
++    }
++    return tex;
++
++fail:
++    tex_context_delete(tex);
++    return NULL;
++}
++
++// pf_update hook: hand the GL texture for pic's cma buf back in textures[0]; VLC_EGENERIC if pic cannot be used
++static int
++tc_mmal_update(const opengl_tex_converter_t *tc, GLuint *textures,
++                const GLsizei *tex_width, const GLsizei *tex_height,
++                picture_t *pic, const size_t *plane_offset)
++{
++    mmal_gl_converter_t * const sys = tc->priv;
++#if TRACE_ALL
++    {
++        char cbuf[5];
++        msg_Dbg(tc, "%s: %s %d*%dx%d : %d*%dx%d", __func__,
++                str_fourcc(cbuf, pic->format.i_chroma),
++                tc->tex_count, tex_width[0], tex_height[0], pic->i_planes, pic->p[0].i_pitch, pic->p[0].i_lines);
++    }
++#endif
++    VLC_UNUSED(tex_width);
++    VLC_UNUSED(tex_height);
++    VLC_UNUSED(plane_offset);
++
++    if (!is_cma_buf_pic_chroma(pic->format.i_chroma))
++    {
++        char cbuf[5];
++        msg_Err(tc, "Pic with unexpected chroma: %s", str_fourcc(cbuf, pic->format.i_chroma));
++        return VLC_EGENERIC;
++    }
++
++    cma_buf_t * const cb = cma_buf_pic_get(pic);
++    if (cb == NULL)
++    {
++        msg_Err(tc, "Pic missing cma buf");
++        return VLC_EGENERIC;
++    }
++
++    tex_context_t * const tex = get_tex_context(tc, pic, cb);  // Cached per buffer; created on first use
++    if (tex == NULL)
++        return VLC_EGENERIC;
++
++//    tc->vt->BindTexture(GL_TEXTURE_EXTERNAL_OES, tex->texture);
++
++    cma_buf_unref(sys->last_cb);  // Drop the ref on the previous buffer...
++    sys->last_cb = cma_buf_ref(cb);  // ...and hold one on the buffer whose texture is being handed out
++
++    textures[0] = tex->texture;
++    return VLC_SUCCESS;
++}
++
++static int
++tc_mmal_fetch_locations(opengl_tex_converter_t *tc, GLuint program)
++{  // pf_fetch_locations hook: look up the "Texture0" sampler uniform in program
++    tc->uloc.Texture[0] = tc->vt->GetUniformLocation(program, "Texture0");
++    return tc->uloc.Texture[0] != -1 ? VLC_SUCCESS : VLC_EGENERIC;  // -1 means the uniform was not found
++}
++
++static void
++tc_mmal_prepare_shader(const opengl_tex_converter_t *tc,
++                        const GLsizei *tex_width, const GLsizei *tex_height,
++                        float alpha)
++{  // pf_prepare_shader hook: does nothing; every argument is unused
++    (void) tex_width; (void) tex_height; (void) alpha;
++    VLC_UNUSED(tc);
++//    tc->vt->Uniform1i(tc->uloc.Texture[0], 0);
++}
++
++static GLuint
++tc_fragment_shader_init(opengl_tex_converter_t * const tc, const GLenum tex_target,
++                        const vlc_fourcc_t chroma, const video_color_space_t yuv_space)
++{  // Configure tc for a single texture, install the hooks and build the fragment shader; returns the shader object
++    VLC_UNUSED(yuv_space);
++
++    tc->tex_count = 1;
++    tc->tex_target = tex_target;
++    tc->texs[0] = (struct opengl_tex_cfg) {
++        { 1, 1 }, { 1, 1 }, GL_RGB, chroma, GL_UNSIGNED_SHORT  //** ??
++    };
++
++    tc->pf_fetch_locations = tc_mmal_fetch_locations;
++    tc->pf_prepare_shader = tc_mmal_prepare_shader;
++
++    // The shader samples the external texture and writes the sample straight to gl_FragColor
++    const char fs[] =
++       "#extension GL_OES_EGL_image_external : enable\n"
++       "precision mediump float;\n"
++       "uniform samplerExternalOES Texture0;\n"
++       "varying vec2 TexCoord0;\n"
++       "void main() {\n"
++       "  gl_FragColor = texture2D(Texture0, TexCoord0);\n"
++       "}\n";
++
++
++    const char *code = fs;
++
++    GLuint fragment_shader = tc->vt->CreateShader(GL_FRAGMENT_SHADER);
++    tc->vt->ShaderSource(fragment_shader, 1, &code, NULL);
++    tc->vt->CompileShader(fragment_shader);  // NOTE(review): the compile status is not checked here
++    return fragment_shader;
++}
++
++// Module close callback: release the held cma buf, shut down vcsm and free the private state
++static void
++CloseGLConverter(vlc_object_t *obj)
++{
++    opengl_tex_converter_t * const tc = (opengl_tex_converter_t *)obj;
++    mmal_gl_converter_t * const sys = tc->priv;
++
++    if (sys == NULL)  // Also reached from OpenGLConverter's fail path when the priv allocation failed
++        return;
++
++    cma_buf_unref(sys->last_cb);
++    cma_vcsm_exit(sys->vcsm_init_type);
++    free(sys);
++}
++
++
++// Pick a chroma that we can convert to; returns 0 when chroma_in is not one we handle
++// Prefer I420 as smallest
++static vlc_fourcc_t chroma_in_out(const vlc_fourcc_t chroma_in)
++{
++    switch (chroma_in)
++    {
++        case VLC_CODEC_MMAL_OPAQUE:
++        case VLC_CODEC_MMAL_ZC_I420:
++        case VLC_CODEC_MMAL_ZC_SAND8:
++        case VLC_CODEC_MMAL_ZC_SAND10:          // ISP only
++            return VLC_CODEC_MMAL_ZC_I420;
++        case VLC_CODEC_MMAL_ZC_SAND30:          // HVS only
++        case VLC_CODEC_MMAL_ZC_RGB32:
++            return VLC_CODEC_MMAL_ZC_RGB32;     // HVS can't generate YUV of any sort
++        default:
++            break;
++    }
++    return 0;  // Unknown chroma: OpenGLConverter gives up when it sees 0
++}
++
++// Module open callback: accept the MMAL chromas known to chroma_in_out and set tc up to texture from cma bufs via EGL
++static int
++OpenGLConverter(vlc_object_t *obj)
++{
++    opengl_tex_converter_t * const tc = (opengl_tex_converter_t *)obj;
++    int rv = VLC_EGENERIC;
++    const EGLint eglfmt = vlc_to_gl_fourcc(&tc->fmt);  // 0 when the input chroma has no fourcc mapping
++    const vlc_fourcc_t chroma_out = chroma_in_out(tc->fmt.i_chroma);
++
++    // Do we know what to do with this?
++    if (chroma_out == 0)
++        return rv;
++
++    {
++        char dbuf0[5], dbuf1[5], dbuf2[5];
++        msg_Dbg(tc, "<<< %s: V:%s/E:%s,%dx%d [(%d,%d) %d/%d] sar:%d/%d -> %s", __func__,
++                str_fourcc(dbuf0, tc->fmt.i_chroma),
++                str_fourcc(dbuf1, eglfmt),
++                tc->fmt.i_width, tc->fmt.i_height,
++                tc->fmt.i_x_offset, tc->fmt.i_y_offset,
++                tc->fmt.i_visible_width, tc->fmt.i_visible_height,
++                tc->fmt.i_sar_num, tc->fmt.i_sar_den,
++                str_fourcc(dbuf2, chroma_out));
++    }
++
++    if (tc->gl->ext != VLC_GL_EXT_EGL ||
++        !tc->gl->egl.createImageKHR || !tc->gl->egl.destroyImageKHR)
++    {
++        // Missing an important callback
++        msg_Dbg(tc, "Missing EGL xxxImageKHR calls");
++        return rv;
++    }
++
++    if ((tc->priv = calloc(1, sizeof(mmal_gl_converter_t))) == NULL)
++    {
++        msg_Err(tc, "priv alloc failure");
++        rv = VLC_ENOMEM;
++        goto fail;
++    }
++    mmal_gl_converter_t * const sys = tc->priv;
++
++    sys->drm_fourcc = eglfmt;  // May be 0 here; replaced below once tc->fmt has been switched to chroma_out
++
++    if ((sys->vcsm_init_type = cma_vcsm_init()) != VCSM_INIT_CMA) {  // Any result other than VCSM_INIT_CMA is treated as failure
++        msg_Dbg(tc, "VCSM init failed");
++        goto fail;
++    }
++
++    if ((sys->glEGLImageTargetTexture2DOES = vlc_gl_GetProcAddress(tc->gl, "glEGLImageTargetTexture2DOES")) == NULL)
++    {
++        msg_Err(tc, "Failed to bind GL fns");
++        goto fail;
++    }
++
++    if ((tc->fshader = tc_fragment_shader_init(tc, GL_TEXTURE_EXTERNAL_OES,
++                                                   eglfmt == 0 ? VLC_CODEC_RGB32 : tc->fmt.i_chroma,
++                                                   eglfmt == 0 ? COLOR_SPACE_SRGB : tc->fmt.space)) == 0)
++    {
++        msg_Err(tc, "Failed to make shader");
++        goto fail;
++    }
++
++    if (eglfmt == 0)  // No direct mapping: switch tc->fmt to chroma_out and derive the fourcc from that instead
++    {
++        tc->fmt.i_chroma = chroma_out;
++        tc->fmt.i_bits_per_pixel = 8;
++        if (tc->fmt.i_chroma == VLC_CODEC_MMAL_ZC_RGB32)
++        {
++            tc->fmt.i_rmask = 0xff0000;
++            tc->fmt.i_gmask = 0xff00;
++            tc->fmt.i_bmask = 0xff;
++            tc->fmt.space = COLOR_SPACE_SRGB;
++        }
++        else
++        {
++            tc->fmt.i_rmask = 0;
++            tc->fmt.i_gmask = 0;
++            tc->fmt.i_bmask = 0;
++            tc->fmt.space = COLOR_SPACE_UNDEF;
++        }
++        sys->drm_fourcc = vlc_to_gl_fourcc(&tc->fmt);
++    }
++
++    tc->handle_texs_gen = true;  // We manage the texs
++    tc->pf_update  = tc_mmal_update;
++
++#if TRACE_ALL
++    {
++        char dbuf0[5], dbuf1[5], dbuf2[5];
++        msg_Dbg(tc, ">>> %s: V:%s/E:%s,%dx%d [(%d,%d) %d/%d] sar:%d/%d -> %s", __func__,
++                str_fourcc(dbuf0, tc->fmt.i_chroma),
++                str_fourcc(dbuf1, sys->drm_fourcc),
++                tc->fmt.i_width, tc->fmt.i_height,
++                tc->fmt.i_x_offset, tc->fmt.i_y_offset,
++                tc->fmt.i_visible_width, tc->fmt.i_visible_height,
++                tc->fmt.i_sar_num, tc->fmt.i_sar_den,
++                str_fourcc(dbuf2, chroma_out));
++    }
++#endif
++
++    return VLC_SUCCESS;
++
++fail:  // CloseGLConverter tolerates tc->priv == NULL, so every failure after the EGL check funnels here
++    CloseGLConverter(obj);
++    return rv;
++}
++
++vlc_module_begin ()  // Plugin descriptor for the MMAL OpenGL texture converter
++    set_description(N_("MMAL OpenGL surface converter"))  // N_() marks the text for translation, matching the shortname
++    set_shortname (N_("MMALGLConverter"))
++    set_capability("glconv", 900)
++    set_callbacks(OpenGLConverter, CloseGLConverter)
++    set_category(CAT_VIDEO)
++    set_subcategory(SUBCAT_VIDEO_VOUT)
++    add_shortcut("mmal_gl_converter")
++vlc_module_end ()
++
+--- a/modules/hw/mmal/deinterlace.c
++++ b/modules/hw/mmal/deinterlace.c
+@@ -26,11 +26,12 @@
+ #include "config.h"
+ #endif
+
+-#include <vlc_picture_pool.h>
++#include <stdatomic.h>
++
+ #include <vlc_common.h>
++#include <vlc_picture_pool.h>
+ #include <vlc_plugin.h>
+ #include <vlc_filter.h>
+-#include <vlc_atomic.h>
+
+ #include "mmal_picture.h"
+
+@@ -39,468 +40,814 @@
+ #include <interface/mmal/util/mmal_util.h>
+ #include <interface/mmal/util/mmal_default_components.h>
+
+-#define MIN_NUM_BUFFERS_IN_TRANSIT 2
++#define MMAL_DEINTERLACE_NO_QPU "mmal-deinterlace-no-qpu"  // Option name; presumably registered in the module descriptor with the _TEXT/_LONGTEXT help strings (not visible here)
++#define MMAL_DEINTERLACE_NO_QPU_TEXT N_("Do not use QPUs for advanced HD deinterlacing.")
++#define MMAL_DEINTERLACE_NO_QPU_LONGTEXT N_("Do not make use of the QPUs to allow higher quality deinterlacing of HD content.")
+
+-#define MMAL_DEINTERLACE_QPU "mmal-deinterlace-adv-qpu"
+-#define MMAL_DEINTERLACE_QPU_TEXT N_("Use QPUs for advanced HD deinterlacing.")
+-#define MMAL_DEINTERLACE_QPU_LONGTEXT N_("Make use of the QPUs to allow higher quality deinterlacing of HD content.")
++#define MMAL_DEINTERLACE_ADV "mmal-deinterlace-adv"
++#define MMAL_DEINTERLACE_ADV_TEXT N_("Force advanced deinterlace")
++#define MMAL_DEINTERLACE_ADV_LONGTEXT N_("Force advanced deinterlace")
+
+-static int Open(filter_t *filter);
+-static void Close(filter_t *filter);
++#define MMAL_DEINTERLACE_FAST "mmal-deinterlace-fast"
++#define MMAL_DEINTERLACE_FAST_TEXT N_("Force fast deinterlace")
++#define MMAL_DEINTERLACE_FAST_LONGTEXT N_("Force fast deinterlace")
+
+-vlc_module_begin()
+-    set_shortname(N_("MMAL deinterlace"))
+-    set_description(N_("MMAL-based deinterlace filter plugin"))
+-    set_capability("video filter", 0)
+-    set_category(CAT_VIDEO)
+-    set_subcategory(SUBCAT_VIDEO_VFILTER)
+-    set_callbacks(Open, Close)
+-    add_shortcut("deinterlace")
+-    add_bool(MMAL_DEINTERLACE_QPU, false, MMAL_DEINTERLACE_QPU_TEXT,
+-                    MMAL_DEINTERLACE_QPU_LONGTEXT, true);
+-vlc_module_end()
++#define MMAL_DEINTERLACE_NONE "mmal-deinterlace-none"
++#define MMAL_DEINTERLACE_NONE_TEXT N_("Force no deinterlace")  // NOTE(review): the long text below says "no interlace"; probably meant "no deinterlace"
++#define MMAL_DEINTERLACE_NONE_LONGTEXT N_("Force no interlace. Simply strips off the interlace markers and passes the frame straight through. "\
++    "This is the default for > SD if < 96M gpu-mem")
++
++#define MMAL_DEINTERLACE_HALF_RATE "mmal-deinterlace-half-rate"
++#define MMAL_DEINTERLACE_HALF_RATE_TEXT N_("Halve output framerate")
++#define MMAL_DEINTERLACE_HALF_RATE_LONGTEXT N_("Halve output framerate. 1 output frame for each pair of interlaced fields input")
++
++#define MMAL_DEINTERLACE_FULL_RATE "mmal-deinterlace-full-rate"
++#define MMAL_DEINTERLACE_FULL_RATE_TEXT N_("Full output framerate")
++#define MMAL_DEINTERLACE_FULL_RATE_LONGTEXT N_("Full output framerate. 1 output frame for each interlaced field input")
+
+-struct filter_sys_t {
++
++typedef struct filter_sys_t  // Per-filter state, reached through p_filter->p_sys
++{
+     MMAL_COMPONENT_T *component;
+     MMAL_PORT_T *input;
+     MMAL_PORT_T *output;
++    MMAL_POOL_T *in_pool;  // Pool handed to hw_mmal_pic_buf_replicated() when an input picture is submitted
++
++    MMAL_QUEUE_T * out_q;  // Buffers with content, queued by di_output_port_cb
++
++    // Bind this lot somehow into ppr????
++    bool is_cma;  // true: one cma-backed output buffer is sent per input; false: output is refilled through out_ppr
++    cma_buf_pool_t * cma_out_pool;  // Pool the cma output bufs are allocated from (is_cma case)
++    MMAL_POOL_T * out_pool;  // Buffer headers for the output port (is_cma case)
++
++    hw_mmal_port_pool_ref_t *out_ppr;  // Used to refill the output port and to build picture contexts in di_alloc_opaque
++
++    bool half_rate;
++    bool use_qpu;
++    bool use_fast;
++    bool use_passthrough;
++    unsigned int seq_in;    // Seq of next frame to submit (1-15) [Init=1]
++    unsigned int seq_out;   // Seq of last frame received  (1-15) [Init=15]
+
+-    MMAL_QUEUE_T *filtered_pictures;
+-    vlc_sem_t sem;
++    vcsm_init_type_t vcsm_init_type;
+
+-    atomic_bool started;
++} filter_sys_t;
+
+-    /* statistics */
+-    int output_in_transit;
+-    int input_in_transit;
+-};
+-
+-static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
+-static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
+-static void output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
+-static picture_t *deinterlace(filter_t *filter, picture_t *picture);
+-static void flush(filter_t *filter);
+
+ #define MMAL_COMPONENT_DEFAULT_DEINTERLACE "vc.ril.image_fx"
+
+-static int Open(filter_t *filter)
++#define TRACE_ALL 0
++
++
++
++// Wrap an MMAL buffer in a new picture (NULL on failure). Buffer attached to pic on success, is still valid on failure
++static picture_t * di_alloc_opaque(filter_t * const p_filter, MMAL_BUFFER_HEADER_T * const buf)
+ {
+-    int32_t frame_duration = filter->fmt_in.video.i_frame_rate != 0 ?
+-            (int64_t)1000000 * filter->fmt_in.video.i_frame_rate_base /
+-            filter->fmt_in.video.i_frame_rate : 0;
+-    bool use_qpu = var_InheritBool(filter, MMAL_DEINTERLACE_QPU);
++    filter_sys_t *const filter_sys = p_filter->p_sys;
++    picture_t * const pic = filter_NewPicture(p_filter);
+
+-    MMAL_PARAMETER_IMAGEFX_PARAMETERS_T imfx_param = {
+-            { MMAL_PARAMETER_IMAGE_EFFECT_PARAMETERS, sizeof(imfx_param) },
+-            MMAL_PARAM_IMAGEFX_DEINTERLACE_ADV,
+-            4,
+-            { 3, frame_duration, 0, use_qpu }
+-    };
++    if (pic == NULL)
++        goto fail1;
+
+-    int ret = VLC_SUCCESS;
+-    MMAL_STATUS_T status;
+-    filter_sys_t *sys;
++    if (buf->length == 0) {
++        msg_Err(p_filter, "%s: Empty buffer", __func__);
++        goto fail2;
++    }
+
+-    msg_Dbg(filter, "Try to open mmal_deinterlace filter. frame_duration: %d, QPU %s!",
+-            frame_duration, use_qpu ? "used" : "unused");
++    if ((pic->context = hw_mmal_gen_context(buf, filter_sys->out_ppr)) == NULL)  // presumably where buf becomes attached to the picture
++        goto fail2;
+
+-    if (filter->fmt_in.video.i_chroma != VLC_CODEC_MMAL_OPAQUE)
+-        return VLC_EGENERIC;
++    buf_to_pic_copy_props(pic, buf);
+
+-    if (filter->fmt_out.video.i_chroma != VLC_CODEC_MMAL_OPAQUE)
+-        return VLC_EGENERIC;
++#if TRACE_ALL
++    msg_Dbg(p_filter, "pic: prog=%d, tff=%d, date=%lld", pic->b_progressive, pic->b_top_field_first, (long long)pic->date);
++#endif
+
+-    sys = calloc(1, sizeof(filter_sys_t));
+-    if (!sys)
+-        return VLC_ENOMEM;
+-    filter->p_sys = sys;
++    return pic;
+
+-    bcm_host_init();
++fail2:
++    picture_Release(pic);
++fail1:  // buf is deliberately not released on failure: it is left to the caller
++//    mmal_buffer_header_release(buf);
++    return NULL;
++}
+
+-    status = mmal_component_create(MMAL_COMPONENT_DEFAULT_DEINTERLACE, &sys->component);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(filter, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
+-                MMAL_COMPONENT_DEFAULT_DEINTERLACE, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
+-    }
++static void di_input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
++{  // Callback given to mmal_port_enable() for the input port: only releases the buffer header (plus optional tracing)
++#if TRACE_ALL
++    pic_ctx_mmal_t * ctx = buffer->user_data;
++//    filter_sys_t *const sys = ((filter_t *)port->userdata)->p_sys;
++
++    msg_Dbg((filter_t *)port->userdata, "<<< %s: cmd=%d, ctx=%p, buf=%p, flags=%#x, pts=%lld", __func__, buffer->cmd, ctx, buffer,
++            buffer->flags, (long long)buffer->pts);
++#else
++    VLC_UNUSED(port);
++#endif
+
+-    status = mmal_port_parameter_set(sys->component->output[0], &imfx_param.hdr);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(filter, "Failed to configure MMAL component %s (status=%"PRIx32" %s)",
+-                MMAL_COMPONENT_DEFAULT_DEINTERLACE, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
+-    }
++    mmal_buffer_header_release(buffer);
+
+-    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)filter;
+-    status = mmal_port_enable(sys->component->control, control_port_cb);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(filter, "Failed to enable control port %s (status=%"PRIx32" %s)",
+-                sys->component->control->name, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
++#if TRACE_ALL
++    msg_Dbg((filter_t *)port->userdata, ">>> %s", __func__);
++#endif
++}
++
++static void di_output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)
++{  // Output port callback: buffers that carry content go onto sys->out_q, everything else is reset and released
++    if (buf->cmd == 0 && buf->length != 0)  // cmd == 0: presumably a plain data buffer rather than an event - confirm
++    {
++        // The filter structure etc. should always exist if we have contents
++        // but might not on later flushes as we shut down
++        filter_t * const p_filter = (filter_t *)port->userdata;
++        filter_sys_t * const sys = p_filter->p_sys;
++
++#if TRACE_ALL
++        msg_Dbg(p_filter, "<<< %s: cmd=%d; flags=%#x, pts=%lld", __func__, buf->cmd, buf->flags, (long long) buf->pts);
++#endif
++        mmal_queue_put(sys->out_q, buf);
++#if TRACE_ALL
++        msg_Dbg(p_filter, ">>> %s: out Q len=%d", __func__, mmal_queue_length(sys->out_q));
++#endif
++        return;
+     }
+
+-    sys->input = sys->component->input[0];
+-    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)filter;
+-    if (filter->fmt_in.i_codec == VLC_CODEC_MMAL_OPAQUE)
+-        sys->input->format->encoding = MMAL_ENCODING_OPAQUE;
+-    sys->input->format->es->video.width = filter->fmt_in.video.i_width;
+-    sys->input->format->es->video.height = filter->fmt_in.video.i_height;
+-    sys->input->format->es->video.crop.x = 0;
+-    sys->input->format->es->video.crop.y = 0;
+-    sys->input->format->es->video.crop.width = filter->fmt_in.video.i_width;
+-    sys->input->format->es->video.crop.height = filter->fmt_in.video.i_height;
+-    sys->input->format->es->video.par.num = filter->fmt_in.video.i_sar_num;
+-    sys->input->format->es->video.par.den = filter->fmt_in.video.i_sar_den;
++    mmal_buffer_header_reset(buf);   // User data stays intact so release will kill pic
++    mmal_buffer_header_release(buf);
++}
+
+-    es_format_Copy(&filter->fmt_out, &filter->fmt_in);
+-    filter->fmt_out.video.i_frame_rate *= 2;
+
+-    status = mmal_port_format_commit(sys->input);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(filter, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
+-                        sys->input->name, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
+-    }
+-    sys->input->buffer_size = sys->input->buffer_size_recommended;
+-    sys->input->buffer_num = sys->input->buffer_num_recommended;
+
+-    if (filter->fmt_in.i_codec == VLC_CODEC_MMAL_OPAQUE) {
+-        MMAL_PARAMETER_BOOLEAN_T zero_copy = {
+-            { MMAL_PARAMETER_ZERO_COPY, sizeof(MMAL_PARAMETER_BOOLEAN_T) },
+-            1
+-        };
++static MMAL_STATUS_T fill_output_from_q(filter_t * const p_filter, filter_sys_t * const sys, MMAL_QUEUE_T * const q)
++{  // Send every buffer currently in q to the output port; stops at the first send error and returns it
++    MMAL_BUFFER_HEADER_T * out_buf;
+
+-        status = mmal_port_parameter_set(sys->input, &zero_copy.hdr);
+-        if (status != MMAL_SUCCESS) {
+-           msg_Err(filter, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
+-                    sys->input->name, status, mmal_status_to_string(status));
+-           goto out;
++    while ((out_buf = mmal_queue_get(q)) != NULL)
++    {
++        MMAL_STATUS_T err;
++        if ((err = mmal_port_send_buffer(sys->output, out_buf)) != MMAL_SUCCESS)
++        {
++            msg_Err(p_filter, "Send buffer to output failed");
++            mmal_queue_put_back(q, out_buf);  // The unsent buffer goes back on q rather than being dropped
++            return err;
+         }
+     }
++    return MMAL_SUCCESS;  // q was drained
++}
+
+-    status = mmal_port_enable(sys->input, input_port_cb);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(filter, "Failed to enable input port %s (status=%"PRIx32" %s)",
+-                sys->input->name, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
+-    }
++// Pre-release callback for output buffer headers: on error or flush user_data may still hold a cma buf ref
++// Drop that ref and clear user_data
++static MMAL_BOOL_T out_buffer_pre_release_cb(MMAL_BUFFER_HEADER_T *header, void *userdata)
++{
++    VLC_UNUSED(userdata);
+
+-    sys->output = sys->component->output[0];
+-    sys->output->userdata = (struct MMAL_PORT_USERDATA_T *)filter;
+-    mmal_format_full_copy(sys->output->format, sys->input->format);
++    cma_buf_t * const cb = header->user_data;
++    header->user_data = NULL;  // Cleared before the unref so the header no longer points at the buf
++    cma_buf_unref(cb);  // Copes fine with NULL
+
+-    status = mmal_port_format_commit(sys->output);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(filter, "Failed to commit format for output port %s (status=%"PRIx32" %s)",
+-                        sys->input->name, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
++    return MMAL_FALSE;  // presumably lets MMAL carry on with the normal release - confirm against mmal_buffer.h
++}
++
++static inline unsigned int seq_inc(unsigned int x)  // Next sequence number: x + 1, except that reaching 16 wraps to 1 (0 is skipped)
++{
++    return x + 1 >= 16 ? 1 : x + 1;
++}
++
++static inline unsigned int seq_delta(unsigned int sseq, unsigned int fseq)  // Steps from fseq forward to sseq around the 15-entry cycle; 0 if fseq == 0
++{
++    return fseq == 0 ? 0 : fseq <= sseq ? sseq - fseq : 15 - (fseq - sseq);
++}
++
++static picture_t *deinterlace(filter_t * p_filter, picture_t * p_pic)
++{
++    filter_sys_t * const sys = p_filter->p_sys;
++    picture_t *ret_pics = NULL;
++    MMAL_STATUS_T err;
++    MMAL_BUFFER_HEADER_T * out_buf = NULL;
++
++#if TRACE_ALL
++    msg_Dbg(p_filter, "<<< %s", __func__);
++#endif
++
++    if (hw_mmal_vlc_pic_to_mmal_fmt_update(sys->input->format, p_pic))
++    {
++        // ****** Breaks on opaque (at least)
++
++        if (sys->input->is_enabled)
++            mmal_port_disable(sys->input);
++#if 0
++        if (sys->output->is_enabled)
++            mmal_port_disable(sys->output);
++
++        mmal_format_full_copy(sys->output->format, sys->input->format);
++        mmal_port_format_commit(sys->output);
++        sys->output->buffer_num = 30;
++        sys->output->buffer_size = sys->input->buffer_size_recommended;
++        mmal_port_enable(sys->output, di_output_port_cb);
++#endif
++        if (mmal_port_format_commit(sys->input) != MMAL_SUCCESS)
++            msg_Err(p_filter, "Failed to update pic format");
++        sys->input->buffer_num = 30;
++        sys->input->buffer_size = sys->input->buffer_size_recommended;
++        mmal_log_dump_format(sys->input->format);
++    }
++
++    // Reenable stuff if the last thing we did was flush
++    // Output should always be enabled
++    if (!sys->input->is_enabled &&
++        (err = mmal_port_enable(sys->input, di_input_port_cb)) != MMAL_SUCCESS)
++    {
++        msg_Err(p_filter, "Input port reenable failed");
++        goto fail;
++    }
++
++    if (!sys->is_cma)
++    {
++        // Fill output from anything that has turned up in pool Q
++        if (hw_mmal_port_pool_ref_fill(sys->out_ppr) != MMAL_SUCCESS)
++        {
++            msg_Err(p_filter, "Out port fill fail");
++            goto fail;
++        }
+     }
++    else
++    {
++        // We are expecting one in - one out so simply wedge a new bufer
++        // into the output port.  Flow control will happen on cma alloc.
++
++        if ((out_buf = mmal_queue_get(sys->out_pool->queue)) == NULL)
++        {
++            // Should never happen
++            msg_Err(p_filter, "Failed to get output buffer");
++            goto fail;
++        }
++        mmal_buffer_header_reset(out_buf);
+
+-    sys->output->buffer_num = 3;
++        // Attach cma_buf to the buffer & ensure it is freed when the buffer is released
++        // On a good send callback the pic will be extracted to avoid this
++        mmal_buffer_header_pre_release_cb_set(out_buf, out_buffer_pre_release_cb, p_filter);
++
++        cma_buf_t * const cb = cma_buf_pool_alloc_buf(sys->cma_out_pool, sys->output->buffer_size);
++        if ((out_buf->user_data = cb) == NULL)  // Check & attach cb to buf
++        {
++            char dbuf0[5];
++            msg_Err(p_filter, "Failed to alloc CMA buf: fmt=%s, size=%d",
++                    str_fourcc(dbuf0, p_pic->format.i_chroma),
++                    sys->output->buffer_size);
++            goto fail;
++        }
++        const unsigned int vc_h = cma_buf_vc_handle(cb);  // Cannot coerce without going via variable
++        out_buf->data = (uint8_t *)vc_h;
++        out_buf->alloc_size = sys->output->buffer_size;
++
++#if TRACE_ALL
++        msg_Dbg(p_filter, "Out buf send: pic=%p, data=%p, user=%p, flags=%#x, len=%d/%d, pts=%lld",
++                p_pic, out_buf->data, out_buf->user_data, out_buf->flags,
++                out_buf->length, out_buf->alloc_size, (long long)out_buf->pts);
++#endif
+
+-    if (filter->fmt_in.i_codec == VLC_CODEC_MMAL_OPAQUE) {
+-        MMAL_PARAMETER_UINT32_T extra_buffers = {
+-            { MMAL_PARAMETER_EXTRA_BUFFERS, sizeof(MMAL_PARAMETER_UINT32_T) },
+-            5
+-        };
+-        status = mmal_port_parameter_set(sys->output, &extra_buffers.hdr);
+-        if (status != MMAL_SUCCESS) {
+-            msg_Err(filter, "Failed to set MMAL_PARAMETER_EXTRA_BUFFERS on output port (status=%"PRIx32" %s)",
+-                    status, mmal_status_to_string(status));
+-            goto out;
++        if ((err = mmal_port_send_buffer(sys->output, out_buf)) != MMAL_SUCCESS)
++        {
++            msg_Err(p_filter, "Send buffer to output failed");
++            goto fail;
+         }
++        out_buf = NULL;
++    }
+
+-        MMAL_PARAMETER_BOOLEAN_T zero_copy = {
+-            { MMAL_PARAMETER_ZERO_COPY, sizeof(MMAL_PARAMETER_BOOLEAN_T) },
+-            1
+-        };
++    // Stuff into input
++    // We assume the BH is already set up with values reflecting pic date etc.
++    {
++        MMAL_BUFFER_HEADER_T * const pic_buf = hw_mmal_pic_buf_replicated(p_pic, sys->in_pool);
++
++        if (pic_buf == NULL)
++        {
++            msg_Err(p_filter, "Pic has not attached buffer");
++            goto fail;
++        }
+
+-        status = mmal_port_parameter_set(sys->output, &zero_copy.hdr);
+-        if (status != MMAL_SUCCESS) {
+-           msg_Err(filter, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
+-                    sys->output->name, status, mmal_status_to_string(status));
+-           goto out;
++        picture_Release(p_pic);
++
++        // Add a sequence to the flags so we can track what we have actually
++        // deinterlaced
++        pic_buf->flags = (pic_buf->flags & ~(0xfU * MMAL_BUFFER_HEADER_FLAG_USER0)) | (sys->seq_in * (MMAL_BUFFER_HEADER_FLAG_USER0));
++        sys->seq_in = seq_inc(sys->seq_in);
++
++        if ((err = mmal_port_send_buffer(sys->input, pic_buf)) != MMAL_SUCCESS)
++        {
++            msg_Err(p_filter, "Send buffer to input failed");
++            mmal_buffer_header_release(pic_buf);
++            goto fail;
+         }
+     }
+
+-    status = mmal_port_enable(sys->output, output_port_cb);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(filter, "Failed to enable output port %s (status=%"PRIx32" %s)",
+-                sys->output->name, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
++    // Return anything that is in the out Q
++    {
++        picture_t ** pp_pic = &ret_pics;
++
++        // Advanced di has a 3 frame latency, so if the seq delta is greater
++        // than that then we are expecting at least two frames of output. Wait
++        // for one of those.
++        // seq_in is seq of the next frame we are going to submit (1-15, no 0)
++        // seq_out is last frame we removed from Q
++        // So after 4 frames sent (1st time we want to wait), 0 rx seq_in=5, seq_out=15, delta=5
++
++        while ((out_buf = (seq_delta(sys->seq_in, sys->seq_out) >= 5 ? mmal_queue_timedwait(sys->out_q, 1000) : mmal_queue_get(sys->out_q))) != NULL)
++        {
++            const unsigned int seq_out = (out_buf->flags / MMAL_BUFFER_HEADER_FLAG_USER0) & 0xf;
++            int rv;
++
++            picture_t * out_pic;
++
++            if (sys->is_cma)
++            {
++                // Alloc pic
++                if ((out_pic = filter_NewPicture(p_filter)) == NULL)
++                {
++                    // Can't alloc pic - just stop extraction
++                    mmal_queue_put_back(sys->out_q, out_buf);
++                    out_buf = NULL;
++                    msg_Warn(p_filter, "Failed to alloc new filter output pic");
++                    break;
++                }
++
++                // Extract cma_buf from buf & attach to pic
++                cma_buf_t * const cb = (cma_buf_t *)out_buf->user_data;
++                if ((rv = cma_buf_pic_attach(cb, out_pic)) != VLC_SUCCESS)
++                {
++                    char dbuf0[5];
++                    msg_Err(p_filter, "Failed to attach CMA to pic: fmt=%s err=%d",
++                            str_fourcc(dbuf0, out_pic->format.i_chroma),
++                            rv);
++                    // cb still attached to buffer and will be freed with it
++                    goto fail;
++                }
++                out_buf->user_data = NULL;
++
++                buf_to_pic_copy_props(out_pic, out_buf);
++
++                // Set pic data pointers from buf aux info now it has it
++                if ((rv = cma_pic_set_data(out_pic, sys->output->format, out_buf)) != VLC_SUCCESS)
++                {
++                    char dbuf0[5];
++                    msg_Err(p_filter, "Failed to set data: fmt=%s, rv=%d",
++                            str_fourcc(dbuf0, sys->output->format->encoding),
++                            rv);
++                }
++
++                out_buf->user_data = NULL;  // Responsibility for this pic no longer with buffer
++                mmal_buffer_header_release(out_buf);
++            }
++            else
++            {
++                out_pic = di_alloc_opaque(p_filter, out_buf);
++
++                if (out_pic == NULL) {
++                    msg_Warn(p_filter, "Failed to alloc new filter output pic");
++                    mmal_queue_put_back(sys->out_q, out_buf);  // Wedge buf back into Q in the hope we can alloc a pic later
++                    out_buf = NULL;
++                    break;
++                }
++            }
++            out_buf = NULL;  // Now attached to pic or recycled
++
++#if TRACE_ALL
++            msg_Dbg(p_filter, "-- %s: Q pic=%p: seq_in=%d, seq_out=%d, delta=%d", __func__, out_pic, sys->seq_in, seq_out, seq_delta(sys->seq_in, seq_out));
++#endif
++
++            *pp_pic = out_pic;
++            pp_pic = &out_pic->p_next;
++
++            // Ignore 0 seqs
++            // Don't think these should actually happen
++            if (seq_out != 0)
++                sys->seq_out = seq_out;
++        }
++
++        // Crash on lockup
++        assert(ret_pics != NULL || seq_delta(sys->seq_in, sys->seq_out) < 5);
+     }
+
+-    status = mmal_component_enable(sys->component);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(filter, "Failed to enable component %s (status=%"PRIx32" %s)",
+-                sys->component->name, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
++#if TRACE_ALL
++    msg_Dbg(p_filter, ">>> %s: pic=%p", __func__, ret_pics);
++#endif
++
++    return ret_pics;
++
++fail:
++    if (out_buf != NULL)
++        mmal_buffer_header_release(out_buf);
++    picture_Release(p_pic);
++    return NULL;
++}
++
++static void di_flush(filter_t *p_filter)
++{
++    filter_sys_t * const sys = p_filter->p_sys;
++
++#if TRACE_ALL
++    msg_Dbg(p_filter, "<<< %s", __func__);
++#endif
++
++    if (sys->input != NULL && sys->input->is_enabled)
++        mmal_port_disable(sys->input);
++
++    if (sys->output != NULL && sys->output->is_enabled)
++    {
++        if (sys->is_cma)
++        {
++            MMAL_BUFFER_HEADER_T * buf;
++            mmal_port_disable(sys->output);
++            while ((buf = mmal_queue_get(sys->out_q)) != NULL)
++                mmal_buffer_header_release(buf);
++        }
++        else
++        {
++            // Wedge anything we've got into the output port as that will free the underlying buffers
++            fill_output_from_q(p_filter, sys, sys->out_q);
++
++            mmal_port_disable(sys->output);
++
++            // If that dumped anything real into the out_q then have another go
++            if (mmal_queue_length(sys->out_q) != 0)
++            {
++                mmal_port_enable(sys->output, di_output_port_cb);
++                fill_output_from_q(p_filter, sys, sys->out_q);
++                mmal_port_disable(sys->output);
++                // Out q should now be empty & should remain so until the input is reenabled
++            }
++        }
++        mmal_port_enable(sys->output, di_output_port_cb);
++
++        // Leaving the input disabled is fine - but we want to leave the output enabled
++        // so we can retrieve buffers that are still bound to pictures
+     }
+
+-    sys->filtered_pictures = mmal_queue_create();
++    sys->seq_in = 1;
++    sys->seq_out = 15;
+
+-    filter->pf_video_filter = deinterlace;
+-    filter->pf_flush = flush;
++#if TRACE_ALL
++    msg_Dbg(p_filter, ">>> %s", __func__);
++#endif
++}
+
+-    vlc_sem_init(&sys->sem, 0);
+
+-out:
+-    if (ret != VLC_SUCCESS)
+-        Close(filter);
++static void pass_flush(filter_t *p_filter)
++{
++    // Nothing to do: the passthrough path sets up no MMAL ports or queues to drain
++    VLC_UNUSED(p_filter);
++}
+
+-    return ret;
++static picture_t * pass_deinterlace(filter_t * p_filter, picture_t * p_pic)
++{
++    VLC_UNUSED(p_filter);
++
++    p_pic->b_progressive = true;
++    return p_pic;
+ }
+
+-static void Close(filter_t *filter)
++
++static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+ {
+-    filter_sys_t *sys = filter->p_sys;
+-    MMAL_BUFFER_HEADER_T *buffer;
++    filter_t *filter = (filter_t *)port->userdata;
++    MMAL_STATUS_T status;
+
+-    if (!sys)
++    if (buffer->cmd == MMAL_EVENT_ERROR) {
++        status = *(uint32_t *)buffer->data;
++        msg_Err(filter, "MMAL error %"PRIx32" \"%s\"", status,
++                mmal_status_to_string(status));
++    }
++
++    mmal_buffer_header_reset(buffer);
++    mmal_buffer_header_release(buffer);
++}
++
++static void CloseMmalDeinterlace(filter_t *filter)
++{
++    filter_sys_t * const sys = filter->p_sys;
++
++#if TRACE_ALL
++    msg_Dbg(filter, "<<< %s", __func__);
++#endif
++
++    if (sys == NULL)
+         return;
+
+-    if (sys->component && sys->component->control->is_enabled)
+-        mmal_port_disable(sys->component->control);
++    if (sys->use_passthrough)
++    {
++        free(sys);
++        return;
++    }
+
+-    if (sys->input && sys->input->is_enabled)
+-        mmal_port_disable(sys->input);
++    di_flush(filter);
+
+-    if (sys->output && sys->output->is_enabled)
+-        mmal_port_disable(sys->output);
++    if (sys->component && sys->component->control->is_enabled)
++        mmal_port_disable(sys->component->control);
+
+     if (sys->component && sys->component->is_enabled)
+         mmal_component_disable(sys->component);
+
+-    while ((buffer = mmal_queue_get(sys->filtered_pictures))) {
+-        picture_t *pic = (picture_t *)buffer->user_data;
+-        picture_Release(pic);
++    if (sys->in_pool != NULL)
++        mmal_pool_destroy(sys->in_pool);
++
++    hw_mmal_port_pool_ref_release(sys->out_ppr, false);
++    // Once we exit filter & sys are invalid so mark as such
++    if (sys->output != NULL)
++        sys->output->userdata = NULL;
++
++    if (sys->is_cma)
++    {
++        if (sys->output && sys->output->is_enabled)
++            mmal_port_disable(sys->output);
++
++        cma_buf_pool_deletez(&sys->cma_out_pool);
++
++        if (sys->out_pool != NULL)
++            mmal_pool_destroy(sys->out_pool);
+     }
+
+-    if (sys->filtered_pictures)
+-        mmal_queue_destroy(sys->filtered_pictures);
++    if (sys->out_q != NULL)
++        mmal_queue_destroy(sys->out_q);
+
+     if (sys->component)
+         mmal_component_release(sys->component);
+
+-    vlc_sem_destroy(&sys->sem);
++    cma_vcsm_exit(sys->vcsm_init_type);
++
+     free(sys);
++}
++
+
+-    bcm_host_deinit();
++static bool is_fmt_valid_in(const vlc_fourcc_t fmt)
++{
++    return fmt == VLC_CODEC_MMAL_OPAQUE ||
++           fmt == VLC_CODEC_MMAL_ZC_I420 ||
++           fmt == VLC_CODEC_MMAL_ZC_SAND8;
+ }
+
+-static int send_output_buffer(filter_t *filter)
++static int OpenMmalDeinterlace(filter_t *filter)
+ {
+-    filter_sys_t *sys = filter->p_sys;
+-    MMAL_BUFFER_HEADER_T *buffer;
++    int32_t frame_duration = filter->fmt_in.video.i_frame_rate != 0 ?
++            CLOCK_FREQ * filter->fmt_in.video.i_frame_rate_base /
++            filter->fmt_in.video.i_frame_rate : 0;
++
++    int ret = VLC_EGENERIC;
+     MMAL_STATUS_T status;
+-    picture_t *picture;
+-    int ret = 0;
++    filter_sys_t *sys;
++
++    msg_Dbg(filter, "<<< %s", __func__);
++
++    if (!is_fmt_valid_in(filter->fmt_in.video.i_chroma) ||
++        filter->fmt_out.video.i_chroma != filter->fmt_in.video.i_chroma)
++        return VLC_EGENERIC;
+
+-    if (!sys->output->is_enabled) {
+-        ret = VLC_EGENERIC;
+-        goto out;
++    sys = calloc(1, sizeof(filter_sys_t));
++    if (!sys)
++        return VLC_ENOMEM;
++    filter->p_sys = sys;
++
++    sys->seq_in = 1;
++    sys->seq_out = 15;
++    sys->is_cma = is_cma_buf_pic_chroma(filter->fmt_out.video.i_chroma);
++
++    if ((sys->vcsm_init_type = cma_vcsm_init()) == VCSM_INIT_NONE) {
++        msg_Err(filter, "VCSM init failed");
++        goto fail;
++    }
++
++    if (rpi_is_model_pi4())
++    {
++        sys->half_rate = true;
++        sys->use_qpu = false;
++        sys->use_fast = true;
++    }
++    else
++    {
++        sys->half_rate = false;
++        sys->use_qpu = true;
++        sys->use_fast = false;
++    }
++    sys->use_passthrough = false;
++
++    if (filter->fmt_in.video.i_width * filter->fmt_in.video.i_height > 768 * 576)
++    {
++        // We get stressed if we have to try too hard - so make life easier
++        sys->half_rate = true;
++        // Also check we actually have enough memory to do this
++        // Memory always comes from GPU if Opaque
++        // Assume we have plenty of memory if it comes from CMA
++        if ((!sys->is_cma || sys->vcsm_init_type == VCSM_INIT_LEGACY) &&
++            hw_mmal_get_gpu_mem() < (96 << 20))
++        {
++            sys->use_passthrough = true;
++            msg_Warn(filter, "Deinterlace bypassed due to lack of GPU memory");
++        }
+     }
+
+-    picture = filter_NewPicture(filter);
+-    if (!picture) {
+-        msg_Warn(filter, "Failed to get new picture");
+-        ret = -1;
+-        goto out;
++    if (var_InheritBool(filter, MMAL_DEINTERLACE_NO_QPU))
++        sys->use_qpu = false;
++    if (var_InheritBool(filter, MMAL_DEINTERLACE_ADV))
++    {
++        sys->use_fast = false;
++        sys->use_passthrough = false;
++    }
++    if (var_InheritBool(filter, MMAL_DEINTERLACE_FAST))
++    {
++        sys->use_fast = true;
++        sys->use_passthrough = false;
++    }
++    if (var_InheritBool(filter, MMAL_DEINTERLACE_NONE))
++        sys->use_passthrough = true;
++    if (var_InheritBool(filter, MMAL_DEINTERLACE_FULL_RATE))
++        sys->half_rate = false;
++    if (var_InheritBool(filter, MMAL_DEINTERLACE_HALF_RATE))
++        sys->half_rate = true;
++
++    if (sys->use_passthrough)
++    {
++        filter->pf_video_filter = pass_deinterlace;
++        filter->pf_flush = pass_flush;
++        // Don't need VCSM - get rid of it now
++        cma_vcsm_exit(sys->vcsm_init_type);
++        sys->vcsm_init_type = VCSM_INIT_NONE;
++        return 0;
++    }
++
++    {
++        char dbuf0[5], dbuf1[5];
++        msg_Dbg(filter, "%s: %s,%dx%d [(%d,%d) %d/%d] -> %s,%dx%d [(%d,%d) %dx%d]: %s %s %s", __func__,
++                str_fourcc(dbuf0, filter->fmt_in.video.i_chroma),
++                filter->fmt_in.video.i_width, filter->fmt_in.video.i_height,
++                filter->fmt_in.video.i_x_offset, filter->fmt_in.video.i_y_offset,
++                filter->fmt_in.video.i_visible_width, filter->fmt_in.video.i_visible_height,
++                str_fourcc(dbuf1, filter->fmt_out.video.i_chroma),
++                filter->fmt_out.video.i_width, filter->fmt_out.video.i_height,
++                filter->fmt_out.video.i_x_offset, filter->fmt_out.video.i_y_offset,
++                filter->fmt_out.video.i_visible_width, filter->fmt_out.video.i_visible_height,
++                sys->use_qpu ? "QPU" : "VPU",
++                sys->use_fast ? "FAST" : "ADV",
++                sys->use_passthrough ? "PASS" : sys->half_rate ? "HALF" : "FULL");
+     }
+-    picture->format.i_frame_rate = filter->fmt_out.video.i_frame_rate;
+-    picture->format.i_frame_rate_base = filter->fmt_out.video.i_frame_rate_base;
+
+-    buffer = picture->p_sys->buffer;
+-    buffer->user_data = picture;
+-    buffer->cmd = 0;
++    status = mmal_component_create(MMAL_COMPONENT_DEFAULT_DEINTERLACE, &sys->component);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(filter, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
++                MMAL_COMPONENT_DEFAULT_DEINTERLACE, status, mmal_status_to_string(status));
++        goto fail;
++    }
+
+-    mmal_picture_lock(picture);
++    {
++        const MMAL_PARAMETER_IMAGEFX_PARAMETERS_T imfx_param = {
++            { MMAL_PARAMETER_IMAGE_EFFECT_PARAMETERS, sizeof(imfx_param) },
++            sys->use_fast ?
++                MMAL_PARAM_IMAGEFX_DEINTERLACE_FAST :
++                MMAL_PARAM_IMAGEFX_DEINTERLACE_ADV,
++            4,
++            { 5 /* Frame type: mixed */, frame_duration, sys->half_rate, sys->use_qpu }
++        };
+
+-    status = mmal_port_send_buffer(sys->output, buffer);
++        status = mmal_port_parameter_set(sys->component->output[0], &imfx_param.hdr);
++        if (status != MMAL_SUCCESS) {
++            msg_Err(filter, "Failed to configure MMAL component %s (status=%"PRIx32" %s)",
++                    MMAL_COMPONENT_DEFAULT_DEINTERLACE, status, mmal_status_to_string(status));
++            goto fail;
++        }
++    }
++
++    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)filter;
++    status = mmal_port_enable(sys->component->control, control_port_cb);
+     if (status != MMAL_SUCCESS) {
+-        msg_Err(filter, "Failed to send buffer to output port (status=%"PRIx32" %s)",
+-                status, mmal_status_to_string(status));
+-        mmal_buffer_header_release(buffer);
+-        picture_Release(picture);
+-        ret = -1;
+-    } else {
+-        atomic_fetch_add(&sys->output_in_transit, 1);
+-        vlc_sem_post(&sys->sem);
++        msg_Err(filter, "Failed to enable control port %s (status=%"PRIx32" %s)",
++                sys->component->control->name, status, mmal_status_to_string(status));
++        goto fail;
+     }
+
+-out:
+-    return ret;
+-}
++    sys->input = sys->component->input[0];
++    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)filter;
++    sys->input->format->encoding = vlc_to_mmal_video_fourcc(&filter->fmt_in.video);
++    hw_mmal_vlc_fmt_to_mmal_fmt(sys->input->format, &filter->fmt_in.video);
+
+-static void fill_output_port(filter_t *filter)
+-{
+-    filter_sys_t *sys = filter->p_sys;
+-    /* allow at least 2 buffers in transit */
+-    unsigned max_buffers_in_transit = __MAX(2, MIN_NUM_BUFFERS_IN_TRANSIT);
+-    int buffers_available = sys->output->buffer_num -
+-        atomic_load(&sys->output_in_transit) -
+-        mmal_queue_length(sys->filtered_pictures);
+-    int buffers_to_send = max_buffers_in_transit - sys->output_in_transit;
+-    int i;
++    es_format_Copy(&filter->fmt_out, &filter->fmt_in);
++    if (!sys->half_rate)
++        filter->fmt_out.video.i_frame_rate *= 2;
+
+-    if (buffers_to_send > buffers_available)
+-        buffers_to_send = buffers_available;
++    status = mmal_port_format_commit(sys->input);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(filter, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
++                        sys->input->name, status, mmal_status_to_string(status));
++        goto fail;
++    }
++    sys->input->buffer_size = sys->input->buffer_size_recommended;
++    sys->input->buffer_num = 30;
++//    sys->input->buffer_num = sys->input->buffer_num_recommended;
+
+-#ifndef NDEBUG
+-    msg_Dbg(filter, "Send %d buffers to output port (available: %d, in_transit: %d, buffer_num: %d)",
+-                    buffers_to_send, buffers_available, sys->output_in_transit,
+-                    sys->output->buffer_num);
+-#endif
+-    for (i = 0; i < buffers_to_send; ++i) {
+-        if (send_output_buffer(filter) < 0)
+-            break;
++    if ((sys->in_pool = mmal_pool_create(sys->input->buffer_num, 0)) == NULL)
++    {
++        msg_Err(filter, "Failed to create input pool");
++        goto fail;
+     }
+-}
+
+-static picture_t *deinterlace(filter_t *filter, picture_t *picture)
+-{
+-    filter_sys_t *sys = filter->p_sys;
+-    MMAL_BUFFER_HEADER_T *buffer;
+-    picture_t *out_picture = NULL;
+-    picture_t *ret = NULL;
+-    MMAL_STATUS_T status;
+-    unsigned i = 0;
++    status = port_parameter_set_bool(sys->input, MMAL_PARAMETER_ZERO_COPY, true);
++    if (status != MMAL_SUCCESS) {
++       msg_Err(filter, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
++                sys->input->name, status, mmal_status_to_string(status));
++       goto fail;
++    }
+
+-    fill_output_port(filter);
++    status = mmal_port_enable(sys->input, di_input_port_cb);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(filter, "Failed to enable input port %s (status=%"PRIx32" %s)",
++                sys->input->name, status, mmal_status_to_string(status));
++        goto fail;
++    }
+
+-    buffer = picture->p_sys->buffer;
+-    buffer->user_data = picture;
+-    buffer->pts = picture->date;
+-    buffer->cmd = 0;
+
+-    if (!picture->p_sys->displayed) {
+-        status = mmal_port_send_buffer(sys->input, buffer);
+-        if (status != MMAL_SUCCESS) {
+-            msg_Err(filter, "Failed to send buffer to input port (status=%"PRIx32" %s)",
+-                    status, mmal_status_to_string(status));
+-            picture_Release(picture);
+-        } else {
+-            picture->p_sys->displayed = true;
+-            atomic_fetch_add(&sys->input_in_transit, 1);
+-            vlc_sem_post(&sys->sem);
+-        }
+-    } else {
+-        picture_Release(picture);
+-    }
+-
+-    /*
+-     * Send output buffers
+-     */
+-    while(atomic_load(&sys->started) && i < 2) {
+-        if (buffer = mmal_queue_timedwait(sys->filtered_pictures, 2000)) {
+-            i++;
+-            if (!out_picture) {
+-                out_picture = (picture_t *)buffer->user_data;
+-                ret = out_picture;
+-            } else {
+-                out_picture->p_next = (picture_t *)buffer->user_data;
+-                out_picture = out_picture->p_next;
+-            }
+-            out_picture->date = buffer->pts;
+-        } else {
+-            msg_Dbg(filter, "Failed waiting for filtered picture");
+-            break;
+-        }
++    if ((sys->out_q = mmal_queue_create()) == NULL)
++    {
++        msg_Err(filter, "Failed to create out Q");
++        goto fail;
+     }
+-    if (out_picture)
+-        out_picture->p_next = NULL;
+
+-    return ret;
+-}
+-
+-static void flush(filter_t *filter)
+-{
+-    filter_sys_t *sys = filter->p_sys;
+-    MMAL_BUFFER_HEADER_T *buffer;
++    sys->output = sys->component->output[0];
++    mmal_format_full_copy(sys->output->format, sys->input->format);
+
+-    msg_Dbg(filter, "flush deinterlace filter");
++    if (!sys->is_cma)
++    {
++        if ((status = hw_mmal_opaque_output(VLC_OBJECT(filter), &sys->out_ppr, sys->output, 5, di_output_port_cb)) != MMAL_SUCCESS)
++            goto fail;
++    }
++    else
++    {
++        // CMA stuff
++        sys->output->userdata = (struct MMAL_PORT_USERDATA_T *)filter;
++
++        if ((sys->cma_out_pool = cma_buf_pool_new(8, 8, true, "deinterlace")) == NULL)
++        {
++            msg_Err(filter, "Failed to alloc cma buf pool");
++            goto fail;
++        }
+
+-    msg_Dbg(filter, "flush: flush ports (input: %d, output: %d in transit)",
+-            sys->input_in_transit, sys->output_in_transit);
+-    mmal_port_flush(sys->output);
+-    mmal_port_flush(sys->input);
+-
+-    msg_Dbg(filter, "flush: wait for all buffers to be returned");
+-    while (atomic_load(&sys->input_in_transit) ||
+-            atomic_load(&sys->output_in_transit))
+-        vlc_sem_wait(&sys->sem);
+-
+-    while ((buffer = mmal_queue_get(sys->filtered_pictures))) {
+-        picture_t *pic = (picture_t *)buffer->user_data;
+-        msg_Dbg(filter, "flush: release already filtered pic %p",
+-                (void *)pic);
+-        picture_Release(pic);
+-    }
+-    atomic_store(&sys->started, false);
+-    msg_Dbg(filter, "flush: done");
+-}
++        // Rate control done by CMA in flight logic, so have "inexhaustible" pool here
++        if ((sys->out_pool = mmal_pool_create(30, 0)) == NULL)
++        {
++            msg_Err(filter, "Failed to alloc out pool");
++            goto fail;
++        }
+
+-static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+-{
+-    filter_t *filter = (filter_t *)port->userdata;
+-    MMAL_STATUS_T status;
++        port_parameter_set_bool(sys->output, MMAL_PARAMETER_ZERO_COPY, true);
+
+-    if (buffer->cmd == MMAL_EVENT_ERROR) {
+-        status = *(uint32_t *)buffer->data;
+-        msg_Err(filter, "MMAL error %"PRIx32" \"%s\"", status,
+-                mmal_status_to_string(status));
+-    }
++        if ((status = mmal_port_format_commit(sys->output)) != MMAL_SUCCESS)
++        {
++            msg_Err(filter, "Output port format commit failed");
++            goto fail;
++        }
+
+-    mmal_buffer_header_release(buffer);
+-}
++        sys->output->buffer_num = 30;
++        sys->output->buffer_size = sys->output->buffer_size_recommended;
+
+-static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+-{
+-    picture_t *picture = (picture_t *)buffer->user_data;
+-    filter_t *filter = (filter_t *)port->userdata;
+-    filter_sys_t *sys = filter->p_sys;
++        // CB just drops all bufs into out_q
++        if ((status = mmal_port_enable(sys->output, di_output_port_cb)) != MMAL_SUCCESS)
++        {
++            msg_Err(filter, "Failed to enable output port %s (status=%"PRIx32" %s)",
++                    sys->output->name, status, mmal_status_to_string(status));
++            goto fail;
++        }
++    }
+
+-    if (picture) {
+-        picture_Release(picture);
+-    } else {
+-        msg_Warn(filter, "Got buffer without picture on input port - OOOPS");
+-        mmal_buffer_header_release(buffer);
++    status = mmal_component_enable(sys->component);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(filter, "Failed to enable component %s (status=%"PRIx32" %s)",
++                sys->component->name, status, mmal_status_to_string(status));
++        goto fail;
+     }
+
+-    atomic_fetch_sub(&sys->input_in_transit, 1);
+-    vlc_sem_post(&sys->sem);
++    filter->pf_video_filter = deinterlace;
++    filter->pf_flush = di_flush;
++    return 0;
++
++fail:
++    CloseMmalDeinterlace(filter);
++    return ret;
+ }
+
+-static void output_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+-{
+-    filter_t *filter = (filter_t *)port->userdata;
+-    filter_sys_t *sys = filter->p_sys;
+-    picture_t *picture;
++vlc_module_begin()  // VLC module descriptor for the MMAL deinterlace video filter
++    set_shortname(N_("MMAL deinterlace"))
++    set_description(N_("MMAL-based deinterlace filter plugin"))
++    set_capability("video filter", 900)
++    set_category(CAT_VIDEO)
++    set_subcategory(SUBCAT_VIDEO_VFILTER)
++    set_callbacks(OpenMmalDeinterlace, CloseMmalDeinterlace)  // Open / close entry points
++    add_shortcut("deinterlace")  // The bool options below default to false and are read in OpenMmalDeinterlace
++    add_bool(MMAL_DEINTERLACE_NO_QPU, false, MMAL_DEINTERLACE_NO_QPU_TEXT,
++                    MMAL_DEINTERLACE_NO_QPU_LONGTEXT, true);
++    add_bool(MMAL_DEINTERLACE_ADV, false, MMAL_DEINTERLACE_ADV_TEXT,
++                    MMAL_DEINTERLACE_ADV_LONGTEXT, true);
++    add_bool(MMAL_DEINTERLACE_FAST, false, MMAL_DEINTERLACE_FAST_TEXT,
++                    MMAL_DEINTERLACE_FAST_LONGTEXT, true);
++    add_bool(MMAL_DEINTERLACE_NONE, false, MMAL_DEINTERLACE_NONE_TEXT,
++                    MMAL_DEINTERLACE_NONE_LONGTEXT, true);
++    add_bool(MMAL_DEINTERLACE_HALF_RATE, false, MMAL_DEINTERLACE_HALF_RATE_TEXT,
++                    MMAL_DEINTERLACE_HALF_RATE_LONGTEXT, true);
++    add_bool(MMAL_DEINTERLACE_FULL_RATE, false, MMAL_DEINTERLACE_FULL_RATE_TEXT,
++                    MMAL_DEINTERLACE_FULL_RATE_LONGTEXT, true);
++
++vlc_module_end()
++
+
+-    if (buffer->cmd == 0) {
+-        if (buffer->length > 0) {
+-            atomic_store(&sys->started, true);
+-            mmal_queue_put(sys->filtered_pictures, buffer);
+-            picture = (picture_t *)buffer->user_data;
+-        } else {
+-            picture = (picture_t *)buffer->user_data;
+-            picture_Release(picture);
+-        }
+-
+-        atomic_fetch_sub(&sys->output_in_transit, 1);
+-        vlc_sem_post(&sys->sem);
+-    } else if (buffer->cmd == MMAL_EVENT_FORMAT_CHANGED) {
+-        msg_Warn(filter, "MMAL_EVENT_FORMAT_CHANGED seen but not handled");
+-        mmal_buffer_header_release(buffer);
+-    } else {
+-        mmal_buffer_header_release(buffer);
+-    }
+-}
+--- /dev/null
++++ b/modules/hw/mmal/mmal_avcodec.c
+@@ -0,0 +1,2175 @@
++/*****************************************************************************
++ * video.c: video decoder using the libavcodec library
++ *****************************************************************************
++ * Copyright (C) 1999-2001 VLC authors and VideoLAN
++ * $Id$
++ *
++ * Authors: Laurent Aimar <fenrir@via.ecp.fr>
++ *          Gildas Bazin <gbazin@videolan.org>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU Lesser General Public License as published by
++ * the Free Software Foundation; either version 2.1 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public License
++ * along with this program; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
++ *****************************************************************************/
++
++/*****************************************************************************
++ * Preamble
++ *****************************************************************************/
++#include "config.h"
++
++#include <vlc_common.h>
++#include <vlc_codec.h>
++#include <vlc_avcodec.h>
++#include <vlc_cpu.h>
++#include <vlc_atomic.h>
++#include <assert.h>
++
++#include <libavcodec/avcodec.h>
++#include <libavutil/mem.h>
++#include <libavutil/pixdesc.h>
++#if (LIBAVUTIL_VERSION_MICRO >= 100 && LIBAVUTIL_VERSION_INT >= AV_VERSION_INT( 55, 16, 101 ) )
++#include <libavutil/mastering_display_metadata.h>
++#endif
++
++//#include "avcodec.h"
++//#include "va.h"
++
++#include <vlc_plugin.h>
++#include <libavutil/rpi_sand_fns.h>
++#include <libavcodec/rpi_zc.h>
++#include "../../codec/cc.h"
++#include "../../codec/avcodec/avcommon.h"  // ??? Beware over inclusion
++#include "mmal_cma.h"
++#include "mmal_picture.h"
++
++#define TRACE_ALL 0
++
++#define BUFFERS_IN_FLIGHT       5       // Default max value for in flight buffers
++#define BUFFERS_IN_FLIGHT_UHD   3       // Fewer if very big
++
++#define MMAL_AVCODEC_BUFFERS "mmal-avcodec-buffers"
++#define MMAL_AVCODEC_BUFFERS_TEXT N_("In flight buffer count before blocking.")
++#define MMAL_AVCODEC_BUFFERS_LONGTEXT N_("In flight buffer count before blocking. " \
++"Beware that incautious changing of this can lead to lockup. " \
++"Zero will disable the module.")
++
++
++// Fwd declarations required due to wanting to avoid reworking the original
++// code too much
++static void MmalAvcodecCloseDecoder( vlc_object_t *obj );
++
++
++/*****************************************************************************
++ * decoder_sys_t : decoder descriptor
++ *****************************************************************************/
++struct decoder_sys_t
++{
++    AVCodecContext *p_context;  // libavcodec codec context
++    const AVCodec  *p_codec;    // libavcodec codec descriptor
++
++    /* Video decoder specific part */
++    date_t  pts;
++
++    /* Closed captions for decoders */
++    cc_data_t cc;
++
++    /* for frame skipping algo */
++    bool b_hurry_up;
++    bool b_show_corrupted;
++    bool b_from_preroll;
++    enum AVDiscard i_skip_frame;
++
++    /* how many decoded frames are late */
++    int     i_late_frames;
++    mtime_t i_late_frames_start;
++    mtime_t i_last_late_delay;
++
++    /* for direct rendering */
++    bool        b_direct_rendering;
++    atomic_bool b_dr_failure;
++
++    /* Hack to force display of still pictures */
++    bool b_first_frame;
++
++
++    /* */
++    bool palette_sent;
++
++    /* VA API */
++//    vlc_va_t *p_va;  (VA support is disabled in this copy; the va.h include is commented out too)
++    enum PixelFormat pix_fmt;
++    int profile;
++    int level;
++
++    vlc_sem_t sem_mt;
++
++    // Rpi vars
++    cma_buf_pool_t * cma_pool;
++    bool pool_alloc_1;
++    vcsm_init_type_t vcsm_init_type;  // NOTE(review): presumably the cma_vcsm_init() result, kept for cma_vcsm_exit() - confirm
++    int cma_in_flight_max;            // NOTE(review): presumably the limit set via MMAL_AVCODEC_BUFFERS - confirm
++    // Debug
++    decoder_t * p_dec;
++};
++
++
++static vlc_fourcc_t
++ZcFindVlcChroma(const int i_ffmpeg_chroma)
++{
++    // Translate an ffmpeg pixel format into the matching zero-copy VLC chroma.
++    // This is all we claim to deal with: any other format yields 0.
++    // In theory RGB should be doable within our current framework
++    vlc_fourcc_t chroma = 0;
++
++    if (i_ffmpeg_chroma == AV_PIX_FMT_YUV420P)
++        chroma = VLC_CODEC_MMAL_ZC_I420;
++    else if (i_ffmpeg_chroma == AV_PIX_FMT_SAND128 ||
++             i_ffmpeg_chroma == AV_PIX_FMT_RPI4_8)
++        chroma = VLC_CODEC_MMAL_ZC_SAND8;
++    else if (i_ffmpeg_chroma == AV_PIX_FMT_SAND64_10)
++        chroma = VLC_CODEC_MMAL_ZC_SAND10;
++    else if (i_ffmpeg_chroma == AV_PIX_FMT_RPI4_10)
++        chroma = VLC_CODEC_MMAL_ZC_SAND30;
++
++    return chroma;
++}
++
++
++// Pixel format conversion for MMAL:
++// sets the chroma of a video_format_t from an ffmpeg pix_fmt (0 on success, -1 if unsupported)
++static int
++ZcGetVlcChroma( video_format_t *fmt, int i_ffmpeg_chroma )
++{
++    const vlc_fourcc_t chroma = ZcFindVlcChroma(i_ffmpeg_chroma);
++
++    // The colour masks are cleared whether or not a chroma was found
++    fmt->i_rmask = fmt->i_gmask = fmt->i_bmask = 0;
++    fmt->i_chroma = chroma;
++    return chroma != 0 ? 0 : -1;
++}
++
++
++// Format chooser is way simpler than vlc's: the first offered format that has
++// a zero-copy chroma wins, otherwise ffmpeg's default choice is returned
++static enum PixelFormat
++ZcGetFormat(AVCodecContext *p_context, const enum PixelFormat *pi_fmt)
++{
++    const enum PixelFormat fallback = avcodec_default_get_format(p_context, pi_fmt);
++    const enum PixelFormat *candidate = pi_fmt;
++
++    while (*candidate != AV_PIX_FMT_NONE && ZcFindVlcChroma(*candidate) == 0)
++        ++candidate;
++    return *candidate != AV_PIX_FMT_NONE ? *candidate : fallback;
++}
++
++
++static void cma_avbuf_pool_free(void * v)  // .free callback: forwards to cma_buf_unref()
++{
++    cma_buf_unref(v);
++}
++// The thunks below only adapt the void * argument used by the callback table
++static unsigned int zc_buf_vcsm_handle(void * v)
++{
++    return cma_buf_vcsm_handle(v);
++}
++
++static unsigned int zc_buf_vc_handle(void * v)
++{
++    return cma_buf_vc_handle(v);
++}
++
++static void * zc_buf_map_arm(void * v)
++{
++    return cma_buf_addr(v);
++}
++
++static unsigned int zc_buf_map_vc(void * v)
++{
++    return cma_buf_vc_addr(v);
++}
++
++// Callback table given to av_rpi_zc_buf() by zc_alloc_buf() together with the
++// cma_buf; presumably that cma_buf is what each callback receives as v
++static const av_rpi_zc_buf_fn_tab_t zc_buf_fn_tab = {
++    .free = cma_avbuf_pool_free,
++
++    .vcsm_handle = zc_buf_vcsm_handle,
++    .vc_handle = zc_buf_vc_handle,
++    .map_arm = zc_buf_map_arm,
++    .map_vc = zc_buf_map_vc
++};
++
++
++static AVBufferRef *
++zc_alloc_buf(void * v, size_t size, const AVRpiZcFrameGeometry * geo)
++{
++    // Buffer allocation callback registered through av_rpi_zc_init2(); v is the
++    // decoder_t passed there. Returns NULL if no buffer could be provided.
++    decoder_t * const dec = v;
++    decoder_sys_t * const sys = dec->p_sys;
++    VLC_UNUSED(geo);
++    assert(sys != NULL);
++    // Keep the pool sized to the decoder's request plus our in-flight allowance
++    const unsigned int dec_pool_req = av_rpi_zc_get_decoder_pool_size(sys->p_context->opaque);
++    if (dec_pool_req != 0)
++    {
++        cma_buf_pool_resize(sys->cma_pool, dec_pool_req + sys->cma_in_flight_max, sys->cma_in_flight_max);
++        // First time only: log the sizing and try to pre-fill the pool
++        if (!sys->pool_alloc_1)
++        {
++            sys->pool_alloc_1 = true;
++            msg_Dbg(dec, "Pool size: (%u+%d) * %zu", dec_pool_req, sys->cma_in_flight_max, size);
++            if (cma_buf_pool_fill(sys->cma_pool, size) != 0)
++                msg_Warn(dec, "Failed to preallocate decoder pool (%u+%d) * %zu", dec_pool_req, sys->cma_in_flight_max, size);
++        }
++    }
++    // A failed pre-fill is only warned about; the allocation is still attempted
++    void * const cmabuf = cma_buf_pool_alloc_buf(sys->cma_pool, size);
++
++    if (cmabuf == NULL)
++    {
++        msg_Err(dec, "CMA buf pool alloc buf failed");
++        return NULL;
++    }
++    // Wrap the CMA buffer for ffmpeg; zc_buf_fn_tab supplies its callbacks
++    AVBufferRef *const avbuf = av_rpi_zc_buf(cma_buf_size(cmabuf), 0, cmabuf, &zc_buf_fn_tab);
++
++    if (avbuf == NULL)
++    {
++        msg_Err(dec, "av_rpi_zc_buf failed");
++        cma_buf_unref(cmabuf);  // No AVBufferRef took it over, so release it here
++        return NULL;
++    }
++
++    return avbuf;
++}
++
++// Pool release callback registered through av_rpi_zc_init2(); v is the decoder_t
++static void zc_free_pool(void * v)
++{
++    decoder_sys_t * const sys = ((decoder_t *)v)->p_sys;
++    cma_buf_pool_delete(sys->cma_pool);
++}
++
++
++static const uint8_t shift_01[] = {0,1,1,1};   // Per-plane right shift: plane 0 full size, others halved
++static const uint8_t pb_1[] = {1,1,1,1};       // pb_*: per-plane pixel pitch, indexed by plane number
++static const uint8_t pb_12[] = {1,2,2,2};
++static const uint8_t pb_24[] = {2,4,4,4};
++static const uint8_t pb_4[] = {4,4,4,4};
++// Fill in the plane descriptions of pic from an ffmpeg frame; returns 0 on success
++static int set_pic_from_frame(picture_t * const pic, const AVFrame * const frame)
++{
++    const uint8_t * hs = shift_01;
++    const uint8_t * ws = shift_01;
++    const uint8_t * pb = pb_1;
++    // Plane count and pixel pitch table depend on the picture's chroma
++    switch (pic->format.i_chroma)
++    {
++        case VLC_CODEC_MMAL_ZC_RGB32:
++            pic->i_planes = 1;
++            pb = pb_4;
++            break;
++        case VLC_CODEC_MMAL_ZC_I420:
++            pic->i_planes = 3;
++            break;
++        case VLC_CODEC_MMAL_ZC_SAND8:
++            pic->i_planes = 2;
++            pb = pb_12;
++            break;
++        case VLC_CODEC_MMAL_ZC_SAND10:
++        case VLC_CODEC_MMAL_ZC_SAND30:  // Lies: SAND30 is "special"
++            pic->i_planes = 2;
++            pb = pb_24;
++            break;
++        default:
++            return VLC_EGENERIC;        // Any other chroma is rejected
++    }
++    // Plane pointers are rebased onto the address of the buffer returned by cma_buf_pic_get(pic)
++    const cma_buf_t * const cb = cma_buf_pic_get(pic);
++    uint8_t * const data = cma_buf_addr(cb);
++    if (data == NULL) {
++        return VLC_ENOMEM;
++    }
++    // End of the frame data, taken as cma_buf_size() bytes past plane 0
++    uint8_t * frame_end = frame->data[0] + cma_buf_size(cb);
++    for (int i = 0; i != pic->i_planes; ++i) {
++        // Calculate lines from gap between planes
++        // This will give us an accurate "height" for later use by MMAL
++        const int lines = ((i + 1 == pic->i_planes ? frame_end : frame->data[i + 1]) -
++                           frame->data[i]) / frame->linesize[i];
++        pic->p[i] = (plane_t){
++            .p_pixels = data + (frame->data[i] - frame->data[0]),  // same offset from plane 0 as in the frame
++            .i_lines = lines,
++            .i_pitch = frame->linesize[i],
++            .i_pixel_pitch = pb[i],
++            .i_visible_lines = av_frame_cropped_height(frame) >> hs[i],
++            .i_visible_pitch = av_frame_cropped_width(frame) >> ws[i]
++        };
++    }
++    return 0;
++}
++
++
++//============================================================================
++//
++// Nicked from avcodec/fourcc.c
++//
++// * Really we should probably use that directly
++
++/*
++ * Video Codecs
++ */
++
++struct vlc_avcodec_fourcc   /* One row of the video_codecs[] lookup table */
++{
++    vlc_fourcc_t i_fourcc;  /* VLC codec fourcc (VLC_CODEC_*) */
++    unsigned i_codec;       /* libavcodec codec id (AV_CODEC_ID_*) */
++};
++
++
++static const struct vlc_avcodec_fourcc video_codecs[] =
++{   /* Scanned front to back by ZcGetFfmpegCodec(): the first i_fourcc match wins */
++    { VLC_CODEC_MP1V, AV_CODEC_ID_MPEG1VIDEO },
++    { VLC_CODEC_MP2V, AV_CODEC_ID_MPEG2VIDEO }, /* prefer MPEG2 over MPEG1 */
++    { VLC_CODEC_MPGV, AV_CODEC_ID_MPEG2VIDEO }, /* prefer MPEG2 over MPEG1 */
++    /* AV_CODEC_ID_MPEG2VIDEO_XVMC */
++    { VLC_CODEC_H261, AV_CODEC_ID_H261 },
++    { VLC_CODEC_H263, AV_CODEC_ID_H263 },
++    { VLC_CODEC_RV10, AV_CODEC_ID_RV10 },
++    { VLC_CODEC_RV13, AV_CODEC_ID_RV10 },
++    { VLC_CODEC_RV20, AV_CODEC_ID_RV20 },
++    { VLC_CODEC_MJPG, AV_CODEC_ID_MJPEG },
++    { VLC_CODEC_MJPGB, AV_CODEC_ID_MJPEGB },
++    { VLC_CODEC_LJPG, AV_CODEC_ID_LJPEG },
++    { VLC_CODEC_SP5X, AV_CODEC_ID_SP5X },
++    { VLC_CODEC_JPEGLS, AV_CODEC_ID_JPEGLS },
++    { VLC_CODEC_MP4V, AV_CODEC_ID_MPEG4 },
++    /* AV_CODEC_ID_RAWVIDEO */
++    { VLC_CODEC_DIV1, AV_CODEC_ID_MSMPEG4V1 },
++    { VLC_CODEC_DIV2, AV_CODEC_ID_MSMPEG4V2 },
++    { VLC_CODEC_DIV3, AV_CODEC_ID_MSMPEG4V3 },
++    { VLC_CODEC_WMV1, AV_CODEC_ID_WMV1 },
++    { VLC_CODEC_WMV2, AV_CODEC_ID_WMV2 },
++    { VLC_CODEC_H263P, AV_CODEC_ID_H263P },
++    { VLC_CODEC_H263I, AV_CODEC_ID_H263I },
++    { VLC_CODEC_FLV1, AV_CODEC_ID_FLV1 },
++    { VLC_CODEC_SVQ1, AV_CODEC_ID_SVQ1 },
++    { VLC_CODEC_SVQ3, AV_CODEC_ID_SVQ3 },
++    { VLC_CODEC_DV, AV_CODEC_ID_DVVIDEO },
++    { VLC_CODEC_HUFFYUV, AV_CODEC_ID_HUFFYUV },
++    { VLC_CODEC_CYUV, AV_CODEC_ID_CYUV },
++    { VLC_CODEC_H264, AV_CODEC_ID_H264 },
++    { VLC_CODEC_INDEO3, AV_CODEC_ID_INDEO3 },
++    { VLC_CODEC_VP3, AV_CODEC_ID_VP3 },
++    { VLC_CODEC_THEORA, AV_CODEC_ID_THEORA },
++#if ( !defined( WORDS_BIGENDIAN ) )
++    /* Asus Video (Another thing that doesn't work on PPC) */
++    { VLC_CODEC_ASV1, AV_CODEC_ID_ASV1 },
++    { VLC_CODEC_ASV2, AV_CODEC_ID_ASV2 },
++#endif
++    { VLC_CODEC_FFV1, AV_CODEC_ID_FFV1 },
++    { VLC_CODEC_4XM, AV_CODEC_ID_4XM },
++    { VLC_CODEC_VCR1, AV_CODEC_ID_VCR1 },
++    { VLC_CODEC_CLJR, AV_CODEC_ID_CLJR },
++    { VLC_CODEC_MDEC, AV_CODEC_ID_MDEC },
++    { VLC_CODEC_ROQ, AV_CODEC_ID_ROQ },
++    { VLC_CODEC_INTERPLAY, AV_CODEC_ID_INTERPLAY_VIDEO },
++    { VLC_CODEC_XAN_WC3, AV_CODEC_ID_XAN_WC3 },
++    { VLC_CODEC_XAN_WC4, AV_CODEC_ID_XAN_WC4 },
++    { VLC_CODEC_RPZA, AV_CODEC_ID_RPZA },
++    { VLC_CODEC_CINEPAK, AV_CODEC_ID_CINEPAK },
++    { VLC_CODEC_WS_VQA, AV_CODEC_ID_WS_VQA },
++    { VLC_CODEC_MSRLE, AV_CODEC_ID_MSRLE },
++    { VLC_CODEC_MSVIDEO1, AV_CODEC_ID_MSVIDEO1 },
++    { VLC_CODEC_IDCIN, AV_CODEC_ID_IDCIN },
++    { VLC_CODEC_8BPS, AV_CODEC_ID_8BPS },
++    { VLC_CODEC_SMC, AV_CODEC_ID_SMC },
++    { VLC_CODEC_FLIC, AV_CODEC_ID_FLIC },
++    { VLC_CODEC_TRUEMOTION1, AV_CODEC_ID_TRUEMOTION1 },
++    { VLC_CODEC_VMDVIDEO, AV_CODEC_ID_VMDVIDEO },
++    { VLC_CODEC_LCL_MSZH, AV_CODEC_ID_MSZH },
++    { VLC_CODEC_LCL_ZLIB, AV_CODEC_ID_ZLIB },
++    { VLC_CODEC_QTRLE, AV_CODEC_ID_QTRLE },
++    { VLC_CODEC_TSCC, AV_CODEC_ID_TSCC },
++    { VLC_CODEC_ULTI, AV_CODEC_ID_ULTI },
++    { VLC_CODEC_QDRAW, AV_CODEC_ID_QDRAW },
++    { VLC_CODEC_VIXL, AV_CODEC_ID_VIXL },
++    { VLC_CODEC_QPEG, AV_CODEC_ID_QPEG },
++    { VLC_CODEC_PNG, AV_CODEC_ID_PNG },
++    { VLC_CODEC_PPM, AV_CODEC_ID_PPM },
++    /* AV_CODEC_ID_PBM */
++    { VLC_CODEC_PGM, AV_CODEC_ID_PGM },
++    { VLC_CODEC_PGMYUV, AV_CODEC_ID_PGMYUV },
++    { VLC_CODEC_PAM, AV_CODEC_ID_PAM },
++    { VLC_CODEC_FFVHUFF, AV_CODEC_ID_FFVHUFF },
++    { VLC_CODEC_RV30, AV_CODEC_ID_RV30 },
++    { VLC_CODEC_RV40, AV_CODEC_ID_RV40 },
++    { VLC_CODEC_VC1,  AV_CODEC_ID_VC1 },
++    { VLC_CODEC_WMVA, AV_CODEC_ID_VC1 },
++    { VLC_CODEC_WMV3, AV_CODEC_ID_WMV3 },
++    { VLC_CODEC_WMVP, AV_CODEC_ID_WMV3 },
++    { VLC_CODEC_LOCO, AV_CODEC_ID_LOCO },
++    { VLC_CODEC_WNV1, AV_CODEC_ID_WNV1 },
++    { VLC_CODEC_AASC, AV_CODEC_ID_AASC },
++    { VLC_CODEC_INDEO2, AV_CODEC_ID_INDEO2 },
++    { VLC_CODEC_FRAPS, AV_CODEC_ID_FRAPS },
++    { VLC_CODEC_TRUEMOTION2, AV_CODEC_ID_TRUEMOTION2 },
++    { VLC_CODEC_BMP, AV_CODEC_ID_BMP },
++    { VLC_CODEC_CSCD, AV_CODEC_ID_CSCD },
++    { VLC_CODEC_MMVIDEO, AV_CODEC_ID_MMVIDEO },
++    { VLC_CODEC_ZMBV, AV_CODEC_ID_ZMBV },
++    { VLC_CODEC_AVS, AV_CODEC_ID_AVS },
++    { VLC_CODEC_SMACKVIDEO, AV_CODEC_ID_SMACKVIDEO },
++    { VLC_CODEC_NUV, AV_CODEC_ID_NUV },
++    { VLC_CODEC_KMVC, AV_CODEC_ID_KMVC },
++    { VLC_CODEC_FLASHSV, AV_CODEC_ID_FLASHSV },
++    { VLC_CODEC_CAVS, AV_CODEC_ID_CAVS },
++    { VLC_CODEC_JPEG2000, AV_CODEC_ID_JPEG2000 },
++    { VLC_CODEC_VMNC, AV_CODEC_ID_VMNC },
++    { VLC_CODEC_VP5, AV_CODEC_ID_VP5 },
++    { VLC_CODEC_VP6, AV_CODEC_ID_VP6 },
++    { VLC_CODEC_VP6F, AV_CODEC_ID_VP6F },
++    { VLC_CODEC_TARGA, AV_CODEC_ID_TARGA },
++    { VLC_CODEC_DSICINVIDEO, AV_CODEC_ID_DSICINVIDEO },
++    { VLC_CODEC_TIERTEXSEQVIDEO, AV_CODEC_ID_TIERTEXSEQVIDEO },
++    { VLC_CODEC_TIFF, AV_CODEC_ID_TIFF },
++    { VLC_CODEC_GIF, AV_CODEC_ID_GIF },
++    { VLC_CODEC_DXA, AV_CODEC_ID_DXA },
++    { VLC_CODEC_DNXHD, AV_CODEC_ID_DNXHD },
++    { VLC_CODEC_THP, AV_CODEC_ID_THP },
++    { VLC_CODEC_SGI, AV_CODEC_ID_SGI },
++    { VLC_CODEC_C93, AV_CODEC_ID_C93 },
++    { VLC_CODEC_BETHSOFTVID, AV_CODEC_ID_BETHSOFTVID },
++    /* AV_CODEC_ID_PTX */
++    { VLC_CODEC_TXD, AV_CODEC_ID_TXD },
++    { VLC_CODEC_VP6A, AV_CODEC_ID_VP6A },
++    { VLC_CODEC_AMV, AV_CODEC_ID_AMV },
++    { VLC_CODEC_VB, AV_CODEC_ID_VB },
++    { VLC_CODEC_PCX, AV_CODEC_ID_PCX },
++    /* AV_CODEC_ID_SUNRAST */
++    { VLC_CODEC_INDEO4, AV_CODEC_ID_INDEO4 },
++    { VLC_CODEC_INDEO5, AV_CODEC_ID_INDEO5 },
++    { VLC_CODEC_MIMIC, AV_CODEC_ID_MIMIC },
++    { VLC_CODEC_RL2, AV_CODEC_ID_RL2 },
++    { VLC_CODEC_ESCAPE124, AV_CODEC_ID_ESCAPE124 },
++    { VLC_CODEC_DIRAC, AV_CODEC_ID_DIRAC },
++    { VLC_CODEC_BFI, AV_CODEC_ID_BFI },
++    { VLC_CODEC_CMV, AV_CODEC_ID_CMV },
++    { VLC_CODEC_MOTIONPIXELS, AV_CODEC_ID_MOTIONPIXELS },
++    { VLC_CODEC_TGV, AV_CODEC_ID_TGV },
++    { VLC_CODEC_TGQ, AV_CODEC_ID_TGQ },
++    { VLC_CODEC_TQI, AV_CODEC_ID_TQI },
++    { VLC_CODEC_AURA, AV_CODEC_ID_AURA },
++    /* AV_CODEC_ID_AURA2 */
++    /* AV_CODEC_ID_V210X */
++    { VLC_CODEC_TMV, AV_CODEC_ID_TMV },
++    { VLC_CODEC_V210, AV_CODEC_ID_V210 },
++#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT( 54, 50, 100 ) && LIBAVCODEC_VERSION_MICRO >= 100
++    { VLC_CODEC_VUYA, AV_CODEC_ID_AYUV },
++#endif
++    /* AV_CODEC_ID_DPX */
++    { VLC_CODEC_MAD, AV_CODEC_ID_MAD },
++    { VLC_CODEC_FRWU, AV_CODEC_ID_FRWU },
++    { VLC_CODEC_FLASHSV2, AV_CODEC_ID_FLASHSV2 },
++    /* AV_CODEC_ID_CDGRAPHICS */
++    /* AV_CODEC_ID_R210 */
++    { VLC_CODEC_ANM, AV_CODEC_ID_ANM },
++    { VLC_CODEC_BINKVIDEO, AV_CODEC_ID_BINKVIDEO },
++    /* AV_CODEC_ID_IFF_ILBM */
++    /* AV_CODEC_ID_IFF_BYTERUN1 */
++    { VLC_CODEC_KGV1, AV_CODEC_ID_KGV1 },
++    { VLC_CODEC_YOP, AV_CODEC_ID_YOP },
++    { VLC_CODEC_VP8, AV_CODEC_ID_VP8 },
++    /* AV_CODEC_ID_PICTOR */
++    /* AV_CODEC_ID_ANSI */
++    /* AV_CODEC_ID_A64_MULTI */
++    /* AV_CODEC_ID_A64_MULTI5 */
++    /* AV_CODEC_ID_R10K */
++    { VLC_CODEC_MXPEG, AV_CODEC_ID_MXPEG },
++    { VLC_CODEC_LAGARITH, AV_CODEC_ID_LAGARITH },
++    { VLC_CODEC_PRORES, AV_CODEC_ID_PRORES },
++    { VLC_CODEC_JV, AV_CODEC_ID_JV },
++    { VLC_CODEC_DFA, AV_CODEC_ID_DFA },
++    { VLC_CODEC_WMVP, AV_CODEC_ID_WMV3IMAGE }, /* NOTE(review): never matched, VLC_CODEC_WMVP -> WMV3 appears earlier */
++    { VLC_CODEC_WMVP2, AV_CODEC_ID_VC1IMAGE },
++    { VLC_CODEC_UTVIDEO, AV_CODEC_ID_UTVIDEO },
++    { VLC_CODEC_BMVVIDEO, AV_CODEC_ID_BMV_VIDEO },
++    { VLC_CODEC_VBLE, AV_CODEC_ID_VBLE },
++    { VLC_CODEC_DXTORY, AV_CODEC_ID_DXTORY },
++    /* AV_CODEC_ID_V410 */
++    /* AV_CODEC_ID_XWD */
++    { VLC_CODEC_CDXL, AV_CODEC_ID_CDXL },
++    /* AV_CODEC_ID_XBM */
++    /* AV_CODEC_ID_ZEROCODEC */
++    { VLC_CODEC_MSS1, AV_CODEC_ID_MSS1 },
++    { VLC_CODEC_MSA1, AV_CODEC_ID_MSA1 },
++    { VLC_CODEC_TSC2, AV_CODEC_ID_TSCC2 },
++    { VLC_CODEC_MTS2, AV_CODEC_ID_MTS2 },
++    { VLC_CODEC_CLLC, AV_CODEC_ID_CLLC },
++    { VLC_CODEC_MSS2, AV_CODEC_ID_MSS2 },
++    { VLC_CODEC_VP9, AV_CODEC_ID_VP9 },
++#if LIBAVCODEC_VERSION_CHECK( 57, 26, 0, 83, 101 )
++    { VLC_CODEC_AV1, AV_CODEC_ID_AV1 },
++#endif
++    { VLC_CODEC_ICOD, AV_CODEC_ID_AIC },
++    /* AV_CODEC_ID_ESCAPE130 */
++    { VLC_CODEC_G2M4, AV_CODEC_ID_G2M },
++    { VLC_CODEC_G2M2, AV_CODEC_ID_G2M },
++    { VLC_CODEC_G2M3, AV_CODEC_ID_G2M },
++    /* AV_CODEC_ID_WEBP */
++    { VLC_CODEC_HNM4_VIDEO, AV_CODEC_ID_HNM4_VIDEO },
++    { VLC_CODEC_HEVC, AV_CODEC_ID_HEVC },
++
++    { VLC_CODEC_FIC , AV_CODEC_ID_FIC },
++    /* AV_CODEC_ID_ALIAS_PIX */
++    /* AV_CODEC_ID_BRENDER_PIX */
++    /* AV_CODEC_ID_PAF_VIDEO */
++    /* AV_CODEC_ID_EXR */
++
++    { VLC_CODEC_VP7 , AV_CODEC_ID_VP7 },
++    /* AV_CODEC_ID_SANM */
++    /* AV_CODEC_ID_SGIRLE */
++    /* AV_CODEC_ID_MVC1 */
++    /* AV_CODEC_ID_MVC2 */
++    { VLC_CODEC_HQX, AV_CODEC_ID_HQX },
++
++    { VLC_CODEC_TDSC, AV_CODEC_ID_TDSC },
++
++    { VLC_CODEC_HQ_HQA, AV_CODEC_ID_HQ_HQA },
++
++    { VLC_CODEC_HAP, AV_CODEC_ID_HAP },
++    /* AV_CODEC_ID_DDS */
++
++    { VLC_CODEC_DXV, AV_CODEC_ID_DXV },
++
++    /* ffmpeg only: AV_CODEC_ID_BRENDER_PIX */
++    /* ffmpeg only: AV_CODEC_ID_Y41P */
++    /* ffmpeg only: AV_CODEC_ID_EXR */
++    /* ffmpeg only: AV_CODEC_ID_AVRP */
++    /* ffmpeg only: AV_CODEC_ID_012V */
++    /* ffmpeg only: AV_CODEC_ID_AVUI */
++    /* ffmpeg only: AV_CODEC_ID_TARGA_Y216 */
++    /* ffmpeg only: AV_CODEC_ID_V308 */
++    /* ffmpeg only: AV_CODEC_ID_V408 */
++    /* ffmpeg only: AV_CODEC_ID_YUV4 */
++    /* ffmpeg only: AV_CODEC_ID_SANM */
++    /* ffmpeg only: AV_CODEC_ID_PAF_VIDEO */
++    /* ffmpeg only: AV_CODEC_ID_AVRN */
++    /* ffmpeg only: AV_CODEC_ID_CPIA */
++    /* ffmpeg only: AV_CODEC_ID_XFACE */
++    /* ffmpeg only: AV_CODEC_ID_SGIRLE */
++    /* ffmpeg only: AV_CODEC_ID_MVC1 */
++    /* ffmpeg only: AV_CODEC_ID_MVC2 */
++    /* ffmpeg only: AV_CODEC_ID_SNOW */
++    /* ffmpeg only: AV_CODEC_ID_SMVJPEG */
++
++#if LIBAVCODEC_VERSION_CHECK( 57, 999, 999, 24, 102 )
++    { VLC_CODEC_CINEFORM, AV_CODEC_ID_CFHD },
++#endif
++
++#if LIBAVCODEC_VERSION_CHECK( 57, 999, 999, 70, 100 )
++    { VLC_CODEC_PIXLET, AV_CODEC_ID_PIXLET },
++#endif
++
++#if LIBAVCODEC_VERSION_CHECK( 57, 999, 999, 71, 101 )
++    { VLC_CODEC_SPEEDHQ, AV_CODEC_ID_SPEEDHQ },
++#endif
++
++#if LIBAVCODEC_VERSION_CHECK( 57, 999, 999, 79, 100 )
++    { VLC_CODEC_FMVC, AV_CODEC_ID_FMVC },
++#endif
++};
++
++// *** Really we should probably use GetFfmpegCodec with a pre-kludge for the bits we care about
++// Looks up the libavcodec codec id for a VLC fourcc in video_codecs[].
++// On a match the non-NULL out-parameters are filled in and true is returned;
++// false means the fourcc is not in the table.
++static bool
++ZcGetFfmpegCodec( enum es_format_category_e cat, vlc_fourcc_t i_fourcc,
++                     unsigned *pi_ffmpeg_codec, const char **ppsz_name )
++{
++    const struct vlc_avcodec_fourcc *const end =
++        video_codecs + ARRAY_SIZE(video_codecs);
++    const vlc_fourcc_t wanted = vlc_fourcc_GetCodec( cat, i_fourcc );
++
++    for( const struct vlc_avcodec_fourcc *entry = video_codecs; entry != end; ++entry )
++    {
++        if( entry->i_fourcc != wanted )
++            continue;
++
++        if( pi_ffmpeg_codec != NULL )
++            *pi_ffmpeg_codec = entry->i_codec;
++        if( ppsz_name != NULL )
++            *ppsz_name = vlc_fourcc_GetDescription( cat, wanted );
++        return true;
++    }
++    return false;
++}
++
++
++
++//============================================================================
++// Derived from codec/avcodec/avcodec.c
++
++static AVCodecContext *
++ZcFfmpeg_AllocContext( decoder_t *p_dec,
++                                     const AVCodec **restrict codecp )
++{
++    unsigned i_codec_id;
++    const char *psz_namecodec;
++    const AVCodec *p_codec = NULL;
++    /* Returns a new codec context and stores the chosen codec in *codecp; NULL on any failure */
++    /* *** determine codec type *** */
++    if( !ZcGetFfmpegCodec( p_dec->fmt_in.i_cat, p_dec->fmt_in.i_codec,
++                         &i_codec_id, &psz_namecodec ) )
++         return NULL;
++
++    msg_Dbg( p_dec, "using %s %s", AVPROVIDER(LIBAVCODEC), LIBAVCODEC_IDENT );
++
++    /* Initialization must be done before avcodec_find_decoder() */
++    vlc_init_avcodec(VLC_OBJECT(p_dec));
++
++    /* *** ask ffmpeg for a decoder *** */
++    char *psz_decoder = var_InheritString( p_dec, "avcodec-codec" );
++    if( psz_decoder != NULL )
++    {
++        /* An explicitly named decoder is only accepted if its id matches the stream's codec id */
++        p_codec = avcodec_find_decoder_by_name( psz_decoder );
++        if( !p_codec )
++            msg_Err( p_dec, "Decoder `%s' not found", psz_decoder );
++        else if( p_codec->id != i_codec_id )
++        {
++            msg_Err( p_dec, "Decoder `%s' can't handle %4.4s",
++                    psz_decoder, (char*)&p_dec->fmt_in.i_codec );
++            p_codec = NULL;
++        }
++        free( psz_decoder );
++    }
++    if( !p_codec )
++//        p_codec = avcodec_find_decoder( i_codec_id );   (original lookup, replaced by the block below)
++    {
++        if( p_dec->fmt_in.i_codec != VLC_CODEC_HEVC )
++            p_codec = avcodec_find_decoder(i_codec_id);
++        else
++        {
++            /* HEVC is looked up by name: "hevc" on a Pi4, "hevc_rpi" otherwise */
++            psz_namecodec = rpi_is_model_pi4() ? "hevc" : "hevc_rpi";
++            msg_Info(p_dec, "Looking for HEVC decoder '%s'", psz_namecodec);
++            p_codec = avcodec_find_decoder_by_name(psz_namecodec);
++        }
++    }
++
++    if( !p_codec )
++    {
++        msg_Dbg( p_dec, "codec not found (%s)", psz_namecodec );
++        return NULL;
++    }
++
++    *codecp = p_codec;
++
++    /* *** get a p_context *** */
++    AVCodecContext *avctx = avcodec_alloc_context3(p_codec);
++    if( unlikely(avctx == NULL) )
++        return NULL;
++    /* Debug flags come from "avcodec-debug"; opaque points back at the decoder */
++    avctx->debug = var_InheritInteger( p_dec, "avcodec-debug" );
++    avctx->opaque = p_dec;
++    return avctx;
++}
++
++/*****************************************************************************
++ * ZcFfmpeg_OpenCodec: open the codec with the zero-copy allocator installed
++ *****************************************************************************/
++
++static int
++ZcFfmpeg_OpenCodec( decoder_t *p_dec, AVCodecContext *ctx,
++                      const AVCodec *codec )
++{
++    char *psz_opts = var_InheritString( p_dec, "avcodec-options" );
++    AVDictionary *options = NULL;
++    int ret;
++    // Returns VLC_SUCCESS, or VLC_EGENERIC if ZC init or avcodec_open2() fails
++    if (psz_opts) {
++        vlc_av_get_options(psz_opts, &options);
++        free(psz_opts);
++    }
++
++    if (av_rpi_zc_init2(ctx, p_dec, zc_alloc_buf, zc_free_pool) != 0)
++    {
++        msg_Err(p_dec, "Failed to init AV ZC");
++        av_dict_free(&options);     // Parsed options were leaked on this early return
++        return VLC_EGENERIC;
++    }
++
++    vlc_avcodec_lock();
++    ret = avcodec_open2( ctx, codec, options ? &options : NULL );
++    vlc_avcodec_unlock();
++    // Entries still in the dictionary after the open were not consumed: report them
++    AVDictionaryEntry *t = NULL;
++    while ((t = av_dict_get(options, "", t, AV_DICT_IGNORE_SUFFIX))) {
++        msg_Err( p_dec, "Unknown option \"%s\"", t->key );
++    }
++    av_dict_free(&options);
++    if( ret < 0 )
++    {
++        msg_Err( p_dec, "cannot start codec (%s)", codec->name );
++        return VLC_EGENERIC;
++    }
++
++    msg_Dbg( p_dec, "codec (%s) started", codec->name );
++    return VLC_SUCCESS;
++}
++
++//============================================================================
++// Derived from 3.0.7.1 codec/avcodec/video.c
++
++static inline void wait_mt(decoder_sys_t *sys)
++{
++#if 1
++    // No-op: as we only ever update the output in our main thread the
++    // sem_mt lock is redundant, so the semaphore wait below is compiled out
++    VLC_UNUSED(sys);
++#else
++    vlc_sem_wait(&sys->sem_mt);
++#endif
++}
++
++static inline void post_mt(decoder_sys_t *sys)
++{
++#if 1
++    // No-op: as we only ever update the output in our main thread the
++    // sem_mt lock is redundant, so the semaphore post below is compiled out
++    VLC_UNUSED(sys);
++#else
++    vlc_sem_post(&sys->sem_mt);
++#endif
++}
++
++/*****************************************************************************
++ * Local prototypes
++ *****************************************************************************/
++static void ffmpeg_InitCodec      ( decoder_t * );
++static int  DecodeVideo( decoder_t *, block_t * );
++static void Flush( decoder_t * );
++
++static uint32_t ffmpeg_CodecTag( vlc_fourcc_t fcc )
++{   /* Packs the fourcc's bytes, in memory order, into a tag with the first byte lowest */
++    const uint8_t *p = (const uint8_t*)&fcc;
++    return p[0] | ((uint32_t)p[1] << 8) | ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24); /* widen first: (int)p[3] << 24 overflows when bit 7 is set */
++}
++
++/*****************************************************************************
++ * Local Functions
++ *****************************************************************************/
++
++/**
++ * Fills fmt with the decoder output format for pix_fmt. Returns 0, or -1 when
++ * pix_fmt has no zero-copy chroma or the frame size is invalid. */
++static int lavc_GetVideoFormat(decoder_t *dec, video_format_t *restrict fmt,
++                               AVCodecContext *ctx, enum AVPixelFormat pix_fmt,
++                               enum AVPixelFormat sw_pix_fmt)
++{
++    int width = ctx->coded_width;
++    int height = ctx->coded_height;
++    /* Coded size becomes the full size; ctx->width/height the visible size */
++    video_format_Init(fmt, 0);
++
++#if 1
++    VLC_UNUSED(sw_pix_fmt);
++    if ((fmt->i_chroma = ZcFindVlcChroma(pix_fmt)) == 0)
++        return -1;
++#else
++    if (pix_fmt == sw_pix_fmt)
++    {   /* software decoding */
++        int aligns[AV_NUM_DATA_POINTERS];
++
++        if (GetVlcChroma(fmt, pix_fmt))
++            return -1;
++
++        /* The libavcodec palette can only be fetched when the first output
++         * frame is decoded. Assume that the current chroma is RGB32 while we
++         * are waiting for a valid palette. Indeed, fmt_out.video.p_palette
++         * doesn't trigger a new vout request, but a new chroma yes. */
++        if (pix_fmt == AV_PIX_FMT_PAL8 && !dec->fmt_out.video.p_palette)
++            fmt->i_chroma = VLC_CODEC_RGB32;
++
++        avcodec_align_dimensions2(ctx, &width, &height, aligns);
++    }
++    else /* hardware decoding */
++        fmt->i_chroma = vlc_va_GetChroma(pix_fmt, sw_pix_fmt);
++#endif
++    /* Reject sizes that are zero, above 8192 or smaller than the visible size */
++    if( width == 0 || height == 0 || width > 8192 || height > 8192 ||
++        width < ctx->width || height < ctx->height )
++    {
++        msg_Err(dec, "Invalid frame size %dx%d vsz %dx%d",
++                     width, height, ctx->width, ctx->height );
++        return -1; /* invalid display size */
++    }
++
++    fmt->i_width = width;
++    fmt->i_height = height;
++    fmt->i_visible_width = ctx->width;
++    fmt->i_visible_height = ctx->height;
++
++    /* If an aspect-ratio was specified in the input format then force it */
++    if (dec->fmt_in.video.i_sar_num > 0 && dec->fmt_in.video.i_sar_den > 0)
++    {
++        fmt->i_sar_num = dec->fmt_in.video.i_sar_num;
++        fmt->i_sar_den = dec->fmt_in.video.i_sar_den;
++    }
++    else
++    {
++        fmt->i_sar_num = ctx->sample_aspect_ratio.num;
++        fmt->i_sar_den = ctx->sample_aspect_ratio.den;
++
++        if (fmt->i_sar_num == 0 || fmt->i_sar_den == 0)
++            fmt->i_sar_num = fmt->i_sar_den = 1;
++    }
++    /* Frame rate: input format first, then ctx->framerate, then ctx->time_base */
++    if (dec->fmt_in.video.i_frame_rate > 0
++     && dec->fmt_in.video.i_frame_rate_base > 0)
++    {
++        fmt->i_frame_rate = dec->fmt_in.video.i_frame_rate;
++        fmt->i_frame_rate_base = dec->fmt_in.video.i_frame_rate_base;
++    }
++    else if (ctx->framerate.num > 0 && ctx->framerate.den > 0)
++    {
++        fmt->i_frame_rate = ctx->framerate.num;
++        fmt->i_frame_rate_base = ctx->framerate.den;
++# if LIBAVCODEC_VERSION_MICRO <  100
++        // for some reason libav doesn't think framerate represents the same thing as it does in ffmpeg
++        fmt->i_frame_rate_base *= __MAX(ctx->ticks_per_frame, 1);
++# endif
++    }
++    else if (ctx->time_base.num > 0 && ctx->time_base.den > 0)
++    {
++        fmt->i_frame_rate = ctx->time_base.den;
++        fmt->i_frame_rate_base = ctx->time_base.num
++                                 * __MAX(ctx->ticks_per_frame, 1);
++    }
++
++    /* FIXME we should only set the known values and let the core decide
++     * later of fallbacks, but we can't do that with a boolean */
++    switch ( ctx->color_range )
++    {
++    case AVCOL_RANGE_JPEG:
++        fmt->b_color_range_full = true;
++        break;
++    case AVCOL_RANGE_UNSPECIFIED:
++        fmt->b_color_range_full = !vlc_fourcc_IsYUV( fmt->i_chroma );
++        break;
++    case AVCOL_RANGE_MPEG:
++    default:
++        fmt->b_color_range_full = false;
++        break;
++    }
++    /* In the four switches below, unlisted values leave the field as initialised */
++    switch( ctx->colorspace )
++    {
++        case AVCOL_SPC_BT709:
++            fmt->space = COLOR_SPACE_BT709;
++            break;
++        case AVCOL_SPC_SMPTE170M:
++        case AVCOL_SPC_BT470BG:
++            fmt->space = COLOR_SPACE_BT601;
++            break;
++        case AVCOL_SPC_BT2020_NCL:
++        case AVCOL_SPC_BT2020_CL:
++            fmt->space = COLOR_SPACE_BT2020;
++            break;
++        default:
++            break;
++    }
++
++    switch( ctx->color_trc )
++    {
++        case AVCOL_TRC_LINEAR:
++            fmt->transfer = TRANSFER_FUNC_LINEAR;
++            break;
++        case AVCOL_TRC_GAMMA22:
++            fmt->transfer = TRANSFER_FUNC_SRGB;
++            break;
++        case AVCOL_TRC_BT709:
++            fmt->transfer = TRANSFER_FUNC_BT709;
++            break;
++        case AVCOL_TRC_SMPTE170M:
++        case AVCOL_TRC_BT2020_10:
++        case AVCOL_TRC_BT2020_12:
++            fmt->transfer = TRANSFER_FUNC_BT2020;
++            break;
++#if LIBAVUTIL_VERSION_CHECK( 55, 14, 0, 31, 100)
++        case AVCOL_TRC_ARIB_STD_B67:
++            fmt->transfer = TRANSFER_FUNC_ARIB_B67;
++            break;
++#endif
++#if LIBAVUTIL_VERSION_CHECK( 55, 17, 0, 37, 100)
++        case AVCOL_TRC_SMPTE2084:
++            fmt->transfer = TRANSFER_FUNC_SMPTE_ST2084;
++            break;
++        case AVCOL_TRC_SMPTE240M:
++            fmt->transfer = TRANSFER_FUNC_SMPTE_240;
++            break;
++        case AVCOL_TRC_GAMMA28:
++            fmt->transfer = TRANSFER_FUNC_BT470_BG;
++            break;
++#endif
++        default:
++            break;
++    }
++
++    switch( ctx->color_primaries )
++    {
++        case AVCOL_PRI_BT709:
++            fmt->primaries = COLOR_PRIMARIES_BT709;
++            break;
++        case AVCOL_PRI_BT470BG:
++            fmt->primaries = COLOR_PRIMARIES_BT601_625;
++            break;
++        case AVCOL_PRI_SMPTE170M:
++        case AVCOL_PRI_SMPTE240M:
++            fmt->primaries = COLOR_PRIMARIES_BT601_525;
++            break;
++        case AVCOL_PRI_BT2020:
++            fmt->primaries = COLOR_PRIMARIES_BT2020;
++            break;
++        default:
++            break;
++    }
++
++    switch( ctx->chroma_sample_location )
++    {
++        case AVCHROMA_LOC_LEFT:
++            fmt->chroma_location = CHROMA_LOCATION_LEFT;
++            break;
++        case AVCHROMA_LOC_CENTER:
++            fmt->chroma_location = CHROMA_LOCATION_CENTER;
++            break;
++        case AVCHROMA_LOC_TOPLEFT:
++            fmt->chroma_location = CHROMA_LOCATION_TOP_LEFT;
++            break;
++        default:
++            break;
++    }
++
++    return 0;
++}
++
++static int lavc_UpdateVideoFormat(decoder_t *dec, AVCodecContext *ctx,
++                                  enum AVPixelFormat fmt,
++                                  enum AVPixelFormat swfmt)
++{
++    video_format_t fmt_out;
++    int val;
++#if TRACE_ALL
++    msg_Dbg(dec, "<<< %s", __func__);
++#endif
++    val = lavc_GetVideoFormat(dec, &fmt_out, ctx, fmt, swfmt);
++    if (val)
++    {
++        msg_Dbg(dec, "Failed to get format");
++        return val;     /* non-zero result of lavc_GetVideoFormat() */
++    }
++
++    /* always have date in fields/ticks units */
++    if(dec->p_sys->pts.i_divider_num)
++        date_Change(&dec->p_sys->pts, fmt_out.i_frame_rate *
++                                      __MAX(ctx->ticks_per_frame, 1),
++                                      fmt_out.i_frame_rate_base);
++    else
++        date_Init(&dec->p_sys->pts, fmt_out.i_frame_rate *
++                                    __MAX(ctx->ticks_per_frame, 1),
++                                    fmt_out.i_frame_rate_base);
++    /* Hand the existing palette pointer to the new format before it replaces dec->fmt_out.video */
++    fmt_out.p_palette = dec-> fmt_out.video.p_palette;
++    dec->fmt_out.video.p_palette = NULL;
++    /* Install the new format, then copy the fields below over from the input format */
++    es_format_Change(&dec->fmt_out, VIDEO_ES, fmt_out.i_chroma);
++    dec->fmt_out.video = fmt_out;
++    dec->fmt_out.video.orientation = dec->fmt_in.video.orientation;
++    dec->fmt_out.video.projection_mode = dec->fmt_in.video.projection_mode;
++    dec->fmt_out.video.multiview_mode = dec->fmt_in.video.multiview_mode;
++    dec->fmt_out.video.pose = dec->fmt_in.video.pose;
++    if ( dec->fmt_in.video.mastering.max_luminance )
++        dec->fmt_out.video.mastering = dec->fmt_in.video.mastering;
++    dec->fmt_out.video.lighting = dec->fmt_in.video.lighting;
++    /* The caller gets back whatever decoder_UpdateVideoFormat() returns */
++    val = decoder_UpdateVideoFormat(dec);
++#if TRACE_ALL
++    msg_Dbg(dec, ">>> %s: rv=%d", __func__, val);
++#endif
++    return val;
++}
++
++static int OpenVideoCodec( decoder_t *p_dec )
++{
++    decoder_sys_t *p_sys = p_dec->p_sys;
++    AVCodecContext *ctx = p_sys->p_context;
++    const AVCodec *codec = p_sys->p_codec;
++    int ret;
++    /* Returns 0 once opened, 1 while still waiting for extradata, < 0 if the open failed */
++    if( ctx->extradata_size <= 0 )
++    {
++        if( codec->id == AV_CODEC_ID_VC1 ||
++            codec->id == AV_CODEC_ID_THEORA )
++        {
++            msg_Warn( p_dec, "waiting for extra data for codec %s",
++                      codec->name );
++            return 1;
++        }
++    }
++    /* Seed the context with the visible and coded sizes from the input format */
++    ctx->width  = p_dec->fmt_in.video.i_visible_width;
++    ctx->height = p_dec->fmt_in.video.i_visible_height;
++
++    ctx->coded_width = p_dec->fmt_in.video.i_width;
++    ctx->coded_height = p_dec->fmt_in.video.i_height;
++
++    ctx->bits_per_coded_sample = p_dec->fmt_in.video.i_bits_per_pixel;
++    p_sys->pix_fmt = AV_PIX_FMT_NONE;
++    p_sys->profile = -1;
++    p_sys->level = -1;
++    cc_Init( &p_sys->cc );
++
++    set_video_color_settings( &p_dec->fmt_in.video, ctx );
++    if( p_dec->fmt_in.video.i_frame_rate_base &&
++        p_dec->fmt_in.video.i_frame_rate &&
++        (double) p_dec->fmt_in.video.i_frame_rate /
++                 p_dec->fmt_in.video.i_frame_rate_base < 6 )
++    {
++        ctx->flags |= AV_CODEC_FLAG_LOW_DELAY;  /* input frame rate is below 6 fps */
++    }
++    /* post_mt()/wait_mt() are currently no-ops, so the open is not bracketed by sem_mt */
++    post_mt( p_sys );
++    ret = ZcFfmpeg_OpenCodec( p_dec, ctx, codec );
++    wait_mt( p_sys );
++    if( ret < 0 )
++        return ret;
++    /* Only logs which threading mode the opened codec ended up with */
++    switch( ctx->active_thread_type )
++    {
++        case FF_THREAD_FRAME:
++            msg_Dbg( p_dec, "using frame thread mode with %d threads",
++                     ctx->thread_count );
++            break;
++        case FF_THREAD_SLICE:
++            msg_Dbg( p_dec, "using slice thread mode with %d threads",
++                     ctx->thread_count );
++            break;
++        case 0:
++            if( ctx->thread_count > 1 )
++                msg_Warn( p_dec, "failed to enable threaded decoding" );
++            break;
++        default:
++            msg_Warn( p_dec, "using unknown thread mode with %d threads",
++                      ctx->thread_count );
++            break;
++    }
++    return 0;
++}
++
++/*****************************************************************************
++ * MmalAvcodecOpenDecoder: open the MMAL zero-copy avcodec video decoder
++ *****************************************************************************
++ * Opens the ffmpeg codec and allocates the decoder state (the vout is opened
++ * after the first decoded frame). Returns VLC_SUCCESS, VLC_ENOMEM or VLC_EGENERIC.
++ *****************************************************************************/
++static int MmalAvcodecOpenDecoder( vlc_object_t *obj )
++{
++    decoder_t *p_dec = (decoder_t *)obj;
++    const AVCodec *p_codec;
++
++    int extra_buffers = var_InheritInteger(p_dec, MMAL_AVCODEC_BUFFERS);
++
++    if (extra_buffers < 0)
++    {
++        extra_buffers = p_dec->fmt_in.video.i_height * p_dec->fmt_in.video.i_width >= 1920 * 1088 ?
++            BUFFERS_IN_FLIGHT_UHD : BUFFERS_IN_FLIGHT;
++    }
++
++    if (extra_buffers <= 0)
++    {
++        msg_Dbg(p_dec, "%s: extra_buffers=%d - cannot use module", __func__, extra_buffers);
++        return VLC_EGENERIC;
++    }
++
++    const vcsm_init_type_t vcsm_type = cma_vcsm_init();
++    const int vcsm_size =
++        vcsm_type == VCSM_INIT_LEGACY ? hw_mmal_get_gpu_mem() : 512 << 20;
++
++#if 1
++    {
++        char buf1[5], buf2[5], buf2a[5];
++        char buf3[5], buf4[5];
++        uint32_t in_fcc = 0;
++        msg_Dbg(p_dec, "%s: <<< (%s/%s)[%s] %dx%d -> (%s/%s) %dx%d [%s/%d] xb:%d", __func__,
++                str_fourcc(buf1, p_dec->fmt_in.i_codec),
++                str_fourcc(buf2, p_dec->fmt_in.video.i_chroma),
++                str_fourcc(buf2a, in_fcc),
++                p_dec->fmt_in.video.i_width, p_dec->fmt_in.video.i_height,
++                str_fourcc(buf3, p_dec->fmt_out.i_codec),
++                str_fourcc(buf4, p_dec->fmt_out.video.i_chroma),
++                p_dec->fmt_out.video.i_width, p_dec->fmt_out.video.i_height,
++                cma_vcsm_init_str(vcsm_type), vcsm_size, extra_buffers);
++    }
++#endif
++
++    if( vcsm_type == VCSM_INIT_NONE )
++        return VLC_EGENERIC;
++#if 1
++    if( (p_dec->fmt_in.i_codec != VLC_CODEC_HEVC &&
++         (vcsm_type == VCSM_INIT_CMA || vcsm_size < (96 << 20))) ||
++        (p_dec->fmt_in.i_codec == VLC_CODEC_HEVC &&
++         vcsm_size < (128 << 20)))
++    {
++        cma_vcsm_exit(vcsm_type);
++        return VLC_EGENERIC;
++    }
++#endif
++
++    AVCodecContext *p_context = ZcFfmpeg_AllocContext( p_dec, &p_codec );
++    if( p_context == NULL )
++    {
++        cma_vcsm_exit(vcsm_type);
++        return VLC_EGENERIC;
++    }
++
++    int i_val;
++
++    /* Allocate the memory needed to store the decoder's structure */
++    decoder_sys_t *p_sys = calloc( 1, sizeof(*p_sys) );
++    if( unlikely(p_sys == NULL) )
++    {
++        avcodec_free_context( &p_context );
++        cma_vcsm_exit(vcsm_type);
++        return VLC_ENOMEM;
++    }
++
++    p_dec->p_sys = p_sys;
++    p_sys->p_context = p_context;
++    p_sys->p_codec = p_codec;
++    p_sys->p_dec = p_dec;
++//    p_sys->p_va = NULL;
++    p_sys->cma_in_flight_max = extra_buffers;
++    p_sys->vcsm_init_type = vcsm_type;
++    vlc_sem_init( &p_sys->sem_mt, 0 );
++
++    /* ***** Fill p_context with init values ***** */
++    p_context->codec_tag = ffmpeg_CodecTag( p_dec->fmt_in.i_original_fourcc ?
++                                p_dec->fmt_in.i_original_fourcc : p_dec->fmt_in.i_codec );
++
++    /*  ***** Get configuration of ffmpeg plugin ***** */
++    p_context->workaround_bugs =
++        var_InheritInteger( p_dec, "avcodec-workaround-bugs" );
++    p_context->err_recognition =
++        var_InheritInteger( p_dec, "avcodec-error-resilience" );
++
++    if( var_CreateGetBool( p_dec, "grayscale" ) )
++        p_context->flags |= AV_CODEC_FLAG_GRAY;
++
++    /* ***** Output always the frames ***** */
++    p_context->flags |= AV_CODEC_FLAG_OUTPUT_CORRUPT;
++
++    i_val = var_CreateGetInteger( p_dec, "avcodec-skiploopfilter" );
++    if( i_val >= 4 ) p_context->skip_loop_filter = AVDISCARD_ALL;
++    else if( i_val == 3 ) p_context->skip_loop_filter = AVDISCARD_NONKEY;
++    else if( i_val == 2 ) p_context->skip_loop_filter = AVDISCARD_BIDIR;
++    else if( i_val == 1 ) p_context->skip_loop_filter = AVDISCARD_NONREF;
++    else p_context->skip_loop_filter = AVDISCARD_DEFAULT;
++
++    if( var_CreateGetBool( p_dec, "avcodec-fast" ) )
++        p_context->flags2 |= AV_CODEC_FLAG2_FAST;
++
++    /* ***** libavcodec frame skipping ***** */
++    p_sys->b_hurry_up = var_CreateGetBool( p_dec, "avcodec-hurry-up" );
++    p_sys->b_show_corrupted = var_CreateGetBool( p_dec, "avcodec-corrupted" );
++
++    i_val = var_CreateGetInteger( p_dec, "avcodec-skip-frame" );
++    if( i_val >= 4 ) p_sys->i_skip_frame = AVDISCARD_ALL;
++    else if( i_val == 3 ) p_sys->i_skip_frame = AVDISCARD_NONKEY;
++    else if( i_val == 2 ) p_sys->i_skip_frame = AVDISCARD_BIDIR;
++    else if( i_val == 1 ) p_sys->i_skip_frame = AVDISCARD_NONREF;
++    else if( i_val == -1 ) p_sys->i_skip_frame = AVDISCARD_NONE;
++    else p_sys->i_skip_frame = AVDISCARD_DEFAULT;
++    p_context->skip_frame = p_sys->i_skip_frame;
++
++    i_val = var_CreateGetInteger( p_dec, "avcodec-skip-idct" );
++    if( i_val >= 4 ) p_context->skip_idct = AVDISCARD_ALL;
++    else if( i_val == 3 ) p_context->skip_idct = AVDISCARD_NONKEY;
++    else if( i_val == 2 ) p_context->skip_idct = AVDISCARD_BIDIR;
++    else if( i_val == 1 ) p_context->skip_idct = AVDISCARD_NONREF;
++    else if( i_val == -1 ) p_context->skip_idct = AVDISCARD_NONE;
++    else p_context->skip_idct = AVDISCARD_DEFAULT;
++
++    /* ***** libavcodec direct rendering ***** */
++    p_sys->b_direct_rendering = false;
++    atomic_init(&p_sys->b_dr_failure, false);
++    if( var_CreateGetBool( p_dec, "avcodec-dr" ) &&
++       (p_codec->capabilities & AV_CODEC_CAP_DR1) &&
++        /* No idea why ... but this fixes flickering on some TSCC streams */
++        p_sys->p_codec->id != AV_CODEC_ID_TSCC &&
++        p_sys->p_codec->id != AV_CODEC_ID_CSCD &&
++        p_sys->p_codec->id != AV_CODEC_ID_CINEPAK )
++    {
++        /* Some codecs set pix_fmt only after the 1st frame has been decoded,
++         * so we need to do another check in ffmpeg_GetFrameBuf() */
++        p_sys->b_direct_rendering = true;
++    }
++
++    p_context->get_format = ZcGetFormat;
++#if 0
++    p_context->get_format = ffmpeg_GetFormat;
++    /* Always use our get_buffer wrapper so we can calculate the
++     * PTS correctly */
++    p_context->get_buffer2 = lavc_GetFrame;
++    p_context->opaque = p_dec;
++#endif
++
++    int i_thread_count = var_InheritInteger( p_dec, "avcodec-threads" );
++    if( i_thread_count <= 0 )
++#if 1
++    {
++        // Pick 5 threads for everything on Pi except for HEVC where the h/w
++        // really limits the useful size to 3
++        i_thread_count = p_codec->id == AV_CODEC_ID_HEVC ? 3 : 5;
++    }
++#else
++    {
++        i_thread_count = vlc_GetCPUCount();
++        if( i_thread_count > 1 )
++            i_thread_count++;
++
++        //FIXME: take in count the decoding time
++#if VLC_WINSTORE_APP
++        i_thread_count = __MIN( i_thread_count, 6 );
++#else
++        i_thread_count = __MIN( i_thread_count, p_codec->id == AV_CODEC_ID_HEVC ? 10 : 6 );
++#endif
++    }
++    i_thread_count = __MIN( i_thread_count, p_codec->id == AV_CODEC_ID_HEVC ? 32 : 16 );
++#endif
++    msg_Dbg( p_dec, "allowing %d thread(s) for decoding", i_thread_count );
++    p_context->thread_count = i_thread_count;
++    p_context->thread_safe_callbacks = true;
++
++    switch( p_codec->id )
++    {
++        case AV_CODEC_ID_MPEG4:
++        case AV_CODEC_ID_H263:
++            p_context->thread_type = 0;
++            break;
++        case AV_CODEC_ID_MPEG1VIDEO:
++        case AV_CODEC_ID_MPEG2VIDEO:
++            p_context->thread_type &= ~FF_THREAD_SLICE;
++            /* fall through */
++# if (LIBAVCODEC_VERSION_INT < AV_VERSION_INT(55, 1, 0))
++        case AV_CODEC_ID_H264:
++        case AV_CODEC_ID_VC1:
++        case AV_CODEC_ID_WMV3:
++            p_context->thread_type &= ~FF_THREAD_FRAME;
++# endif
++        default:
++            break;
++    }
++
++    if( p_context->thread_type & FF_THREAD_FRAME )
++        p_dec->i_extra_picture_buffers = 2 * p_context->thread_count;
++
++    /* ***** misc init ***** */
++    date_Init(&p_sys->pts, 1, 30001);
++    date_Set(&p_sys->pts, VLC_TS_INVALID);
++    p_sys->b_first_frame = true;
++    p_sys->i_late_frames = 0;
++    p_sys->b_from_preroll = false;
++
++    /* Set output properties */
++    if( ZcGetVlcChroma( &p_dec->fmt_out.video, p_context->pix_fmt ) != VLC_SUCCESS )
++    {
++        /* we are doomed. but not really, because most codecs set their pix_fmt later on */
++//        p_dec->fmt_out.i_codec = VLC_CODEC_I420;
++        p_dec->fmt_out.i_codec = VLC_CODEC_MMAL_ZC_I420;
++    }
++    p_dec->fmt_out.i_codec = p_dec->fmt_out.video.i_chroma;
++
++    p_dec->fmt_out.video.orientation = p_dec->fmt_in.video.orientation;
++
++    if( p_dec->fmt_in.video.p_palette ) {
++        p_sys->palette_sent = false;
++        p_dec->fmt_out.video.p_palette = malloc( sizeof(video_palette_t) );
++        if( p_dec->fmt_out.video.p_palette )
++            *p_dec->fmt_out.video.p_palette = *p_dec->fmt_in.video.p_palette;
++    } else
++        p_sys->palette_sent = true;
++
++    if ((p_sys->cma_pool = cma_buf_pool_new(p_sys->cma_in_flight_max, p_sys->cma_in_flight_max, false, "mmal_avcodec")) == NULL)
++    {
++        msg_Err(p_dec, "CMA pool alloc failure");
++        goto fail;
++    }
++
++    /* ***** init this codec with special data ***** */
++    ffmpeg_InitCodec( p_dec );
++
++    /* ***** Open the codec ***** */
++    if( OpenVideoCodec( p_dec ) < 0 )
++    {
++        /* Take the common failure path: the former inline cleanup freed the
++         * semaphore, p_sys and the codec context but neither released the
++         * CMA pool nor called cma_vcsm_exit(). NOTE(review): relies on
++         * MmalAvcodecCloseDecoder() doing so - confirm against its body. */
++        goto fail;
++    }
++
++    p_dec->pf_decode = DecodeVideo;
++    p_dec->pf_flush  = Flush;
++
++    /* XXX: Writing input format makes little sense. */
++    if( p_context->profile != FF_PROFILE_UNKNOWN )
++        p_dec->fmt_in.i_profile = p_context->profile;
++    if( p_context->level != FF_LEVEL_UNKNOWN )
++        p_dec->fmt_in.i_level = p_context->level;
++
++#if 1
++    // Most of the time we have nothing useful by way of a format here
++    // wait till we've decoded something
++#else
++    // Update output format
++    if (lavc_UpdateVideoFormat(p_dec, p_context, p_context->pix_fmt,
++                               p_context->pix_fmt) != 0)
++    {
++        msg_Err(p_dec, "Unable to update format: pix_fmt=%d", p_context->pix_fmt);
++//        goto fail;
++    }
++#endif
++
++#if TRACE_ALL
++    msg_Dbg(p_dec, "<<< %s: OK", __func__);
++#endif
++    return VLC_SUCCESS;
++
++fail:
++    MmalAvcodecCloseDecoder(VLC_OBJECT(p_dec));
++
++#if TRACE_ALL
++    msg_Dbg(p_dec, "<<< %s: FAIL", __func__);
++#endif
++
++    return VLC_EGENERIC;
++}
++
++/*****************************************************************************
++ * Flush: pf_flush callback - resets PTS/late-frame/cc state, flushes the codec
++ *****************************************************************************/
++static void Flush( decoder_t *p_dec )
++{
++    decoder_sys_t *p_sys = p_dec->p_sys;
++    AVCodecContext *p_context = p_sys->p_context;
++
++#if TRACE_ALL
++    msg_Dbg(p_dec, "<<< %s", __func__);
++#endif
++
++    date_Set(&p_sys->pts, VLC_TS_INVALID); /* To make sure we recover properly */
++    p_sys->i_late_frames = 0;
++    cc_Flush( &p_sys->cc );
++
++    /* Abort pictures in order to unblock all avcodec worker threads waiting
++     * for a picture. This will avoid a deadlock between avcodec_flush_buffers
++     * and worker threads. Here the CMA pool is cancelled instead (see below) */
++// It would probably be good to use AbortPicture but that often deadlocks on close
++// and given that we wait for pics in the main thread it should be unneeded (whereas
++// cma is alloced in the depths of ffmpeg on its own threads)
++//    decoder_AbortPictures( p_dec, true );
++    cma_buf_pool_cancel(p_sys->cma_pool);
++
++    post_mt( p_sys );
++    /* do not flush buffers if codec hasn't been opened (theora/vorbis/VC1) */
++    if( avcodec_is_open( p_context ) )
++        avcodec_flush_buffers( p_context );
++    wait_mt( p_sys );
++
++    /* Reset cancel state to false */
++    cma_buf_pool_uncancel(p_sys->cma_pool);
++//    decoder_AbortPictures( p_dec, false );
++
++#if TRACE_ALL
++    msg_Dbg(p_dec, ">>> %s", __func__);
++#endif
++
++}
++
++/* Validate an incoming block: a discontinuous or corrupted one resets the
++ * PTS tracker, the closed-caption (cc) state and the late-frame counter.
++ * Returns false only for a corrupted block (released here); NULL is fine. */
++static bool check_block_validity( decoder_sys_t *p_sys, block_t *block )
++{
++    if( block == NULL )
++        return true;
++    if( !(block->i_flags & (BLOCK_FLAG_DISCONTINUITY|BLOCK_FLAG_CORRUPTED)) )
++        return true;
++
++    /* To make sure we recover properly */
++    date_Set( &p_sys->pts, VLC_TS_INVALID );
++    cc_Flush( &p_sys->cc );
++    p_sys->i_late_frames = 0;
++    const bool b_corrupted = (block->i_flags & BLOCK_FLAG_CORRUPTED) != 0;
++    if( b_corrupted )
++        block_Release( block );
++    return !b_corrupted;
++}
++
++/* Tell whether a block has to be dropped because decoding has been late for
++ * more than 5 seconds; a preroll block restarts the late-frame tracking.
++ * The block is released here whenever true is returned. */
++static bool check_block_being_late( decoder_sys_t *p_sys, block_t *block, mtime_t current_time)
++{
++    if( block == NULL )
++        return false;
++    if( (block->i_flags & BLOCK_FLAG_PREROLL) != 0 )
++    {
++        /* Do not care about late frames when prerolling
++         * TODO avoid decoding of non reference frame
++         * (ie all B except for H264 where it depends only on nal_ref_idc) */
++        p_sys->b_from_preroll = true;
++        p_sys->i_last_late_delay = INT64_MAX;
++        p_sys->i_late_frames = 0;
++    }
++
++    const bool b_late_too_long = p_sys->i_late_frames > 0 &&
++        current_time - p_sys->i_late_frames_start > (5*CLOCK_FREQ);
++    if( !b_late_too_long )
++        return false;
++    date_Set( &p_sys->pts, VLC_TS_INVALID ); /* To make sure we recover properly */
++    block_Release( block );
++    p_sys->i_late_frames--;
++    return true;
++}
++
++/* Hurry-up policy once more than 4 frames are late: no output picture is
++ * needed and, below 12 late frames, at least non-reference frames are
++ * skipped. From 12 on it returns true so that the caller drops the frame. */
++static bool check_frame_should_be_dropped( decoder_sys_t *p_sys, AVCodecContext *p_context, bool *b_need_output_picture )
++{
++    if( p_sys->i_late_frames <= 4)
++        return false;
++    *b_need_output_picture = false;
++    if( p_sys->i_late_frames >= 12 )
++    {
++        /* picture too late, won't decode
++         * but break picture until a new I, and for mpeg4 ...*/
++        p_sys->i_late_frames--; /* needed, else it would never decrease */
++        return true;
++    }
++    if( p_sys->i_skip_frame <= AVDISCARD_NONREF )
++        p_context->skip_frame = AVDISCARD_NONREF;
++    else
++        p_context->skip_frame = p_sys->i_skip_frame;
++    return false;
++}
++
++/* Advance the PTS tracker by ticks_per_frame plus repeat_pict and return the
++ * new date, or VLC_TS_INVALID when the tracker holds no usable date. */
++static mtime_t interpolate_next_pts( decoder_t *p_dec, AVFrame *frame )
++{
++    decoder_sys_t *sys = p_dec->p_sys;
++    const int i_ticks = sys->p_context->ticks_per_frame;
++
++    if( date_Get( &sys->pts ) == VLC_TS_INVALID )
++        return VLC_TS_INVALID;
++    if( sys->pts.i_divider_num == 0 )
++        return VLC_TS_INVALID;
++
++    /* interpolate the next PTS; a non-positive tick count is treated as 1 */
++    return date_Increment( &sys->pts,
++                           ( i_ticks > 0 ? i_ticks : 1 ) + frame->repeat_pict );
++}
++
++/* Update frame late count (except when doing preroll): a frame is late when
++ * its display date plus a threshold - half the gap to the next PTS, or 20000
++ * when that is unknown - is not after current_time. */
++static void update_late_frame_count( decoder_t *p_dec, block_t *p_block,
++                                     mtime_t current_time, mtime_t i_pts,
++                                     mtime_t i_next_pts )
++{
++    decoder_sys_t *p_sys = p_dec->p_sys;
++    const bool b_preroll = p_block && (p_block->i_flags & BLOCK_FLAG_PREROLL);
++    const mtime_t i_display_date =
++        b_preroll ? VLC_TS_INVALID : decoder_GetDisplayDate( p_dec, i_pts );
++    mtime_t i_threshold = 20000;
++    if( i_next_pts != VLC_TS_INVALID )
++        i_threshold = (i_next_pts - i_pts) / 2;
++    if( i_display_date <= VLC_TS_INVALID ||
++        i_display_date + i_threshold > current_time )
++    {
++        p_sys->i_late_frames = 0;
++        return;
++    }
++
++    /* Out of preroll, consider only late frames on rising delay */
++    const mtime_t i_delay = current_time - i_display_date;
++    if( p_sys->b_from_preroll )
++    {
++        if( p_sys->i_last_late_delay > i_delay )
++        {
++            p_sys->i_last_late_delay = i_delay;
++            return;
++        }
++        p_sys->b_from_preroll = false;
++    }
++    if( ++p_sys->i_late_frames == 1 )
++        p_sys->i_late_frames_start = current_time;
++}
++
++/* Apply a frame's HDR and A53 caption side data; returns 0, or -1 if the format update fails */
++static int DecodeSidedata( decoder_t *p_dec, const AVFrame *frame, picture_t *p_pic )
++{
++    decoder_sys_t *p_sys = p_dec->p_sys;
++    bool format_changed = false;
++    /* Mastering display metadata: rescale the AVRationals by the ST2086_* integer factors */
++#if (LIBAVUTIL_VERSION_MICRO >= 100 && LIBAVUTIL_VERSION_INT >= AV_VERSION_INT( 55, 16, 101 ) )
++#define FROM_AVRAT(default_factor, avrat) \
++(uint64_t)(default_factor) * (avrat).num / (avrat).den
++    const AVFrameSideData *metadata =
++            av_frame_get_side_data( frame,
++                                    AV_FRAME_DATA_MASTERING_DISPLAY_METADATA );
++    if ( metadata )
++    {
++        const AVMasteringDisplayMetadata *hdr_meta =
++                (const AVMasteringDisplayMetadata *) metadata->data;
++        if ( hdr_meta->has_luminance )
++        {
++#define ST2086_LUMA_FACTOR 10000
++            p_pic->format.mastering.max_luminance =
++                    FROM_AVRAT(ST2086_LUMA_FACTOR, hdr_meta->max_luminance);
++            p_pic->format.mastering.min_luminance =
++                    FROM_AVRAT(ST2086_LUMA_FACTOR, hdr_meta->min_luminance);
++        }
++        if ( hdr_meta->has_primaries )
++        {
++#define ST2086_RED   2
++#define ST2086_GREEN 0
++#define ST2086_BLUE  1
++#define LAV_RED    0
++#define LAV_GREEN  1
++#define LAV_BLUE   2
++#define ST2086_PRIM_FACTOR 50000
++            p_pic->format.mastering.primaries[ST2086_RED*2   + 0] =
++                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->display_primaries[LAV_RED][0]);
++            p_pic->format.mastering.primaries[ST2086_RED*2   + 1] =
++                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->display_primaries[LAV_RED][1]);
++            p_pic->format.mastering.primaries[ST2086_GREEN*2 + 0] =
++                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->display_primaries[LAV_GREEN][0]);
++            p_pic->format.mastering.primaries[ST2086_GREEN*2 + 1] =
++                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->display_primaries[LAV_GREEN][1]);
++            p_pic->format.mastering.primaries[ST2086_BLUE*2  + 0] =
++                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->display_primaries[LAV_BLUE][0]);
++            p_pic->format.mastering.primaries[ST2086_BLUE*2  + 1] =
++                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->display_primaries[LAV_BLUE][1]);
++            p_pic->format.mastering.white_point[0] =
++                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->white_point[0]);
++            p_pic->format.mastering.white_point[1] =
++                    FROM_AVRAT(ST2086_PRIM_FACTOR, hdr_meta->white_point[1]);
++        }
++        /* Update fmt_out only when the mastering data actually differs */
++        if ( memcmp( &p_dec->fmt_out.video.mastering,
++                     &p_pic->format.mastering,
++                     sizeof(p_pic->format.mastering) ) )
++        {
++            p_dec->fmt_out.video.mastering = p_pic->format.mastering;
++            format_changed = true;
++        }
++#undef FROM_AVRAT
++    }
++#endif
++#if (LIBAVUTIL_VERSION_MICRO >= 100 && LIBAVUTIL_VERSION_INT >= AV_VERSION_INT( 55, 60, 100 ) )
++    const AVFrameSideData *metadata_lt =
++            av_frame_get_side_data( frame,
++                                    AV_FRAME_DATA_CONTENT_LIGHT_LEVEL );
++    if ( metadata_lt )
++    {
++        const AVContentLightMetadata *light_meta =
++                (const AVContentLightMetadata *) metadata_lt->data;
++        p_pic->format.lighting.MaxCLL = light_meta->MaxCLL;
++        p_pic->format.lighting.MaxFALL = light_meta->MaxFALL;
++        if ( memcmp( &p_dec->fmt_out.video.lighting,
++                     &p_pic->format.lighting,
++                     sizeof(p_pic->format.lighting) ) )
++        {
++            p_dec->fmt_out.video.lighting  = p_pic->format.lighting;
++            format_changed = true;
++        }
++    }
++#endif
++    /* A changed output format is pushed right away; its failure is reported as -1 */
++    if (format_changed && decoder_UpdateVideoFormat( p_dec ))
++        return -1;
++    /* A53 closed captions: extract them and queue them as a separate block */
++    const AVFrameSideData *p_avcc = av_frame_get_side_data( frame, AV_FRAME_DATA_A53_CC );
++    if( p_avcc )
++    {
++        cc_Extract( &p_sys->cc, CC_PAYLOAD_RAW, true, p_avcc->data, p_avcc->size );
++        if( p_sys->cc.b_reorder || p_sys->cc.i_data )
++        {
++            block_t *p_cc = block_Alloc( p_sys->cc.i_data );
++            if( p_cc )
++            {
++                memcpy( p_cc->p_buffer, p_sys->cc.p_data, p_sys->cc.i_data );
++                if( p_sys->cc.b_reorder )
++                    p_cc->i_dts = p_cc->i_pts = p_pic->date;
++                else
++                    p_cc->i_pts = p_cc->i_dts;
++                decoder_cc_desc_t desc;
++                desc.i_608_channels = p_sys->cc.i_608channels;
++                desc.i_708_channels = p_sys->cc.i_708channels;
++                desc.i_reorder_depth = 4;
++                decoder_QueueCc( p_dec, p_cc, &desc );
++            }
++            cc_Flush( &p_sys->cc );
++        }
++    }
++    return 0;
++}
++
++/*****************************************************************************
++ * DecodeBlock: Called to decode one or more frames
++ *****************************************************************************/
++
++static picture_t *DecodeBlock( decoder_t *p_dec, block_t **pp_block, bool *error )
++{
++    decoder_sys_t *p_sys = p_dec->p_sys;
++    AVCodecContext *p_context = p_sys->p_context;
++    /* Boolean if we assume that we should get valid pic as result */
++    bool b_need_output_picture = true;
++
++    /* Boolean for END_OF_SEQUENCE */
++    bool eos_spotted = false;
++
++#if TRACE_ALL
++    msg_Dbg(p_dec, "<<< %s: (buf_size=%d)", __func__, pp_block == NULL || *pp_block == NULL ? 0 : (*pp_block)->i_buffer);
++#endif
++
++    block_t *p_block;
++    mtime_t current_time;
++    picture_t *p_pic = NULL;
++    AVFrame *frame = NULL;
++
++    // By default we are OK
++    *error = false;
++
++    if( !p_context->extradata_size && p_dec->fmt_in.i_extra )
++    {
++        ffmpeg_InitCodec( p_dec );
++        if( !avcodec_is_open( p_context ) )
++            OpenVideoCodec( p_dec );
++    }
++
++    p_block = pp_block ? *pp_block : NULL;
++    if(!p_block && !(p_sys->p_codec->capabilities & AV_CODEC_CAP_DELAY) )
++        return NULL;
++
++    if( !avcodec_is_open( p_context ) )
++    {
++        if( p_block )
++            block_Release( p_block );
++        return NULL;
++    }
++
++    if( !check_block_validity( p_sys, p_block ) )
++        return NULL;
++
++    current_time = mdate();
++    if( p_dec->b_frame_drop_allowed &&  check_block_being_late( p_sys, p_block, current_time) )
++    {
++        msg_Err( p_dec, "more than 5 seconds of late video -> "
++                 "dropping frame (computer too slow ?)" );
++        return NULL;
++    }
++
++
++    /* A good idea could be to decode all I pictures and see for the other */
++
++    /* Defaults that if we aren't in prerolling, we want output picture
++       same for if we are flushing (p_block==NULL) */
++    if( !p_block || !(p_block->i_flags & BLOCK_FLAG_PREROLL) )
++        b_need_output_picture = true;
++    else
++        b_need_output_picture = false;
++
++    /* Change skip_frame config only if hurry_up is enabled */
++    if( p_sys->b_hurry_up )
++    {
++        p_context->skip_frame = p_sys->i_skip_frame;
++
++        /* Check also if we should/can drop the block and move to next block
++            as trying to catchup the speed*/
++        if( p_dec->b_frame_drop_allowed &&
++            check_frame_should_be_dropped( p_sys, p_context, &b_need_output_picture ) )
++        {
++            if( p_block )
++                block_Release( p_block );
++            msg_Warn( p_dec, "More than 11 late frames, dropping frame" );
++            return NULL;
++        }
++    }
++    if( !b_need_output_picture )
++    {
++        p_context->skip_frame = __MAX( p_context->skip_frame,
++                                              AVDISCARD_NONREF );
++    }
++
++    /*
++     * Do the actual decoding now */
++
++    /* Don't forget that libavcodec requires a little more bytes
++     * that the real frame size */
++    if( p_block && p_block->i_buffer > 0 )
++    {
++        eos_spotted = ( p_block->i_flags & BLOCK_FLAG_END_OF_SEQUENCE ) != 0;
++
++        p_block = block_Realloc( p_block, 0,
++                            p_block->i_buffer + FF_INPUT_BUFFER_PADDING_SIZE );
++        if( !p_block )
++            return NULL;
++        p_block->i_buffer -= FF_INPUT_BUFFER_PADDING_SIZE;
++        *pp_block = p_block;
++        memset( p_block->p_buffer + p_block->i_buffer, 0,
++                FF_INPUT_BUFFER_PADDING_SIZE );
++    }
++
++    while( !p_block || p_block->i_buffer > 0 || eos_spotted )
++    {
++        int i_used;
++        AVPacket pkt;
++
++        post_mt( p_sys );
++
++        av_init_packet( &pkt );
++        if( p_block && p_block->i_buffer > 0 )
++        {
++            pkt.data = p_block->p_buffer;
++            pkt.size = p_block->i_buffer;
++            pkt.pts = p_block->i_pts > VLC_TS_INVALID ? p_block->i_pts : AV_NOPTS_VALUE;
++            pkt.dts = p_block->i_dts > VLC_TS_INVALID ? p_block->i_dts : AV_NOPTS_VALUE;
++        }
++        else
++        {
++            /* Return delayed frames if codec has CODEC_CAP_DELAY */
++            pkt.data = NULL;
++            pkt.size = 0;
++        }
++
++        if( !p_sys->palette_sent )
++        {
++            uint8_t *pal = av_packet_new_side_data(&pkt, AV_PKT_DATA_PALETTE, AVPALETTE_SIZE);
++            if (pal) {
++                memcpy(pal, p_dec->fmt_in.video.p_palette->palette, AVPALETTE_SIZE);
++                p_sys->palette_sent = true;
++            }
++        }
++
++        /* Make sure we don't reuse the same timestamps twice */
++        if( p_block )
++        {
++            p_block->i_pts =
++            p_block->i_dts = VLC_TS_INVALID;
++        }
++
++        int ret = avcodec_send_packet(p_context, &pkt);
++        if( ret != 0 && ret != AVERROR(EAGAIN) )
++        {
++            if (ret == AVERROR(ENOMEM) || ret == AVERROR(EINVAL))
++            {
++                msg_Err(p_dec, "avcodec_send_packet critical error");
++                *error = true;
++            }
++            av_packet_unref( &pkt );
++            break;
++        }
++        i_used = ret != AVERROR(EAGAIN) ? pkt.size : 0;
++        av_packet_unref( &pkt );
++
++        frame = av_frame_alloc();
++        if (unlikely(frame == NULL))
++        {
++            *error = true;
++            break;
++        }
++
++        ret = avcodec_receive_frame(p_context, frame);
++        if( ret != 0 && ret != AVERROR(EAGAIN) )
++        {
++            msg_Dbg(p_dec, "No receive");
++            if (ret == AVERROR(ENOMEM) || ret == AVERROR(EINVAL))
++            {
++                msg_Err(p_dec, "avcodec_receive_frame critical error");
++                *error = true;
++            }
++            av_frame_free(&frame);
++            /* After draining, we need to reset decoder with a flush */
++            if( ret == AVERROR_EOF )
++                avcodec_flush_buffers( p_sys->p_context );
++            break;
++        }
++        bool not_received_frame = ret;
++
++        wait_mt( p_sys );
++
++        if( eos_spotted )
++            p_sys->b_first_frame = true;
++
++        if( p_block )
++        {
++            if( p_block->i_buffer <= 0 )
++                eos_spotted = false;
++
++            /* Consumed bytes */
++            p_block->p_buffer += i_used;
++            p_block->i_buffer -= i_used;
++        }
++
++        /* Nothing to display */
++        if( not_received_frame )
++        {
++//            msg_Dbg(p_dec, "No rx: used=%d", i_used);
++            av_frame_free(&frame);
++            if( i_used == 0 ) break;
++            continue;
++        }
++
++        /* Compute the PTS */
++#ifdef FF_API_PKT_PTS
++        mtime_t i_pts = frame->pts;
++#else
++        mtime_t i_pts = frame->pkt_pts;
++#endif
++        if (i_pts == AV_NOPTS_VALUE )
++            i_pts = frame->pkt_dts;
++
++        if( i_pts == AV_NOPTS_VALUE )
++            i_pts = date_Get( &p_sys->pts );
++
++        /* Interpolate the next PTS */
++        if( i_pts > VLC_TS_INVALID )
++            date_Set( &p_sys->pts, i_pts );
++
++        const mtime_t i_next_pts = interpolate_next_pts(p_dec, frame);
++
++        update_late_frame_count( p_dec, p_block, current_time, i_pts, i_next_pts);
++
++        if( !b_need_output_picture ||
++//            ( !p_sys->p_va && !frame->linesize[0] ) ||
++           ( !frame->linesize[0] ) ||
++           ( p_dec->b_frame_drop_allowed && (frame->flags & AV_FRAME_FLAG_CORRUPT) &&
++             !p_sys->b_show_corrupted ) )
++        {
++            av_frame_free(&frame);
++//            msg_Dbg(p_dec, "Bad frame");
++            continue;
++        }
++
++        if( p_context->pix_fmt == AV_PIX_FMT_PAL8
++         && !p_dec->fmt_out.video.p_palette )
++        {
++            /* See AV_PIX_FMT_PAL8 comment in avc_GetVideoFormat(): update the
++             * fmt_out palette and change the fmt_out chroma to request a new
++             * vout */
++            assert( p_dec->fmt_out.video.i_chroma != VLC_CODEC_RGBP );
++
++            video_palette_t *p_palette;
++            p_palette = p_dec->fmt_out.video.p_palette
++                      = malloc( sizeof(video_palette_t) );
++            if( !p_palette )
++            {
++                *error = true;
++                av_frame_free(&frame);
++                break;
++            }
++            static_assert( sizeof(p_palette->palette) == AVPALETTE_SIZE,
++                           "Palette size mismatch between vlc and libavutil" );
++            assert( frame->data[1] != NULL );
++            memcpy( p_palette->palette, frame->data[1], AVPALETTE_SIZE );
++            p_palette->i_entries = AVPALETTE_COUNT;
++            p_dec->fmt_out.video.i_chroma = VLC_CODEC_RGBP;
++            if( decoder_UpdateVideoFormat( p_dec ) )
++            {
++                av_frame_free(&frame);
++                continue;
++            }
++        }
++
++#if 1
++        {
++            cma_buf_t * const cb = av_rpi_zc_buf_v(frame->buf[0]);
++
++            if (cb == NULL)
++            {
++                msg_Err(p_dec, "Frame has no attached CMA buffer");
++                goto fail;
++            }
++
++            if (lavc_UpdateVideoFormat(p_dec, p_context, p_context->pix_fmt,
++                                       p_context->pix_fmt) != 0)
++            {
++                msg_Err(p_dec, "Failed to update format");
++                goto fail;
++            }
++
++            if ((p_pic = decoder_NewPicture(p_dec)) == NULL)
++            {
++                msg_Err(p_dec, "Failed to allocate pic");
++                goto fail;
++            }
++
++            if (cma_buf_pic_attach(cma_buf_ref(cb), p_pic) != 0)
++            {
++                cma_buf_unref(cb);  // Undo the in_flight
++                char dbuf0[5];
++                msg_Err(p_dec, "Failed to attach bufs to pic: fmt=%s", str_fourcc(dbuf0, p_pic->format.i_chroma));
++                goto fail;
++            }
++
++            // ****** Set planes etc.
++            set_pic_from_frame(p_pic, frame);
++        }
++#else
++        picture_t *p_pic = frame->opaque;
++        if( p_pic == NULL )
++        {   /* When direct rendering is not used, get_format() and get_buffer()
++             * might not be called. The output video format must be set here
++             * then picture buffer can be allocated. */
++            if (p_sys->p_va == NULL
++             && lavc_UpdateVideoFormat(p_dec, p_context, p_context->pix_fmt,
++                                       p_context->pix_fmt) == 0)
++                p_pic = decoder_NewPicture(p_dec);
++
++            if( !p_pic )
++            {
++                av_frame_free(&frame);
++                break;
++            }
++
++            /* Fill picture_t from AVFrame */
++            if( lavc_CopyPicture( p_dec, p_pic, frame ) != VLC_SUCCESS )
++            {
++                av_frame_free(&frame);
++                picture_Release( p_pic );
++                break;
++            }
++        }
++        else
++        {
++            /* Some codecs can return the same frame multiple times. By the
++             * time that the same frame is returned a second time, it will be
++             * too late to clone the underlying picture. So clone proactively.
++             * A single picture CANNOT be queued multiple times.
++             */
++            p_pic = picture_Clone( p_pic );
++            if( unlikely(p_pic == NULL) )
++            {
++                av_frame_free(&frame);
++                break;
++            }
++        }
++#endif
++
++        if( !p_dec->fmt_in.video.i_sar_num || !p_dec->fmt_in.video.i_sar_den )
++        {
++            /* Fetch again the aspect ratio in case it changed */
++            p_dec->fmt_out.video.i_sar_num
++                = p_context->sample_aspect_ratio.num;
++            p_dec->fmt_out.video.i_sar_den
++                = p_context->sample_aspect_ratio.den;
++
++            if( !p_dec->fmt_out.video.i_sar_num || !p_dec->fmt_out.video.i_sar_den )
++            {
++                p_dec->fmt_out.video.i_sar_num = 1;
++                p_dec->fmt_out.video.i_sar_den = 1;
++            }
++        }
++
++        p_pic->date = i_pts;
++        /* Hack to force display of still pictures */
++        p_pic->b_force = p_sys->b_first_frame;
++        p_pic->i_nb_fields = 2 + frame->repeat_pict;
++        p_pic->b_progressive = !frame->interlaced_frame;
++        p_pic->b_top_field_first = frame->top_field_first;
++
++        if (DecodeSidedata(p_dec, frame, p_pic))
++            i_pts = VLC_TS_INVALID;
++
++        av_frame_free(&frame);
++
++        /* Send decoded frame to vout */
++        if (i_pts > VLC_TS_INVALID)
++        {
++            p_sys->b_first_frame = false;
++#if TRACE_ALL
++            msg_Dbg(p_dec, ">>> %s: Got pic", __func__);
++#endif
++            return p_pic;
++        }
++        else
++            picture_Release( p_pic );
++    }
++
++    if( p_block )
++        block_Release( p_block );
++
++#if TRACE_ALL
++     msg_Dbg(p_dec, ">>> %s: NULL", __func__);
++#endif
++    return NULL;
++
++fail:
++#if TRACE_ALL
++     msg_Dbg(p_dec, ">>> %s: FAIL", __func__);
++#endif
++    av_frame_free(&frame);
++    if (p_pic != NULL)
++        picture_Release(p_pic);
++    if (p_block != NULL)
++        block_Release(p_block);
++    *error = true;
++    return NULL;
++}
++
++static int DecodeVideo( decoder_t *p_dec, block_t *p_block )
++{   /* Feed p_block (may be NULL) to DecodeBlock() and queue every picture it yields */
++    block_t **pp_block = p_block ? &p_block : NULL;
++    picture_t *p_pic;
++    bool error = false;  /* written by DecodeBlock(); deliberately not acted on, see below */
++    while( ( p_pic = DecodeBlock( p_dec, pp_block, &error ) ) != NULL )
++        decoder_QueueVideo( p_dec, p_pic );
++    return VLCDEC_SUCCESS;
++// Easiest to just ignore all errors - returning a real error seems to
++// kill output forever
++//    return error ? VLCDEC_ECRITICAL : VLCDEC_SUCCESS;
++}
++
++/*****************************************************************************
++ * MmalAvcodecCloseDecoder: decoder destruction
++ *****************************************************************************
++ * This function is called when the thread ends after a successful
++ * initialization.
++ *****************************************************************************/
++static void MmalAvcodecCloseDecoder( vlc_object_t *obj )
++{
++    decoder_t *p_dec = (decoder_t *)obj;
++    decoder_sys_t *p_sys = p_dec->p_sys;
++    AVCodecContext *ctx = p_sys->p_context;
++//    void *hwaccel_context;
++
++    msg_Dbg(obj, "<<< %s", __func__);
++
++    post_mt( p_sys );  // NOTE(review): post_mt/wait_mt are defined elsewhere - confirm their pairing rules
++
++    cma_buf_pool_cancel(p_sys->cma_pool);  // Abort any pending frame allocs
++
++    /* do not flush buffers if codec hasn't been opened (theora/vorbis/VC1) */
++    if( avcodec_is_open( ctx ) )
++        avcodec_flush_buffers( ctx );
++
++    av_rpi_zc_uninit2(ctx);
++
++    wait_mt( p_sys );
++
++    cc_Flush( &p_sys->cc );
++
++//    hwaccel_context = ctx->hwaccel_context;
++    avcodec_free_context( &ctx );  // only clears the local copy; p_sys is freed below anyway
++
++//    if( p_sys->p_va )
++//        vlc_va_Delete( p_sys->p_va, &hwaccel_context );
++
++    cma_vcsm_exit(p_sys->vcsm_init_type);  // NOTE(review): presumably balances an init done at open - confirm
++
++    vlc_sem_destroy( &p_sys->sem_mt );
++    free( p_sys );
++}
++
++/*****************************************************************************
++ * ffmpeg_InitCodec: setup codec extra initialization data for ffmpeg
++ *****************************************************************************/
++static void ffmpeg_InitCodec( decoder_t *p_dec )
++{
++    decoder_sys_t *p_sys = p_dec->p_sys;
++    size_t i_size = p_dec->fmt_in.i_extra;
++
++    if( !i_size ) return;  /* nothing to copy */
++
++    if( p_sys->p_codec->id == AV_CODEC_ID_SVQ3 )
++    {
++        uint8_t *p;
++
++        /* SVQ3: the input extradata is prefixed with a 12 byte header */
++        p = p_sys->p_context->extradata =
++            av_malloc( i_size + 12 + FF_INPUT_BUFFER_PADDING_SIZE );
++        p_sys->p_context->extradata_size = p ? (int)(i_size + 12) : 0;
++        if( !p )
++            return;  /* size is 0 here so it never describes a NULL buffer */
++
++        memcpy( &p[0],  "SVQ3", 4 );
++        memset( &p[4], 0, 8 );
++        memcpy( &p[12], p_dec->fmt_in.p_extra, i_size );
++        memset( &p[12 + i_size], 0, FF_INPUT_BUFFER_PADDING_SIZE ); /* padding must be zeroed */
++        /* Now remove all atoms before the SMI one */
++        if( p_sys->p_context->extradata_size > 0x5a &&
++            strncmp( (char*)&p[0x56], "SMI ", 4 ) )
++        {
++            uint8_t *psz = &p[0x52];
++
++            while( psz < &p[p_sys->p_context->extradata_size - 8] )
++            {
++                uint_fast32_t atom_size = GetDWBE( psz );
++                if( atom_size <= 1 ||
++                    atom_size > (size_t)(&p[p_sys->p_context->extradata_size] - psz) )
++                {   /* FIXME handle 1 as long size. An atom claiming to extend
++                     * past the buffer must not move psz outside of it */
++                    break;
++                }
++                if( !strncmp( (char*)&psz[4], "SMI ", 4 ) )
++                {
++                    memmove( &p[0x52], psz,
++                             &p[p_sys->p_context->extradata_size] - psz );
++                    break;
++                }
++                psz += atom_size;
++            }
++        }
++    }
++    else
++    {
++        p_sys->p_context->extradata =
++            av_malloc( i_size + FF_INPUT_BUFFER_PADDING_SIZE );
++        p_sys->p_context->extradata_size = p_sys->p_context->extradata ? (int)i_size : 0;
++        if( p_sys->p_context->extradata )
++        {
++            memcpy( p_sys->p_context->extradata,
++                    p_dec->fmt_in.p_extra, i_size );
++            memset( p_sys->p_context->extradata + i_size,
++                    0, FF_INPUT_BUFFER_PADDING_SIZE );
++        }
++    }
++}
++
++
++vlc_module_begin()  /* module descriptor for the decoder implemented in this file */
++    set_category( CAT_INPUT )
++    set_subcategory( SUBCAT_INPUT_VCODEC )
++    set_shortname(N_("MMAL avcodec"))
++    set_description(N_("MMAL buffered avcodec "))
++    set_capability("video decoder", 80)
++    add_shortcut("mmal_avcodec")
++    add_integer(MMAL_AVCODEC_BUFFERS, -1, MMAL_AVCODEC_BUFFERS_TEXT,
++                    MMAL_AVCODEC_BUFFERS_LONGTEXT, true)
++    set_callbacks(MmalAvcodecOpenDecoder, MmalAvcodecCloseDecoder)  /* close callback is defined above */
++vlc_module_end()
++
+--- /dev/null
++++ b/modules/hw/mmal/mmal_cma.c
+@@ -0,0 +1,668 @@
++#ifdef HAVE_CONFIG_H
++# include "config.h"
++#endif
++
++#include <stdatomic.h>
++#include <unistd.h>
++#include <fcntl.h>
++#include <sys/ioctl.h>
++#include <sys/mman.h>
++
++#include <interface/vcsm/user-vcsm.h>
++
++#include <vlc_common.h>
++#include <vlc_picture.h>
++
++#include "mmal_cma.h"
++#include "mmal_picture.h"
++
++#include <assert.h>
++
++#define TRACE_ALL 0
++
++//-----------------------------------------------------------------------------
++//
++// Generic pool functions
++// Knows nothing about pool entries
++
++typedef void * cma_pool_alloc_fn(void * v, size_t size);
++typedef void cma_pool_free_fn(void * v, void * el, size_t size);
++
++#if TRACE_ALL
++static atomic_int pool_seq;
++#endif
++
++// Pool structure
++// Ref count is held by pool owner and pool els that have been got
++// Els in the pool do not count towards its ref count
++struct cma_pool_fixed_s
++{
++    atomic_int ref_count;
++
++    vlc_mutex_t lock;           // taken around every access to the fields below in get/put/resize
++    unsigned int n_in;          // slot that get() takes its next element from
++    unsigned int n_out;         // slot that put() stores the next returned element in
++    unsigned int pool_size;     // number of slots in pool[]
++    int flight_size;            // allowance: in_flight is initialised to -flight_size
++    size_t el_size;             // size of the elements currently pooled; a different request flushes the pool
++    void ** pool;               // slot array, NULL until the first put()
++
++    bool cancel;                // while set get() returns NULL instead of allocating or waiting
++    int in_flight;              // get() waits on flight_cond while this is > 0
++    vlc_cond_t flight_cond;
++
++    void * alloc_v;             // opaque value handed to the three callbacks below
++    cma_pool_alloc_fn * el_alloc_fn;
++    cma_pool_free_fn * el_free_fn;
++    cma_pool_on_delete_fn * on_delete_fn;
++
++    const char * name;          // strdup()ed copy owned by the pool (may be NULL)
++#if TRACE_ALL
++    int seq;
++#endif
++};
++
++static inline unsigned int inc_mod(const unsigned int n, const unsigned int m)
++{   // Next ring index after n for a ring of m slots (wraps to 0; also yields 0 when m is 0)
++    return n + 1 >= m ? 0 : n + 1;
++}
++
++static void free_pool(const cma_pool_fixed_t * const p, void ** const pool,
++                      const unsigned int pool_size, const size_t el_size)
++{   // Free every element left in a detached slot array, then the array itself
++    if (pool == NULL)
++        return;
++
++    for (unsigned int n = 0; n != pool_size; ++n)
++        if (pool[n] != NULL)
++            p->el_free_fn(p->alloc_v, pool[n], el_size);  // p is only used for its callbacks
++    free(pool);
++}
++
++// Just kill this - no checks
++static void cma_pool_fixed_delete(cma_pool_fixed_t * const p)
++{
++    cma_pool_on_delete_fn *const on_delete_fn = p->on_delete_fn;  // copied out: p is freed before the callback runs
++    void *const v = p->alloc_v;
++
++    free_pool(p, p->pool, p->pool_size, p->el_size);
++
++    if (p->name != NULL)
++        free((void *)p->name);  // Discard const
++
++    vlc_cond_destroy(&p->flight_cond);
++    vlc_mutex_destroy(&p->lock);
++    free(p);
++
++    // Inform our container that we are dead (if it cares)
++    if (on_delete_fn)
++        on_delete_fn(v);
++}
++
++static void cma_pool_fixed_unref(cma_pool_fixed_t * const p)
++{   // Drop one reference; atomic_fetch_sub returns the old value so <= 1 means it was the last
++    if (atomic_fetch_sub(&p->ref_count, 1) <= 1)
++        cma_pool_fixed_delete(p);
++}
++
++static void cma_pool_fixed_ref(cma_pool_fixed_t * const p)
++{   // Take one extra reference on the pool
++    atomic_fetch_add(&p->ref_count, 1);
++}
++
++static void cma_pool_fixed_inc_in_flight(cma_pool_fixed_t * const p)
++{   // Count one more element as in flight (under the pool lock)
++    vlc_mutex_lock(&p->lock);
++    ++p->in_flight;
++    vlc_mutex_unlock(&p->lock);
++}
++
++static void cma_pool_fixed_dec_in_flight(cma_pool_fixed_t * const p)
++{   // Count one element fewer as in flight
++    vlc_mutex_lock(&p->lock);
++    if (--p->in_flight == 0)  // get() waits while in_flight > 0, so reaching 0 can release a waiter
++        vlc_cond_signal(&p->flight_cond);
++    vlc_mutex_unlock(&p->lock);
++}
++
++static void * cma_pool_fixed_get(cma_pool_fixed_t * const p, const size_t req_el_size, const bool inc_flight, const bool no_pool)
++{   // Returns a pooled or freshly allocated element (holding one pool ref), or NULL
++    void * v = NULL;
++
++    vlc_mutex_lock(&p->lock);
++
++    for (;;)
++    {
++        if (req_el_size != p->el_size)  // size change: whatever is pooled is the wrong size now
++        {
++            void ** const deadpool = p->pool;
++            const size_t dead_size = p->el_size;
++            const unsigned int dead_n = p->pool_size;
++
++            p->pool = NULL;
++            p->n_in = 0;
++            p->n_out = 0;
++            p->el_size = req_el_size;
++
++            if (deadpool != NULL)
++            {
++                vlc_mutex_unlock(&p->lock);
++                // Do the free old op outside the mutex in case the free is slow
++                free_pool(p, deadpool, dead_n, dead_size);
++                vlc_mutex_lock(&p->lock);
++                continue;  // the lock was dropped, so re-evaluate from the top
++            }
++        }
++
++        // Late abort if flush or cancel so we can still kill the pool
++        if (req_el_size == 0 || p->cancel)
++        {
++            vlc_mutex_unlock(&p->lock);
++            return NULL;
++        }
++
++        if (p->pool != NULL && !no_pool)  // no_pool skips the pool and forces a new allocation
++        {
++            v = p->pool[p->n_in];
++            if (v != NULL)
++            {
++                p->pool[p->n_in] = NULL;
++                p->n_in = inc_mod(p->n_in, p->pool_size);
++                break;
++            }
++        }
++
++        if (p->in_flight <= 0)  // still within the flight allowance: go and allocate
++            break;
++
++        vlc_cond_wait(&p->flight_cond, &p->lock);  // woken by put, dec_in_flight, cancel or resize
++    }
++
++    if (inc_flight)
++        ++p->in_flight;
++
++    vlc_mutex_unlock(&p->lock);
++
++    if (v == NULL && req_el_size != 0)  // nothing pooled: allocate outside the lock
++        v = p->el_alloc_fn(p->alloc_v, req_el_size);
++
++    // Tag ref
++    if (v != NULL)
++        cma_pool_fixed_ref(p);
++    // Remove flight if we set it and error
++    else if (inc_flight)
++        cma_pool_fixed_dec_in_flight(p);
++
++    return v;
++}
++
++static void cma_pool_fixed_put(cma_pool_fixed_t * const p, void * v, const size_t el_size, const bool was_in_flight)
++{   // Give element v back: pool it if it fits, otherwise free it; drops the ref taken by get
++    vlc_mutex_lock(&p->lock);
++
++    if (el_size == p->el_size && (p->pool == NULL || p->pool[p->n_out] == NULL))
++    {
++        if (p->pool == NULL)  // slot array is created lazily on the first put
++            p->pool = calloc(p->pool_size, sizeof(void*));
++        if (p->pool != NULL)  // calloc may fail: v then stays set and is freed below
++        {
++            p->pool[p->n_out] = v;
++            p->n_out = inc_mod(p->n_out, p->pool_size);
++            v = NULL;
++        }
++    }
++    if (was_in_flight)
++        --p->in_flight;
++
++    vlc_mutex_unlock(&p->lock);
++    vlc_cond_signal(&p->flight_cond);  // a getter may be waiting for an element or for flight room
++
++    if (v != NULL)  // not pooled (wrong size, slot occupied or no slot array)
++        p->el_free_fn(p->alloc_v, v, el_size);
++
++    cma_pool_fixed_unref(p);
++}
++
++static int cma_pool_fixed_resize(cma_pool_fixed_t * const p,
++                           const unsigned int new_pool_size, const int new_flight_size)
++{   // Change slot count and flight allowance; returns 0 on success, -1 if out of memory
++    void ** dead_pool = NULL;
++    size_t dead_size = 0;
++    unsigned int dead_n = 0;
++
++    // This makes this non-reentrant but saves us a lot of time in the normal
++    // "nothing happens" case
++    if (p->pool_size == new_pool_size && p->flight_size == new_flight_size)
++        return 0;
++
++    vlc_mutex_lock(&p->lock);
++
++    if (p->pool != NULL && new_pool_size != p->pool_size)
++    {
++        void ** const new_pool = calloc(new_pool_size, sizeof(void*));
++        unsigned int d, s;
++        dead_pool = p->pool;
++        dead_size = p->el_size;
++        dead_n = p->pool_size;
++
++        if (new_pool == NULL)
++        {
++            vlc_mutex_unlock(&p->lock);
++            return -1;
++        }
++
++        for (d = 0, s = p->n_in; d != new_pool_size && (new_pool[d] = dead_pool[s]) != NULL; ++d, s = inc_mod(s, dead_n))
++            dead_pool[s] = NULL;
++        // Survivors now sit in new_pool[0..d): get reads at n_in, put writes at n_out
++        p->n_in = 0;
++        p->n_out = (d != new_pool_size) ? d : 0;
++        p->pool = new_pool;
++    }
++
++    p->pool_size = new_pool_size;
++    if (new_flight_size > p->flight_size)
++        vlc_cond_broadcast(&p->flight_cond);  // Lock still active so nothing happens till we release it
++    p->in_flight += p->flight_size - new_flight_size;  // keeps in_flight relative to the new allowance
++    p->flight_size = new_flight_size;
++
++    vlc_mutex_unlock(&p->lock);
++
++    free_pool(p, dead_pool, dead_n, dead_size);  // frees the old array and anything that did not fit
++    return 0;
++}
++
++static int cma_pool_fixed_fill(cma_pool_fixed_t * const p, const size_t el_size)
++{   // Pre-populate the pool with el_size elements; returns 0 or -ENOMEM
++    for (;;)
++    {
++        vlc_mutex_lock(&p->lock);
++        bool done = el_size == p->el_size && p->pool != NULL && p->pool[p->n_out] != NULL;  // next put slot occupied => full
++        vlc_mutex_unlock(&p->lock);
++        if (done)
++            break;
++        void * buf = cma_pool_fixed_get(p, el_size, false, true);  // no_pool: always a fresh allocation
++        if (buf == NULL)
++            return -ENOMEM;
++        cma_pool_fixed_put(p, buf, el_size, false);  // NOTE(review): the loop only ends once put really pools the buffer
++    }
++    return 0;
++}
++
++static void cma_pool_fixed_cancel(cma_pool_fixed_t * const p)
++{   // Make get() fail from now on and wake every getter currently waiting
++    vlc_mutex_lock(&p->lock);
++    p->cancel = true;
++    vlc_cond_broadcast(&p->flight_cond);
++    vlc_mutex_unlock(&p->lock);
++}
++
++static void cma_pool_fixed_uncancel(cma_pool_fixed_t * const p)
++{   // Clear the cancel flag so get() hands out elements again
++    vlc_mutex_lock(&p->lock);
++    p->cancel = false;
++    vlc_mutex_unlock(&p->lock);
++}
++
++
++// Purge pool & unref
++static void cma_pool_fixed_kill(cma_pool_fixed_t * const p)
++{
++    if (p == NULL)
++        return;
++
++    // This flush is not strictly needed but it reclaims what memory we can reclaim asap
++    cma_pool_fixed_get(p, 0, false, false);  // size 0: frees pooled elements of any other size, returns NULL
++    cma_pool_fixed_unref(p);  // drops the owner ref; the pool dies once outstanding elements are put back
++}
++
++// Create a new pool
++static cma_pool_fixed_t*
++cma_pool_fixed_new(const unsigned int pool_size,
++                   const int flight_size,
++                   void * const alloc_v,
++                   cma_pool_alloc_fn * const alloc_fn, cma_pool_free_fn * const free_fn,
++                   cma_pool_on_delete_fn * const on_delete_fn,
++                   const char * const name)
++{   // Returns the pool with one (owner) reference, or NULL if out of memory
++    cma_pool_fixed_t* const p = calloc(1, sizeof(cma_pool_fixed_t));
++    if (p == NULL)
++        return NULL;
++
++    atomic_store(&p->ref_count, 1);
++    vlc_mutex_init(&p->lock);
++    vlc_cond_init(&p->flight_cond);
++
++    p->pool_size = pool_size;
++    p->flight_size = flight_size;
++    p->in_flight = -flight_size;  // get() waits only while in_flight > 0, so this is the initial allowance
++
++    p->alloc_v = alloc_v;
++    p->el_alloc_fn = alloc_fn;
++    p->el_free_fn = free_fn;
++    p->on_delete_fn = on_delete_fn;
++    p->name = name == NULL ? NULL : strdup(name);  // private copy, released in cma_pool_fixed_delete
++#if TRACE_ALL
++    p->seq = atomic_fetch_add(&pool_seq, 1);
++#endif
++
++    return p;
++}
++
++// ---------------------------------------------------------------------------
++//
++// CMA buffer functions - uses cma_pool_fixed for pooling
++
++struct cma_buf_pool_s {
++    cma_pool_fixed_t * pool;        // generic pool that stores the cma_buf_t elements
++    vcsm_init_type_t init_type;     // result of cma_vcsm_init(); selects the mapping method and is passed to cma_vcsm_exit()
++
++    bool all_in_flight;             // true: every allocated buffer counts as in flight from allocation
++#if TRACE_ALL
++    size_t alloc_n;
++    size_t alloc_size;
++#endif
++};
++
++typedef struct cma_buf_s {
++    atomic_int ref_count;  // 0 while pooled; the buffer goes back to the pool when it drops to 0 again
++    cma_buf_pool_t * cbp;  // owning pool
++    bool in_flight;        // this buffer is currently counted in the pool's in_flight total
++    size_t size;           // allocation size in bytes
++    unsigned int vcsm_h;   // VCSM handle from initial alloc
++    unsigned int vc_h;     // VC handle for ZC mmal buffers
++    unsigned int vc_addr;  // VC addr - unused by us but wanted by FFmpeg
++    int fd;                // dmabuf handle for GL
++    void * mmap;           // ARM mapped address
++    picture_context_t *ctx2;  // optional attached context, destroyed together with the buffer
++} cma_buf_t;
++
++static void cma_pool_delete(cma_buf_t * const cb)
++{   // Release every resource held by cb; fields still at their initial value are skipped
++    assert(atomic_load(&cb->ref_count) == 0);
++#if TRACE_ALL
++    cb->cbp->alloc_size -= cb->size;
++    --cb->cbp->alloc_n;
++    fprintf(stderr, "%s[%d:%s]: N=%d, Total=%d\n", __func__, cb->cbp->pool->seq, cb->cbp->pool->name, cb->cbp->alloc_n, cb->cbp->alloc_size);
++#endif
++
++    if (cb->ctx2 != NULL)
++        cb->ctx2->destroy(cb->ctx2);
++
++    if (cb->mmap != MAP_FAILED)  // undo whichever mapping cma_pool_alloc_cb made
++    {
++        if (cb->cbp->init_type == VCSM_INIT_CMA)
++            munmap(cb->mmap, cb->size);
++        else
++            vcsm_unlock_hdl(cb->vcsm_h);
++    }
++    if (cb->fd != -1)
++        close(cb->fd);
++    if (cb->vcsm_h != 0)
++        vcsm_free(cb->vcsm_h);
++    free(cb);
++}
++
++static void cma_pool_free_cb(void * v, void * el, size_t size)
++{   // Element free callback given to the generic pool
++    VLC_UNUSED(v);
++    VLC_UNUSED(size);
++
++    cma_pool_delete(el);
++}
++
++static void * cma_pool_alloc_cb(void * v, size_t size)
++{   // Element alloc callback: builds one cma_buf_t of size bytes, or returns NULL
++    cma_buf_pool_t * const cbp = v;
++
++    cma_buf_t * const cb = malloc(sizeof(cma_buf_t));
++    if (cb == NULL)
++        return NULL;
++
++    *cb = (cma_buf_t){  // "nothing acquired yet" values that cma_pool_delete tests for
++        .ref_count = ATOMIC_VAR_INIT(0),
++        .cbp = cbp,
++        .in_flight = 0,
++        .size = size,
++        .vcsm_h = 0,
++        .vc_h = 0,
++        .fd = -1,
++        .mmap = MAP_FAILED,
++        .ctx2 = NULL
++    };
++#if TRACE_ALL
++    cb->cbp->alloc_size += cb->size;
++    ++cb->cbp->alloc_n;
++    fprintf(stderr, "%s[%d:%s]: N=%d, Total=%d\n", __func__, cbp->pool->seq, cbp->pool->name, cbp->alloc_n, cbp->alloc_size);
++#endif
++
++    // 0x80 is magic value to force full ARM-side mapping - otherwise
++    // cache requests can cause kernel crashes
++    if ((cb->vcsm_h = vcsm_malloc_cache(size, VCSM_CACHE_TYPE_HOST | 0x80, "VLC frame")) == 0)
++    {
++#if TRACE_ALL
++        fprintf(stderr, "vcsm_malloc_cache fail\n");
++#endif
++        goto fail;
++    }
++
++    if ((cb->vc_h = vcsm_vc_hdl_from_hdl(cb->vcsm_h)) == 0)
++    {
++#if TRACE_ALL
++        fprintf(stderr, "vcsm_vc_hdl_from_hdl fail\n");
++#endif
++        goto fail;
++    }
++
++    if (cbp->init_type == VCSM_INIT_CMA)  // CMA: export a dmabuf fd and mmap it
++    {
++        if ((cb->fd = vcsm_export_dmabuf(cb->vcsm_h)) == -1)
++        {
++#if TRACE_ALL
++            fprintf(stderr, "vcsm_export_dmabuf fail\n");
++#endif
++            goto fail;
++        }
++
++        if ((cb->mmap = mmap(NULL, cb->size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, cb->fd, 0)) == MAP_FAILED)
++            goto fail;
++    }
++    else  // otherwise the ARM address comes from locking the vcsm handle
++    {
++        void * arm_addr;
++        if ((arm_addr = vcsm_lock(cb->vcsm_h)) == NULL)
++        {
++#if TRACE_ALL
++            fprintf(stderr, "vcsm_lock fail\n");
++#endif
++            goto fail;
++        }
++        cb->mmap = arm_addr;
++    }
++
++    cb->vc_addr = vcsm_vc_addr_from_hdl(cb->vcsm_h);
++
++    return cb;
++
++fail:
++    cma_pool_delete(cb);  // releases only what was acquired before the failure
++    return NULL;
++}
++
++// Pool has died - safe now to exit vcsm
++static void cma_buf_pool_on_delete_cb(void * v)
++{   // Also called directly by cma_buf_pool_delete when no generic pool was ever created
++    cma_buf_pool_t * const cbp = v;
++
++    cma_vcsm_exit(cbp->init_type);
++    free(cbp);
++}
++
++void cma_buf_pool_cancel(cma_buf_pool_t * const cbp)
++{   // Fail current and future buffer allocations; tolerates a NULL or pool-less cbp
++    if (cbp == NULL || cbp->pool == NULL)
++        return;
++
++    cma_pool_fixed_cancel(cbp->pool);
++}
++
++void cma_buf_pool_uncancel(cma_buf_pool_t * const cbp)
++{   // Allow buffer allocations again; tolerates a NULL or pool-less cbp
++    if (cbp == NULL || cbp->pool == NULL)
++        return;
++
++    cma_pool_fixed_uncancel(cbp->pool);
++}
++
++// User finished with pool
++void cma_buf_pool_delete(cma_buf_pool_t * const cbp)
++{   // cbp must not be used after this call: it is freed now or when the last buffer returns
++    if (cbp == NULL)
++        return;
++
++    if (cbp->pool != NULL)
++    {
++        // We will call cma_buf_pool_on_delete_cb when the pool finally dies
++        // (might be now) which will free up our env.
++        cma_pool_fixed_kill(cbp->pool);
++    }
++    else
++    {
++        // Had no pool for some reason (error) but must still finish cleanup
++        cma_buf_pool_on_delete_cb(cbp);
++    }
++}
++
++int cma_buf_pool_fill(cma_buf_pool_t * const cbp, const size_t el_size)
++{   // Pre-allocate buffers of el_size; unlike cancel/uncancel there is no NULL check on cbp
++    return cma_pool_fixed_fill(cbp->pool, el_size);
++}
++
++int cma_buf_pool_resize(cma_buf_pool_t * const cbp,
++                        const unsigned int new_pool_size, const int new_flight_size)
++{   // Thin wrapper over cma_pool_fixed_resize: 0 on success, -1 on allocation failure
++    return cma_pool_fixed_resize(cbp->pool, new_pool_size, new_flight_size);
++}
++
++cma_buf_pool_t * cma_buf_pool_new(const unsigned int pool_size, const unsigned int flight_size, const bool all_in_flight, const char * const name)
++{   // Returns a new buffer pool, or NULL if vcsm is unavailable or memory runs out
++    vcsm_init_type_t const init_type = cma_vcsm_init();
++    if (init_type == VCSM_INIT_NONE)
++        return NULL;
++
++    cma_buf_pool_t * const cbp = calloc(1, sizeof(cma_buf_pool_t));
++    if (cbp == NULL)
++    {
++        cma_vcsm_exit(init_type);  // undo the init above, as the fail path below does via the delete callback
++        return NULL;
++    }
++    cbp->init_type = init_type;
++    cbp->all_in_flight = all_in_flight;
++    if ((cbp->pool = cma_pool_fixed_new(pool_size, flight_size, cbp, cma_pool_alloc_cb, cma_pool_free_cb, cma_buf_pool_on_delete_cb, name)) == NULL)
++        goto fail;
++    return cbp;
++fail:
++    cma_buf_pool_delete(cbp);  // no generic pool yet: this exits vcsm and frees cbp
++    return NULL;
++}
++
++
++void cma_buf_in_flight(cma_buf_t * const cb)
++{   // Mark cb as in flight; a no-op for pools whose buffers are all in flight from allocation
++    if (!cb->cbp->all_in_flight)
++    {
++        assert(!cb->in_flight);  // must not be marked twice
++        cb->in_flight = true;
++        cma_pool_fixed_inc_in_flight(cb->cbp->pool);
++    }
++}
++
++void cma_buf_end_flight(cma_buf_t * const cb)
++{   // Undo cma_buf_in_flight; safe on NULL and on buffers that are not marked
++    if (cb != NULL && !cb->cbp->all_in_flight && cb->in_flight)
++    {
++        cb->in_flight = false;
++        cma_pool_fixed_dec_in_flight(cb->cbp->pool);
++    }
++}
++
++
++// Return vcsm handle
++unsigned int cma_buf_vcsm_handle(const cma_buf_t * const cb)
++{   // cb must not be NULL
++    return cb->vcsm_h;
++}
++
++size_t cma_buf_size(const cma_buf_t * const cb)
++{   // Allocation size in bytes recorded when the buffer was created
++    return cb->size;
++}
++
++int cma_buf_add_context2(cma_buf_t *const cb, picture_context_t * const ctx2)
++{   // Attach ctx2 to cb; VLC_EGENERIC if a context is already attached
++    if (cb->ctx2 != NULL)
++        return VLC_EGENERIC;
++
++    cb->ctx2 = ctx2;  // cb now owns it: cma_pool_delete calls its destroy callback
++    return VLC_SUCCESS;
++}
++
++unsigned int cma_buf_vc_handle(const cma_buf_t *const cb)
++{   // VC handle obtained from vcsm_vc_hdl_from_hdl at allocation
++    return cb->vc_h;
++}
++
++int cma_buf_fd(const cma_buf_t *const cb)
++{   // dmabuf fd, or -1 when none was exported (non-CMA init type)
++    return cb->fd;
++}
++
++void * cma_buf_addr(const cma_buf_t *const cb)
++{   // ARM-side mapped address of the buffer
++    return cb->mmap;
++}
++
++unsigned int cma_buf_vc_addr(const cma_buf_t *const cb)
++{   // VC address obtained from vcsm_vc_addr_from_hdl at allocation
++    return cb->vc_addr;
++}
++
++
++picture_context_t * cma_buf_context2(const cma_buf_t *const cb)
++{   // Context attached with cma_buf_add_context2, or NULL
++    return cb->ctx2;
++}
++
++
++void cma_buf_unref(cma_buf_t * const cb)
++{   // Drop one reference (NULL is ignored); the last one hands the buffer back to its pool
++    if (cb == NULL)
++        return;
++    if (atomic_fetch_sub(&cb->ref_count, 1) <= 1)  // old value <= 1: that was the last reference
++    {
++        const bool was_in_flight = cb->in_flight;
++        cb->in_flight = false;
++        cma_pool_fixed_put(cb->cbp->pool, cb, cb->size, was_in_flight);  // put also removes the in-flight count
++    }
++}
++
++cma_buf_t * cma_buf_ref(cma_buf_t * const cb)
++{   // Take one reference and return cb so calls can be nested; NULL passes through
++    if (cb == NULL)
++        return NULL;
++    atomic_fetch_add(&cb->ref_count, 1);
++    return cb;
++}
++
++cma_buf_t * cma_buf_pool_alloc_buf(cma_buf_pool_t * const cbp, const size_t size)
++{   // Get a buffer of size bytes holding one reference; NULL on failure or cancel
++    cma_buf_t *const cb = cma_pool_fixed_get(cbp->pool, size, cbp->all_in_flight, false);
++
++    if (cb == NULL)
++        return NULL;
++
++    cb->in_flight = cbp->all_in_flight;  // mirrors the inc_flight argument passed to get above
++    // When 1st allocated or retrieved from the pool the block will have a
++    // ref count of 0 so ref here
++    return cma_buf_ref(cb);
++}
++
+--- /dev/null
++++ b/modules/hw/mmal/mmal_cma.h
+@@ -0,0 +1,71 @@
++#ifndef VLC_MMAL_MMAL_CMA_H_
++#define VLC_MMAL_MMAL_CMA_H_
++
++
++struct cma_pool_fixed_s;
++typedef struct cma_pool_fixed_s cma_pool_fixed_t;
++
++typedef void * cma_pool_alloc_fn(void * v, size_t size);
++typedef void cma_pool_free_fn(void * v, void * el, size_t size);
++typedef void cma_pool_on_delete_fn(void * v);
++
++#if 0
++void cma_pool_fixed_unref(cma_pool_fixed_t * const p);
++void cma_pool_fixed_ref(cma_pool_fixed_t * const p);
++void * cma_pool_fixed_get(cma_pool_fixed_t * const p, const size_t req_el_size, const bool in_flight);
++void cma_pool_fixed_put(cma_pool_fixed_t * const p, void * v, const size_t el_size, const bool was_in_flight);
++void cma_pool_fixed_inc_in_flight(cma_pool_fixed_t * const p);
++void cma_pool_fixed_dec_in_flight(cma_pool_fixed_t * const p);
++void cma_pool_fixed_cancel(cma_pool_fixed_t * const p);
++void cma_pool_fixed_uncancel(cma_pool_fixed_t * const p);
++void cma_pool_fixed_kill(cma_pool_fixed_t * const p);
++int cma_pool_fixed_resize(cma_pool_fixed_t * const p,
++                          const unsigned int new_pool_size, const int new_flight_size);
++cma_pool_fixed_t * cma_pool_fixed_new(const unsigned int pool_size,
++                   const int flight_size,
++                   void * const alloc_v,
++                   cma_pool_alloc_fn * const alloc_fn, cma_pool_free_fn * const free_fn,
++                   cma_pool_on_delete_fn * const on_delete_fn,
++                   const char * const name);
++#endif
++
++struct cma_buf_s;
++typedef struct cma_buf_s cma_buf_t;
++
++void cma_buf_in_flight(cma_buf_t * const cb);
++void cma_buf_end_flight(cma_buf_t * const cb);
++unsigned int cma_buf_vcsm_handle(const cma_buf_t * const cb);
++size_t cma_buf_size(const cma_buf_t * const cb);
++int cma_buf_add_context2(cma_buf_t *const cb, picture_context_t * const ctx2);
++unsigned int cma_buf_vc_handle(const cma_buf_t *const cb);
++int cma_buf_fd(const cma_buf_t *const cb);
++void * cma_buf_addr(const cma_buf_t *const cb);
++unsigned int cma_buf_vc_addr(const cma_buf_t *const cb);
++picture_context_t * cma_buf_context2(const cma_buf_t *const cb);
++
++void cma_buf_unref(cma_buf_t * const cb);
++cma_buf_t * cma_buf_ref(cma_buf_t * const cb);
++
++struct cma_buf_pool_s;
++typedef struct cma_buf_pool_s cma_buf_pool_t;
++
++cma_buf_t * cma_buf_pool_alloc_buf(cma_buf_pool_t * const p, const size_t size);
++void cma_buf_pool_cancel(cma_buf_pool_t * const cbp);
++void cma_buf_pool_uncancel(cma_buf_pool_t * const cbp);
++void cma_buf_pool_delete(cma_buf_pool_t * const p);
++int cma_buf_pool_fill(cma_buf_pool_t * const cbp, const size_t el_size);
++int cma_buf_pool_resize(cma_buf_pool_t * const cbp,
++                          const unsigned int new_pool_size, const int new_flight_size);
++cma_buf_pool_t * cma_buf_pool_new(const unsigned int pool_size, const unsigned int flight_size,
++                                  const bool all_in_flight, const char * const name);
++
++static inline void cma_buf_pool_deletez(cma_buf_pool_t ** const pp)
++{   // Delete *pp if set and leave *pp NULL
++    cma_buf_pool_t * const p = *pp;
++    if (p != NULL) {
++        *pp = NULL;  // cleared before the delete so the caller's variable never dangles
++        cma_buf_pool_delete(p);
++    }
++}
++
++#endif // VLC_MMAL_MMAL_CMA_H_
+--- /dev/null
++++ b/modules/hw/mmal/mmal_gl.h
+@@ -0,0 +1,45 @@
++// Trim this include list!
++
++#include <libdrm/drm.h>
++#include <libdrm/drm_mode.h>
++#include <libdrm/drm_fourcc.h>
++//#include <xf86drm.h>
++//#include <xf86drmMode.h>
++#include <X11/Xlib.h>
++#include <X11/Xutil.h>
++#include <X11/Xlib-xcb.h>
++#include <epoxy/gl.h>
++#include <epoxy/egl.h>
++#include <xcb/xcb.h>
++#include <xcb/dri3.h>
++
++struct mmal_gl_converter_s;
++
++typedef struct cma_buf_s {  // NOTE(review): mmal_cma.c defines the same tag with a different layout - do not mix the two
++    struct mmal_gl_converter_s * sys;
++
++    size_t size;
++    __u32 h_dumb;
++    int fd;
++    unsigned int h_vcsm;  // value returned by hw_mmal_h_vcsm()
++    void * mapped_addr;
++    GLuint texture;
++} cma_buf_t;
++
++typedef struct cma_pic_sys_s {
++    cma_buf_t * cmabuf;
++} cma_pic_sys_t;
++
++static inline unsigned int
++hw_mmal_h_vcsm(const picture_t * const pic)
++{   // Returns the h_vcsm of the buffer attached to pic, or 0 if pic carries none
++    const cma_pic_sys_t *const pic_sys = (cma_pic_sys_t *)pic->p_sys;
++
++    if (pic->format.i_chroma != VLC_CODEC_MMAL_GL_RGB32 ||
++        pic_sys == NULL || pic_sys->cmabuf == NULL) {
++        return 0;  // wrong chroma or nothing attached
++    }
++
++    return pic_sys->cmabuf->h_vcsm;
++}
++
+--- /dev/null
++++ b/modules/hw/mmal/mmal_piccpy_neon.S
+@@ -0,0 +1,105 @@
++// Copy pix
++
++        .syntax unified
++        .arm
++//      .thumb
++        .text
++        .balign 16      @ 16 byte boundary (on ARM .align N means 2^N bytes, so .align 16 asked for 64KiB)
++        .arch armv7-a
++        .fpu neon-vfpv4
++
++
++.macro  function name       @ export \name and place its label here
++        .global  \name
++#ifdef __ELF__
++        .type    \name, %function
++#endif
++\name:
++.endm
++
++
++.macro  piccpy_to_8, bit_depth
++        subs     r2, #128
++        vpush    {q4-q7}            @ q4-q7 are used below and restored before return
++        blt      2f                 @ fewer than 128 pels in total: tail only
++1:
++        vldm     r1!, {q0-q7}
++        subs     r2, #128           @ flags consumed by the bge at the loop end (NEON ops leave them alone)
++        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
++        vqrshrn.u16 d1,  q1,  #\bit_depth - 8
++        vqrshrn.u16 d2,  q2,  #\bit_depth - 8
++        vqrshrn.u16 d3,  q3,  #\bit_depth - 8
++        vldm     r1!, {q8-q15}
++        vqrshrn.u16 d4,  q4,  #\bit_depth - 8
++        vqrshrn.u16 d5,  q5,  #\bit_depth - 8
++        vqrshrn.u16 d6,  q6,  #\bit_depth - 8
++        vqrshrn.u16 d7,  q7,  #\bit_depth - 8
++        vqrshrn.u16 d8,  q8,  #\bit_depth - 8
++        vqrshrn.u16 d9,  q9,  #\bit_depth - 8
++        vqrshrn.u16 d10, q10, #\bit_depth - 8
++        vqrshrn.u16 d11, q11, #\bit_depth - 8
++        vqrshrn.u16 d12, q12, #\bit_depth - 8
++        vqrshrn.u16 d13, q13, #\bit_depth - 8
++        vqrshrn.u16 d14, q14, #\bit_depth - 8
++        vqrshrn.u16 d15, q15, #\bit_depth - 8
++        vstm     r0!, {q0-q7}
++        bge      1b
++2:
++        tst      r2, #64            @ r2 = leftover - 128, so bits 0-6 still equal the leftover pel count
++        beq      1f
++
++        vldm     r1!, {q0-q7}
++        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
++        vqrshrn.u16 d1,  q1,  #\bit_depth - 8
++        vqrshrn.u16 d2,  q2,  #\bit_depth - 8
++        vqrshrn.u16 d3,  q3,  #\bit_depth - 8
++        vqrshrn.u16 d4,  q4,  #\bit_depth - 8
++        vqrshrn.u16 d5,  q5,  #\bit_depth - 8
++        vqrshrn.u16 d6,  q6,  #\bit_depth - 8
++        vqrshrn.u16 d7,  q7,  #\bit_depth - 8
++        vstm     r0!, {q0-q3}
++1:
++        tst      r2, #32            @ each tail stage runs only if its bit is set in the count
++        beq      1f
++
++        vldm     r1!, {q0-q3}
++        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
++        vqrshrn.u16 d1,  q1,  #\bit_depth - 8
++        vqrshrn.u16 d2,  q2,  #\bit_depth - 8
++        vqrshrn.u16 d3,  q3,  #\bit_depth - 8
++        vstm     r0!, {q0-q1}
++1:
++        tst      r2, #16
++        beq      1f
++
++        vldm     r1!, {q0-q1}
++        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
++        vqrshrn.u16 d1,  q1,  #\bit_depth - 8
++        vstm     r0!, {q0}
++1:
++        tst      r2, #8
++        beq      1f
++
++        vldm     r1!, {q0}
++        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
++        vstr     d0, [r0]
++        add      r0, #8
++1:
++        tst      r2, #4             @ a final 1-3 pels are not copied (unchanged from before)
++        beq      1f
++
++        vldr     d0, [r1]
++        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
++        vstr     s0, [r0]
++1:
++        vpop     {q4-q7}
++        bx       lr
++.endm
++
++
++@ [r0] Dest - 8-bit pels written here
++@ [r1] Src  - 16-bit pels read from here
++@  r2  Pels - number of pels to convert
++function mmal_piccpy_10_to_8_neon
++        piccpy_to_8 10          @ each pel is shifted right by 10 - 8 = 2 bits
++
+--- a/modules/hw/mmal/mmal_picture.c
++++ b/modules/hw/mmal/mmal_picture.c
+@@ -21,25 +21,1542 @@
+  * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
+  *****************************************************************************/
+
++// We would really like to use vlc_thread.h but the detach thread stuff can't be
++// used here :-(
++#include <pthread.h>
++
++#include <stdatomic.h>
++#include <unistd.h>
++#include <fcntl.h>
++
+ #include <vlc_common.h>
++#include <vlc_cpu.h>
+ #include <vlc_picture.h>
++
++#pragma GCC diagnostic push
++#pragma GCC diagnostic ignored "-Wbad-function-cast"
++#include <bcm_host.h>
++#pragma GCC diagnostic pop
+ #include <interface/mmal/mmal.h>
++#include <interface/mmal/util/mmal_util.h>
++#include <interface/mmal/util/mmal_default_components.h>
++#include <interface/vmcs_host/vcgencmd.h>
++#include <interface/vcsm/user-vcsm.h>
+
++#include "mmal_cma.h"
+ #include "mmal_picture.h"
++#include "transform_ops.h"
++
++#define TRACE_TRANSFORMS 0  // non-zero enables the fprintf(stderr, ...) rect traces
++
++#define UINT64_SIZE(s) (((s) + sizeof(uint64_t) - 1)/sizeof(uint64_t))  // uint64_t count covering s bytes, rounded up
++
++static inline char safe_char(const unsigned int c0)
++{
++    const unsigned int low = c0 & 0xff;  // only the low byte is treated as a character
++    return (low <= ' ' || low >= 0x7f) ? '.' : (char)low;  // '.' stands in for space and non-printables
++}
++
++const char * str_fourcc(char * const buf, const unsigned int fcc)
++{
++    unsigned int i;  // byte index, least significant byte first
++    // A zero code gets a fixed placeholder; buf is left untouched in that case
++    if (fcc == 0)
++        return "----";
++    // buf must have room for 4 characters plus the terminator
++    for (i = 0; i != 4; ++i)
++        buf[i] = safe_char(fcc >> (i * 8));
++    buf[4] = 0;
++    return buf;
++}
++
++// Write back and invalidate (WB + Inv) the range [start, start + len) via vcsm_clean_invalid2
++static inline void flush_range(void * const start, const size_t len)
++{
++    uint64_t buf[UINT64_SIZE(sizeof(struct vcsm_user_clean_invalid2_s) + sizeof(struct vcsm_user_clean_invalid2_block_s))];
++    struct vcsm_user_clean_invalid2_s * const b = (struct vcsm_user_clean_invalid2_s *)buf;
++    // buf is uint64_t storage sized for the request header plus exactly one block entry
++    *b = (struct vcsm_user_clean_invalid2_s){
++        .op_count = 1
++    };
++
++    b->s[0] = (struct vcsm_user_clean_invalid2_block_s){
++        .invalidate_mode = 3,   // wb + invalidate
++        .block_count = 1,
++        .start_address = start, // Rely on clean inv to fix up align & size boundaries
++        .block_size = len,
++        .inter_block_stride = 0
++    };
++
++    vcsm_clean_invalid2(b);  // return value is not checked
++}
++
++MMAL_FOURCC_T vlc_to_mmal_color_space(const video_color_space_t vlc_cs)
++{
++    // Only BT.601 and BT.709 have a translation here; every other value
++    // is reported to MMAL as an unknown colour space
++    MMAL_FOURCC_T mmal_cs = MMAL_COLOR_SPACE_UNKNOWN;
++
++    if (vlc_cs == COLOR_SPACE_BT601)
++        mmal_cs = MMAL_COLOR_SPACE_ITUR_BT601;
++    else if (vlc_cs == COLOR_SPACE_BT709)
++        mmal_cs = MMAL_COLOR_SPACE_ITUR_BT709;
++
++    return mmal_cs;
++}
++
++MMAL_FOURCC_T vlc_to_mmal_video_fourcc(const video_frame_format_t * const vf_vlc)
++{   // Map a VLC chroma (plus RGB masks where needed) to an MMAL encoding; 0 if no match
++    switch (vf_vlc->i_chroma) {
++        case VLC_CODEC_MMAL_ZC_RGB32:
++        case VLC_CODEC_RGB32:
++        {
++            // VLC RGB32 aka RV32 means we have to look at the mask values
++            const uint32_t r = vf_vlc->i_rmask;
++            const uint32_t g = vf_vlc->i_gmask;
++            const uint32_t b = vf_vlc->i_bmask;
++            if (r == 0xff0000 && g == 0xff00 && b == 0xff)
++                return MMAL_ENCODING_BGRA;
++            if (r == 0xff && g == 0xff00 && b == 0xff0000)
++                return MMAL_ENCODING_RGBA;
++            if (r == 0xff000000 && g == 0xff0000 && b == 0xff00)
++                return MMAL_ENCODING_ABGR;
++            if (r == 0xff00 && g == 0xff0000 && b == 0xff000000)
++                return MMAL_ENCODING_ARGB;
++            break;  // mask layout not recognised - ends up at return 0
++        }
++        case VLC_CODEC_RGB16:
++        {
++            // VLC RGB16 aka RV16 means we have to look at the mask values
++            const uint32_t r = vf_vlc->i_rmask;
++            const uint32_t g = vf_vlc->i_gmask;
++            const uint32_t b = vf_vlc->i_bmask;
++            if (r == 0xf800 && g == 0x7e0 && b == 0x1f)
++                return MMAL_ENCODING_RGB16;
++            break;  // only the 5-6-5 mask layout above is accepted
++        }
++        case VLC_CODEC_I420:
++        case VLC_CODEC_MMAL_ZC_I420:
++            return MMAL_ENCODING_I420;
++        case VLC_CODEC_RGBA:
++            return MMAL_ENCODING_RGBA;
++        case VLC_CODEC_BGRA:
++            return MMAL_ENCODING_BGRA;
++        case VLC_CODEC_ARGB:
++            return MMAL_ENCODING_ARGB;
++        // VLC_CODEC_ABGR does not exist in VLC
++        case VLC_CODEC_MMAL_OPAQUE:
++            return MMAL_ENCODING_OPAQUE;
++        case VLC_CODEC_MMAL_ZC_SAND8:
++            return MMAL_ENCODING_YUVUV128;
++        case VLC_CODEC_MMAL_ZC_SAND10:
++            return MMAL_ENCODING_YUVUV64_10;
++        case VLC_CODEC_MMAL_ZC_SAND30:
++            return MMAL_ENCODING_YUV10_COL;
++        default:
++            break;
++    }
++    return 0;  // no MMAL equivalent found
++}
++
++static void vlc_fmt_to_video_format(MMAL_VIDEO_FORMAT_T *const vf_mmal, const video_frame_format_t * const vf_vlc)
++{   // Fill *vf_mmal (size, crop, par, frame rate, colour space) from the VLC format
++    const unsigned int wmask = (vf_vlc->i_chroma == VLC_CODEC_MMAL_ZC_I420 ||
++                                vf_vlc->i_chroma == VLC_CODEC_I420) ? 31 : 15;
++
++    vf_mmal->width          = (vf_vlc->i_width + wmask) & ~wmask;  // round up to 32 (I420) or 16
++    vf_mmal->height         = (vf_vlc->i_height + 15) & ~15;       // round up to a multiple of 16
++    vf_mmal->crop.x         = vf_vlc->i_x_offset;
++    vf_mmal->crop.y         = vf_vlc->i_y_offset;
++    vf_mmal->crop.width     = vf_vlc->i_visible_width;
++    vf_mmal->crop.height    = vf_vlc->i_visible_height;
++    if (vf_vlc->i_sar_num == 0 || vf_vlc->i_sar_den == 0) {  // unset SAR: fall back to 1:1
++        vf_mmal->par.num        = 1;
++        vf_mmal->par.den        = 1;
++    } else {
++        vf_mmal->par.num        = vf_vlc->i_sar_num;
++        vf_mmal->par.den        = vf_vlc->i_sar_den;
++    }
++    vf_mmal->frame_rate.num = vf_vlc->i_frame_rate;
++    vf_mmal->frame_rate.den = vf_vlc->i_frame_rate_base;  // copied as-is, not checked for zero
++    vf_mmal->color_space    = vlc_to_mmal_color_space(vf_vlc->space);
++}
++
++
++void hw_mmal_vlc_fmt_to_mmal_fmt(MMAL_ES_FORMAT_T *const es_fmt, const video_frame_format_t * const vf_vlc)
++{   // Only es_fmt->es->video is filled in; type and encoding are left untouched
++    vlc_fmt_to_video_format(&es_fmt->es->video, vf_vlc);
++}
++
++bool hw_mmal_vlc_pic_to_mmal_fmt_update(MMAL_ES_FORMAT_T *const es_fmt, const picture_t * const pic)
++{   // Rebuild the video format from pic; if it differs, store it in es_fmt and return true
++    MMAL_VIDEO_FORMAT_T vf_new_ss;
++    MMAL_VIDEO_FORMAT_T *const vf_old = &es_fmt->es->video;
++    MMAL_VIDEO_FORMAT_T *const vf_new = &vf_new_ss;
++
++    vlc_fmt_to_video_format(vf_new, &pic->format);
++
++    // If we have a format that might have come from ffmpeg then rework for
++    // a better guess as to layout. All sand stuff is "special" with regards to
++    // width/height vs real layout so leave as is if that
++    if ((pic->format.i_chroma == VLC_CODEC_MMAL_ZC_I420 ||
++         pic->format.i_chroma == VLC_CODEC_MMAL_ZC_RGB32) &&
++        pic->p[0].i_pixel_pitch != 0)  // also guards the division below
++    {
++        // Now overwrite width/height with a better guess as to actual layout info
++        vf_new->height = pic->p[0].i_lines;
++        vf_new->width = pic->p[0].i_pitch / pic->p[0].i_pixel_pitch;
++    }
++
++    if (
++        vf_new->width          != vf_old->width          ||
++        vf_new->height         != vf_old->height         ||
++        vf_new->crop.x         != vf_old->crop.x         ||
++        vf_new->crop.y         != vf_old->crop.y         ||
++        vf_new->crop.width     != vf_old->crop.width     ||
++        vf_new->crop.height    != vf_old->crop.height    ||
++        vf_new->par.num        != vf_old->par.num        ||
++        vf_new->par.den        != vf_old->par.den        ||
++        // Frame rate ignored
++        vf_new->color_space    != vf_old->color_space)
++    {
++#if 0
++        char dbuf0[5], dbuf1[5];
++        printf("%dx%d (%d,%d %dx%d) par:%d/%d %s -> %dx%d (%d,%d %dx%d) par:%d/%d %s\n",
++               vf_old->width          ,
++               vf_old->height         ,
++               vf_old->crop.x         ,
++               vf_old->crop.y         ,
++               vf_old->crop.width     ,
++               vf_old->crop.height    ,
++               vf_old->par.num        ,
++               vf_old->par.den        ,
++               str_fourcc(dbuf0, vf_old->color_space)    ,
++               vf_new->width          ,
++               vf_new->height         ,
++               vf_new->crop.x         ,
++               vf_new->crop.y         ,
++               vf_new->crop.width     ,
++               vf_new->crop.height    ,
++               vf_new->par.num        ,
++               vf_new->par.den        ,
++               str_fourcc(dbuf1, vf_new->color_space)    );
++#endif
++        *vf_old = *vf_new;  // whole struct is copied, so the new frame rate is stored too
++        return true;
++    }
++    return false;  // nothing compared above has changed; es_fmt is untouched
++}
++
++
++hw_mmal_port_pool_ref_t * hw_mmal_port_pool_ref_create(MMAL_PORT_T * const port,
++   const unsigned int headers, const uint32_t payload_size)
++{   // Allocate a ref-counted wrapper round a new MMAL port pool; NULL on any failure
++    hw_mmal_port_pool_ref_t * ppr = calloc(1, sizeof(hw_mmal_port_pool_ref_t));
++    if (ppr == NULL)
++        return NULL;
++
++    if ((ppr->pool = mmal_port_pool_create(port, headers, payload_size)) == NULL)
++        goto fail;
++
++    ppr->port = port;
++    atomic_store(&ppr->refs, 1);  // the caller holds the initial reference
++    return ppr;
++
++fail:
++    free(ppr);  // no pool was created, so only the wrapper needs freeing
++    return NULL;
++}
++
++static void do_detached(void *(*fn)(void *), void * v)
++{   // Run fn(v) on a new detached thread; if the thread cannot be created fn is not run
++    pthread_t dothread;  // only meaningful once pthread_create has succeeded
++    if (pthread_create(&dothread, NULL, fn, v) == 0)
++        pthread_detach(dothread);
++}
++
++// Destroy a ppr - arranged s.t. it has the correct prototype for a pthread start routine
++static void * kill_ppr(void * v)
++{
++    hw_mmal_port_pool_ref_t * const ppr = v;
++    if (ppr->port->is_enabled)
++        mmal_port_disable(ppr->port);  // Avoid annoyed messages from MMAL when we kill the pool
++    mmal_port_pool_destroy(ppr->port, ppr->pool);
++    free(ppr);
++    return NULL;  // no result - the value only exists to satisfy the prototype
++}
++
++void hw_mmal_port_pool_ref_release(hw_mmal_port_pool_ref_t * const ppr, const bool in_cb)
++{   // Drop one reference (NULL is ignored); the last reference destroys the pool
++    if (ppr == NULL)
++        return;
++    if (atomic_fetch_sub(&ppr->refs, 1) != 1)  // fetch_sub returns the old count
++        return;
++    if (in_cb)
++        do_detached(kill_ppr, ppr);  // in_cb: tear down on a detached thread, not inline
++    else
++        kill_ppr(ppr);
++}
++
++// Put buffer in port if possible - if not then release to pool
++// Returns true if sent, false if recycled
++bool hw_mmal_port_pool_ref_recycle(hw_mmal_port_pool_ref_t * const ppr, MMAL_BUFFER_HEADER_T * const buf)
++{
++    mmal_buffer_header_reset(buf);
++    buf->user_data = NULL;  // clear any per-use data before the header is reused
++
++    if (mmal_port_send_buffer(ppr->port, buf) == MMAL_SUCCESS)
++        return true;
++    mmal_buffer_header_release(buf);  // port refused the buffer: drop our reference instead
++    return false;
++}
++
++MMAL_STATUS_T hw_mmal_port_pool_ref_fill(hw_mmal_port_pool_ref_t * const ppr)
++{   // Send every buffer currently queued in the pool to the port; stops at the first error
++    MMAL_BUFFER_HEADER_T * buf;
++    MMAL_STATUS_T err = MMAL_SUCCESS;  // also the result when the queue is already empty
++
++    while ((buf = mmal_queue_get(ppr->pool->queue)) != NULL) {
++        if ((err = mmal_port_send_buffer(ppr->port, buf)) != MMAL_SUCCESS)
++        {
++            mmal_queue_put_back(ppr->pool->queue, buf);  // keep the unsent buffer in the pool
++            break;
++        }
++    }
++    return err;
++}
++
++
++MMAL_STATUS_T hw_mmal_opaque_output(vlc_object_t * const obj,
++                                    hw_mmal_port_pool_ref_t ** pppr,
++                                    MMAL_PORT_T * const port,
++                                    const unsigned int extra_buffers, MMAL_PORT_BH_CB_T callback)
++{   // Set port up for zero-copy opaque output, create its pool in *pppr and enable it
++    MMAL_STATUS_T status;
++
++    port->userdata = (struct MMAL_PORT_USERDATA_T *)obj;  // obj is what the callback gets back
++
++    status = port_parameter_set_uint32(port, MMAL_PARAMETER_EXTRA_BUFFERS, extra_buffers);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(obj, "Failed to set MMAL_PARAMETER_EXTRA_BUFFERS on output port (status=%"PRIx32" %s)",
++                status, mmal_status_to_string(status));
++        return status;
++    }
++
++    status = port_parameter_set_bool(port, MMAL_PARAMETER_ZERO_COPY, 1);
++    if (status != MMAL_SUCCESS) {
++       msg_Err(obj, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
++                port->name, status, mmal_status_to_string(status));
++       return status;
++    }
++
++    port->format->encoding = MMAL_ENCODING_OPAQUE;
++    port->format->encoding_variant = 0;
++    if ((status = mmal_port_format_commit(port)) != MMAL_SUCCESS)
++    {
++        msg_Err(obj, "Failed to commit format on port %s (status=%"PRIx32" %s)",
++                 port->name, status, mmal_status_to_string(status));
++        return status;
++    }
++
++    port->buffer_num = 30;  // fixed pool depth used for opaque output
++    port->buffer_size = port->buffer_size_recommended;
++
++    if ((*pppr = hw_mmal_port_pool_ref_create(port, port->buffer_num, port->buffer_size)) == NULL) {
++        msg_Err(obj, "Failed to create output pool");
++        return MMAL_ENOMEM;  // status still holds MMAL_SUCCESS here so must not be returned
++    }
++
++    status = mmal_port_enable(port, callback);
++    if (status != MMAL_SUCCESS) {
++        hw_mmal_port_pool_ref_release(*pppr, false);
++        *pppr = NULL;  // do not leave the caller holding a released pool
++        msg_Err(obj, "Failed to enable output port %s (status=%"PRIx32" %s)",
++                port->name, status, mmal_status_to_string(status));
++        return status;
++    }
++
++    return MMAL_SUCCESS;
++}
++
++
++void hw_mmal_pic_ctx_destroy(picture_context_t * pic_ctx_cmn)
++{   // Release everything the context holds (buffer headers, CMA buffer) then free it
++    pic_ctx_mmal_t * const ctx = (pic_ctx_mmal_t *)pic_ctx_cmn;
++    unsigned int i;
++
++    for (i = 0; i != ctx->buf_count; ++i) {
++        if (ctx->bufs[i] != NULL)  // slots may be empty
++            mmal_buffer_header_release(ctx->bufs[i]);
++    }
++
++    cma_buf_end_flight(ctx->cb);  // NOTE(review): ctx->cb may be NULL here - confirm both calls accept that
++    cma_buf_unref(ctx->cb);
++
++    free(ctx);
++}
++
++picture_context_t * hw_mmal_pic_ctx_copy(picture_context_t * pic_ctx_cmn)
++{   // Make a new context sharing the same buffers, taking a reference on each; NULL on OOM
++    const pic_ctx_mmal_t * const src_ctx = (pic_ctx_mmal_t *)pic_ctx_cmn;
++    pic_ctx_mmal_t * const dst_ctx = calloc(1, sizeof(*dst_ctx));
++    unsigned int i;
++
++    if (dst_ctx == NULL)
++        return NULL;
++
++    // Copy the common part (copy / destroy function pointers are set in it elsewhere)
++    dst_ctx->cmn = src_ctx->cmn;
++
++    dst_ctx->cb = cma_buf_ref(src_ctx->cb);
++
++    dst_ctx->buf_count = src_ctx->buf_count;
++    for (i = 0; i != src_ctx->buf_count; ++i) {
++        dst_ctx->bufs[i] = src_ctx->bufs[i];
++        if (dst_ctx->bufs[i] != NULL)
++            mmal_buffer_header_acquire(dst_ctx->bufs[i]);  // balanced by the release in destroy
++    }
++
++    return &dst_ctx->cmn;
++}
++
++static MMAL_BOOL_T
++buf_pre_release_cb(MMAL_BUFFER_HEADER_T * buf, void *userdata)
++{   // Pre-release hook: hand the buffer back to its port (or pool) and drop the ppr ref
++    hw_mmal_port_pool_ref_t * const ppr = userdata;
++
++    // Kill the callback - otherwise we will go in circles!
++    mmal_buffer_header_pre_release_cb_set(buf, (MMAL_BH_PRE_RELEASE_CB_T)0, NULL);
++    mmal_buffer_header_acquire(buf);  // Ref it again
++
++    // As we have re-acquired the buffer we need a full release
++    // (not continue) to zap the ref count back to zero
++    // This is "safe" 'cos we have already reset the cb
++    hw_mmal_port_pool_ref_recycle(ppr, buf);
++    hw_mmal_port_pool_ref_release(ppr, true); // Assume in callback
++
++    return MMAL_TRUE;  // presumably tells MMAL the release has been dealt with - confirm in MMAL docs
++}
++
++// Buffer belongs to context on successful return from this fn
++// is still valid on failure (NULL is returned and buf is not touched)
++picture_context_t *
++hw_mmal_gen_context(MMAL_BUFFER_HEADER_T * buf, hw_mmal_port_pool_ref_t * const ppr)
++{
++    pic_ctx_mmal_t * const ctx = calloc(1, sizeof(pic_ctx_mmal_t));
++
++    if (ctx == NULL)
++        return NULL;
++
++    // If we have an associated ppr then ref & set appropriate callbacks
++    if (ppr != NULL) {
++        hw_mmal_port_pool_ref_acquire(ppr);  // dropped again by buf_pre_release_cb
++        mmal_buffer_header_pre_release_cb_set(buf, buf_pre_release_cb, ppr);
++        buf->user_data = NULL;
++    }
++
++    ctx->cmn.copy = hw_mmal_pic_ctx_copy;
++    ctx->cmn.destroy = hw_mmal_pic_ctx_destroy;
++
++    ctx->buf_count = 1;  // a new context starts with just this one buffer
++    ctx->bufs[0] = buf;
++
++    return &ctx->cmn;
++}
++
++// n is the number of elements (pels), not bytes
++// src holds 16-bit pels, dest receives one byte per pel
++typedef void piccpy_fn(void * dest, const void * src, size_t n);
++
++extern piccpy_fn mmal_piccpy_10_to_8_neon;  // implemented in mmal_piccpy_neon.S
++
++static void piccpy_10_to_8_c(void * dest, const void * src, size_t n)
++{   // Plain C version, used by mem_copy_2d_10_to_8 when NEON is not available
++    uint8_t * d = dest;
++    const uint16_t * s = src;
++    while (n-- != 0)
++        *d++ = *s++ >> 2;  // truncating shift; NOTE(review): the NEON version rounds and saturates
++}
++
++// Do a stride converting copy - if the strides are the same and line_len is
++// close then do a single block copy - we don't expect to have to preserve
++// pixels in the output frame. Strides and line_len are in bytes.
++static void mem_copy_2d(uint8_t * d_ptr, const size_t d_stride,
++                        const uint8_t * s_ptr, const size_t s_stride,
++                        size_t lines, const size_t line_len)
++{
++    if (s_stride == d_stride && d_stride < line_len + 32)  // under 32 bytes of padding per line
++    {
++        memcpy(d_ptr, s_ptr, d_stride * lines);  // copies the line padding as well
++    }
++    else
++    {
++        while (lines-- != 0) {
++            memcpy(d_ptr, s_ptr, line_len);
++            d_ptr += d_stride;
++            s_ptr += s_stride;
++        }
++    }
++}
++
++// line_len in D units (destination bytes, one per pel); s_stride is in source bytes
++static void mem_copy_2d_10_to_8(uint8_t * d_ptr, const size_t d_stride,
++                        const uint8_t * s_ptr, const size_t s_stride,
++                        size_t lines, const size_t line_len)
++{
++    piccpy_fn * const docpy = vlc_CPU_ARM_NEON() ? mmal_piccpy_10_to_8_neon : piccpy_10_to_8_c;
++    if (s_stride == d_stride * 2 && d_stride < line_len + 32)  // layouts match: 2 source bytes per pel
++    {
++        docpy(d_ptr, s_ptr, d_stride * lines);  // one call converts everything, padding included
++    }
++    else
++    {
++        while (lines-- != 0) {
++            docpy(d_ptr, s_ptr, line_len);
++            d_ptr += d_stride;
++            s_ptr += s_stride;
++        }
++    }
++}
++
++
++int hw_mmal_copy_pic_to_buf(void * const buf_data,
++                            uint32_t * const pLength,
++                            const MMAL_ES_FORMAT_T * const fmt,
++                            const picture_t * const pic)
++{   // Copy an I420 or 10-bit I420 picture into buf_data as 8-bit I420; *pLength gets the byte count
++    const MMAL_VIDEO_FORMAT_T *const video = &fmt->es->video;
++    uint8_t * const dest = buf_data;
++    size_t length = 0;
++
++    //**** Worry about x/y_offsets
++
++    assert(fmt->encoding == MMAL_ENCODING_I420);  // I420 is the only output layout written here
++
++    switch (pic->format.i_chroma) {
++        case VLC_CODEC_I420:
++        {
++            const size_t y_size = video->width * video->height;
++            mem_copy_2d(dest, video->width,
++                 pic->p[0].p_pixels, pic->p[0].i_pitch,
++                 video->crop.height,
++                 video->crop.width);
++
++            mem_copy_2d(dest + y_size, video->width / 2,
++                 pic->p[1].p_pixels, pic->p[1].i_pitch,
++                 video->crop.height / 2,
++                 video->crop.width / 2);
++
++            mem_copy_2d(dest + y_size + y_size / 4, video->width / 2,
++                 pic->p[2].p_pixels, pic->p[2].i_pitch,
++                 video->crop.height / 2,
++                 video->crop.width / 2);
++
++            // Total size: the Y plane plus two chroma planes of y_size / 4 each
++            length = y_size + y_size / 2;
++            break;
++        }
++
++        case VLC_CODEC_I420_10L:
++        {
++            const size_t y_size = video->width * video->height;
++            mem_copy_2d_10_to_8(dest, video->width,
++                 pic->p[0].p_pixels, pic->p[0].i_pitch,
++                 video->crop.height,
++                 video->crop.width);
++
++            mem_copy_2d_10_to_8(dest + y_size, video->width / 2,
++                 pic->p[1].p_pixels, pic->p[1].i_pitch,
++                 video->crop.height / 2,
++                 video->crop.width / 2);
++
++            mem_copy_2d_10_to_8(dest + y_size + y_size / 4, video->width / 2,
++                 pic->p[2].p_pixels, pic->p[2].i_pitch,
++                 video->crop.height / 2,
++                 video->crop.width / 2);
++
++            // Output is 8-bit, so the size is the same as in the I420 case
++            length = y_size + y_size / 2;
++            break;
++        }
++
++        default:
++            if (pLength != NULL)
++                *pLength = 0;
++            return VLC_EBADVAR;  // any other source chroma is rejected
++    }
++
++    if (cma_vcsm_type() == VCSM_INIT_LEGACY) {  // ** CMA is currently always uncached
++        flush_range(dest, length);  // make sure the copied data is actually in memory
++    }
++
++    if (pLength != NULL)
++        *pLength = (uint32_t)length;
++
++    return VLC_SUCCESS;
++}
++
++
++static MMAL_BOOL_T rep_buf_free_cb(MMAL_BUFFER_HEADER_T *header, void *userdata)
++{   // Pre-release hook set by cma_buf_buf_attach: drops the reference taken there
++    cma_buf_t * const cb = userdata;
++    VLC_UNUSED(header);
++
++    cma_buf_unref(cb);
++    return MMAL_FALSE;  // presumably lets the normal header release carry on - confirm in MMAL docs
++}
++
++static int cma_buf_buf_attach(MMAL_BUFFER_HEADER_T * const buf, cma_buf_t * const cb)
++{   // Point buf at cb and take a ref on cb for the life of buf; VLC_EGENERIC if cb has no VC handle
++    // Just a CMA buffer - fill in new buffer
++    const uintptr_t vc_h = cma_buf_vc_handle(cb);
++    if (vc_h == 0)
++        return VLC_EGENERIC;  // buf has not been modified at this point
++
++    mmal_buffer_header_reset(buf);
++    buf->data       = (uint8_t *)vc_h;  // the VC handle goes in the data field, not a CPU address
++    buf->alloc_size = cma_buf_size(cb);
++    buf->length     = buf->alloc_size;
++    // Ensure cb remains valid for the duration of this buffer
++    mmal_buffer_header_pre_release_cb_set(buf, rep_buf_free_cb, cma_buf_ref(cb));
++    return VLC_SUCCESS;
++}
++
++MMAL_BUFFER_HEADER_T * hw_mmal_pic_buf_copied(const picture_t *const pic,
++                                              MMAL_POOL_T * const rep_pool,
++                                              MMAL_PORT_T * const port,
++                                              cma_buf_pool_t * const cbp)
++{   // Copy pic into a newly allocated CMA buffer attached to a header from rep_pool; NULL on failure
++    MMAL_BUFFER_HEADER_T *const buf = mmal_queue_wait(rep_pool->queue);
++    if (buf == NULL)
++        goto fail0;
++
++    cma_buf_t * const cb = cma_buf_pool_alloc_buf(cbp, port->buffer_size);
++    if (cb == NULL)
++        goto fail1;
++
++    if (cma_buf_buf_attach(buf, cb) != VLC_SUCCESS)
++        goto fail2;
++
++    pic_to_buf_copy_props(buf, pic);
++
++    if (hw_mmal_copy_pic_to_buf(cma_buf_addr(cb), &buf->length, port->format, pic) != VLC_SUCCESS)
++        goto fail2;
++    buf->flags = MMAL_BUFFER_HEADER_FLAG_FRAME_END;
++
++    cma_buf_unref(cb);  // drop the local ref; the attach above took its own for buf
++    return buf;
++
++fail2:
++    cma_buf_unref(cb);
++fail1:
++    mmal_buffer_header_release(buf);
++fail0:
++    return NULL;
++}
++
++MMAL_BUFFER_HEADER_T * hw_mmal_pic_buf_replicated(const picture_t *const pic, MMAL_POOL_T * const rep_pool)
++{   // Get a header from rep_pool that refers to the data pic already holds; NULL on failure
++    pic_ctx_mmal_t *const ctx = (pic_ctx_mmal_t *)pic->context;  // NOTE(review): assumed non-NULL - confirm callers
++    MMAL_BUFFER_HEADER_T *const rep_buf = mmal_queue_wait(rep_pool->queue);
++
++    if (rep_buf == NULL)
++        return NULL;
++
++    if (ctx->bufs[0] != NULL)
++    {
++        // Existing buffer - replicate it
++        if (mmal_buffer_header_replicate(rep_buf, ctx->bufs[0]) != MMAL_SUCCESS)
++            goto fail;
++    }
++    else if (ctx->cb != NULL)
++    {
++        // Just a CMA buffer - fill in new buffer
++        if (cma_buf_buf_attach(rep_buf, ctx->cb) != 0)
++            goto fail;
++    }
++    else
++        goto fail;  // the context has neither an MMAL buffer nor a CMA buffer
++
++    pic_to_buf_copy_props(rep_buf, pic);
++    return rep_buf;
++
++fail:
++    mmal_buffer_header_release(rep_buf);
++    return NULL;
++}
++
++
++
++
++int hw_mmal_get_gpu_mem(void) {
++    static int stashed_val = -2;  // -2: not yet queried, -1: query failed, otherwise bytes
++    VCHI_INSTANCE_T vchi_instance;
++    VCHI_CONNECTION_T *vchi_connection = NULL;
++    char rbuf[1024] = { 0 };
++
++    if (stashed_val >= -1)
++        return stashed_val;  // only ever ask the firmware once
++
++    if (vchi_initialise(&vchi_instance) != 0)
++        goto fail0;
++
++    //create a vchi connection
++    if (vchi_connect(NULL, 0, vchi_instance) != 0)
++        goto fail0;
++
++    vc_vchi_gencmd_init(vchi_instance, &vchi_connection, 1);
++
++    //send the gencmd for the argument
++    if (vc_gencmd_send("get_mem gpu") != 0)
++        goto fail;
++
++    if (vc_gencmd_read_response(rbuf, sizeof(rbuf) - 1) != 0)
++        goto fail;
++
++    if (strncmp(rbuf, "gpu=", 4) != 0)
++        goto fail;
++
++    char *p;
++    unsigned long m = strtoul(rbuf + 4, &p, 10);  // expected reply form: gpu=<n>M
++
++    if (p[0] != 'M' || p[1] != '\0' || m > (unsigned long)(INT32_MAX >> 20))
++        stashed_val = -1;  // malformed reply, or a size whose byte count would not fit
++    else
++        stashed_val = (int)m << 20;  // megabytes to bytes
++
++    vc_gencmd_stop();
++
++    //close the vchi connection
++    vchi_disconnect(vchi_instance);
++
++    return stashed_val;
++
++fail:
++    vc_gencmd_stop();
++    vchi_disconnect(vchi_instance);
++fail0:
++    stashed_val = -1;  // remember the failure so the query is not retried
++    return -1;
++}
++
++// ===========================================================================
++
++typedef struct pool_ent_s
++{
++    struct pool_ent_s * next;  // doubly linked list links; NULL at either end
++    struct pool_ent_s * prev;
++
++    atomic_int ref_count;      // set to 1 by pool_best_fit; entry is recycled when it hits 0
++    unsigned int seq;          // taken from the pool counter in pool_best_fit; never 0
++
++    size_t size;               // allocated bytes (request rounded up to POOL_ENT_ALLOC_BLOCK)
++
++    int vcsm_hdl;              // handle returned by vcsm_malloc_cache
++    int vc_hdl;                // result of vcsm_vc_hdl_from_hdl(vcsm_hdl)
++    void * buf;                // address returned by vcsm_lock(vcsm_hdl)
++
++    unsigned int width;        // width / height / enc_type are reported by hw_mmal_vzc_buf_set_format
++    unsigned int height;
++    MMAL_FOURCC_T enc_type;
++
++    picture_t * pic;           // picture reference, released on recycle or free; may be NULL
++} pool_ent_t;
++
++
++typedef struct ent_list_hdr_s
++{
++    pool_ent_t * ents;  // head of the list (NULL when empty)
++    pool_ent_t * tail;  // last entry (NULL when empty)
++    unsigned int n;     // number of entries currently linked
++} ent_list_hdr_t;
++
++#define ENT_LIST_HDR_INIT (ent_list_hdr_t){ \
++   .ents = NULL, \
++   .tail = NULL, \
++   .n = 0 \
++}
++
++struct vzc_pool_ctl_s
++{
++    atomic_int ref_count;
++
++    ent_list_hdr_t ent_pool;   // recycled entries available for reuse, newest at the head
++    ent_list_hdr_t ents_cur;   // presumably entries used by the current frame - confirm at use sites
++    ent_list_hdr_t ents_prev;  // presumably entries kept from the previous frame - confirm at use sites
++
++    unsigned int max_n;        // once ent_pool holds this many, pool_recycle frees the tail entry
++    unsigned int seq;          // counter that pool_best_fit uses to stamp entries
++
++    vlc_mutex_t lock;          // held while ent_pool is modified in pool_recycle / pool_best_fit
++
++    MMAL_POOL_T * buf_pool;
++
++    vcsm_init_type_t vcsm_init_type;
++};
++
++typedef struct vzc_subbuf_ent_s
++{
++    pool_ent_t * ent;            // pool entry that supplies width, height and encoding
++    MMAL_RECT_T pic_rect;
++    MMAL_RECT_T orig_dest_rect;
++    MMAL_DISPLAYREGION_T dreg;   // returned by hw_mmal_vzc_buf_region
++} vzc_subbuf_ent_t;
++
++
++static pool_ent_t * ent_extract(ent_list_hdr_t * const elh, pool_ent_t * const ent)
++{   // Unlink ent from elh and return it; a NULL ent is a no-op that returns NULL
++//    printf("List %p [%d]: Ext %p\n", elh, elh->n, ent);
++
++    if (ent == NULL)
++        return NULL;
++
++    if (ent->next == NULL)
++        elh->tail = ent->prev;  // ent was the last entry
++    else
++        ent->next->prev = ent->prev;
++
++    if (ent->prev == NULL)
++        elh->ents = ent->next;  // ent was the first entry
++    else
++        ent->prev->next = ent->next;
++
++    ent->prev = ent->next = NULL;
++
++    --elh->n;
++
++    return ent;  // For convenience
++}
++
++static inline pool_ent_t * ent_extract_tail(ent_list_hdr_t * const elh)
++{   // Unlink and return the last entry of elh, or NULL if the list is empty
++    return ent_extract(elh, elh->tail);
++}
++
++static void ent_add_head(ent_list_hdr_t * const elh, pool_ent_t * const ent)
++{   // Link ent in as the new first entry of elh; ent must not be NULL
++//    printf("List %p [%d]: Add %p\n", elh, elh->n, ent);
++
++    if ((ent->next = elh->ents) == NULL)
++        elh->tail = ent;  // list was empty, so ent is also the tail
++    else
++        ent->next->prev = ent;
++
++    ent->prev = NULL;
++    elh->ents = ent;
++    ++elh->n;
++}
++
++static void ent_free(pool_ent_t * const ent)
++{   // Release the picture and VCSM memory held by ent, then free ent; NULL is ignored
++//    printf("Free ent: %p\n", ent);
++    if (ent != NULL) {
++        // If we still have a ref to a pic - kill it now
++        if (ent->pic != NULL)
++            picture_Release(ent->pic);
++
++        // Free contents
++        vcsm_unlock_hdl(ent->vcsm_hdl);  // undo the vcsm_lock done when the entry was allocated
++
++        vcsm_free(ent->vcsm_hdl);
++
++        free(ent);
++    }
++}
++
++static void ent_free_list(ent_list_hdr_t * const elh)
++{   // Free every entry on elh and leave elh as an empty list
++    pool_ent_t * ent = elh->ents;
++
++//    printf("Free list: %p [%d]\n", elh, elh->n);
++
++    *elh = ENT_LIST_HDR_INIT;  // header is reset first; the chain is then walked via ent
++
++    while (ent != NULL) {
++        pool_ent_t * const t = ent;
++        ent = t->next;  // read the link before t is freed
++        ent_free(t);
++    }
++}
++
++static void ent_list_move(ent_list_hdr_t * const dst, ent_list_hdr_t * const src)
++{   // Transfer the whole of src to dst; anything dst held before is overwritten, not freed
++//    printf("Move %p->%p\n", src, dst);
++
++    *dst = *src;
++    *src = ENT_LIST_HDR_INIT;  // src is left empty
++}
++
++// Scans "backwards" as that should give us the fastest match if we are
++// presented with pics in the same order each time
++static pool_ent_t * ent_list_extract_pic_ent(ent_list_hdr_t * const elh, picture_t * const pic)
++{
++    pool_ent_t *ent = elh->tail;
++
++//    printf("Find list: %p [%d]; pic:%p\n", elh, elh->n, pic);
++
++    while (ent != NULL) {
++//        printf("Check ent: %p, pic:%p\n", ent, ent->pic);
++
++        if (ent->pic == pic)  // match is by picture pointer identity
++            return ent_extract(elh, ent);
++        ent = ent->prev;
++    }
++    return NULL;  // no entry on this list refers to pic
++}
++
++#define POOL_ENT_ALLOC_BLOCK  0x10000  // allocation granularity in bytes (64KiB); must be a power of 2
++
++static pool_ent_t * pool_ent_alloc_new(size_t req_size)
++{   // Allocate an entry backed by locked VCSM memory of at least req_size bytes; NULL on failure
++    pool_ent_t * ent = calloc(1, sizeof(*ent));
++    const size_t alloc_size = (req_size + POOL_ENT_ALLOC_BLOCK - 1) & ~(POOL_ENT_ALLOC_BLOCK - 1);
++
++    if (ent == NULL)
++        return NULL;
++
++    ent->next = ent->prev = NULL;
++
++    // Alloc from vcsm
++    if ((ent->vcsm_hdl = vcsm_malloc_cache(alloc_size, VCSM_CACHE_TYPE_HOST, (char *)"vlc-subpic")) == -1)
++        goto fail1;
++    if ((ent->vc_hdl = vcsm_vc_hdl_from_hdl(ent->vcsm_hdl)) == 0)
++        goto fail2;
++    if ((ent->buf = vcsm_lock(ent->vcsm_hdl)) == NULL)
++        goto fail2;
++
++    ent->size = alloc_size;  // the rounded-up size, not req_size
++    return ent;
++
++fail2:
++    vcsm_free(ent->vcsm_hdl);
++fail1:
++    free(ent);
++    return NULL;
++}
++
++static inline pool_ent_t * pool_ent_ref(pool_ent_t * const ent)
++{
++    // Take one more reference on ent and hand the same pointer back so
++    // that the call can be used directly where the entry is stored
++    (void)atomic_fetch_add(&ent->ref_count, 1);
++    return ent;
++}
++
++static void pool_recycle(vzc_pool_ctl_t * const pc, pool_ent_t * const ent)
++{   // Drop one reference on ent (NULL ignored); on the last one return it to pc's free pool
++    pool_ent_t * xs = NULL;  // entry pushed out of a full pool, if any
++    int n;
++
++    if (ent == NULL)
++        return;
++
++    n = atomic_fetch_sub(&ent->ref_count, 1) - 1;  // n is the count after the decrement
++
++//    printf("%s: Pool: %p: Ent: %p: %d\n", __func__, &pc->ent_pool, ent, n);
++
++    if (n != 0)
++        return;
++
++    if (ent->pic != NULL) {
++        picture_Release(ent->pic);
++        ent->pic = NULL;
++    }
++
++    vlc_mutex_lock(&pc->lock);
++
++    // If we have a full pool then extract the LRU and free it
++    // Free done outside mutex
++    if (pc->ent_pool.n >= pc->max_n)
++        xs = ent_extract_tail(&pc->ent_pool);
++
++    ent_add_head(&pc->ent_pool, ent);
++
++    vlc_mutex_unlock(&pc->lock);
++
++    ent_free(xs);  // no-op when nothing was pushed out
++}
++
++// * This could be made more efficient, but this is easy
++static void pool_recycle_list(vzc_pool_ctl_t * const pc, ent_list_hdr_t * const elh)
++{   // Empty elh from the tail, dropping one reference on each entry via pool_recycle
++    pool_ent_t * ent;
++    while ((ent = ent_extract_tail(elh)) != NULL) {
++        pool_recycle(pc, ent);
++    }
++}
++
++static pool_ent_t * pool_best_fit(vzc_pool_ctl_t * const pc, size_t req_size)
++{   // Return an entry of at least req_size bytes with ref_count 1, reusing a pooled one if possible
++    pool_ent_t * best = NULL;
++
++    vlc_mutex_lock(&pc->lock);
++
++    {
++        pool_ent_t * ent = pc->ent_pool.ents;
++
++        // Simple scan: smallest entry that fits without being more than about twice the request
++        while (ent != NULL) {
++            if (ent->size >= req_size && ent->size <= req_size * 2 + POOL_ENT_ALLOC_BLOCK &&
++                    (best == NULL || best->size > ent->size))
++                best = ent;
++            ent = ent->next;
++        }
++
++        // extract best from chain if we've found it
++        ent_extract(&pc->ent_pool, best);
++    }
++
++    vlc_mutex_unlock(&pc->lock);
++
++    if (best == NULL && (best = pool_ent_alloc_new(req_size)) == NULL)
++        return NULL;  // nothing suitable pooled and a new allocation failed
++
++    if ((best->seq = ++pc->seq) == 0)
++        best->seq = ++pc->seq;  // Never allow to be zero
++
++    atomic_store(&best->ref_count, 1);
++    return best;
++}
++
++
++const vlc_fourcc_t hw_mmal_vzc_subpicture_chromas[] = { VLC_CODEC_RGBA, VLC_CODEC_BGRA, VLC_CODEC_ARGB, 0 };  // 0 ends the list
++
++void hw_mmal_vzc_buf_get_wh(MMAL_BUFFER_HEADER_T * const buf, int * const pW, int * const pH)
++{   // Report the width and height of the pool entry behind buf; buf->user_data must be set
++    const pool_ent_t *const ent = ((vzc_subbuf_ent_t *)buf->user_data)->ent;
++    *pW = ent->width;
++    *pH = ent->height;
++}
++
++bool hw_mmal_vzc_buf_set_format(MMAL_BUFFER_HEADER_T * const buf, MMAL_ES_FORMAT_T * const es_fmt)
++{   // Fill es_fmt from the pool entry behind buf; as written this always returns true
++    const pool_ent_t *const ent = ((vzc_subbuf_ent_t *)buf->user_data)->ent;
++    MMAL_VIDEO_FORMAT_T * const v_fmt = &es_fmt->es->video;
++
++    es_fmt->type = MMAL_ES_TYPE_VIDEO;
++    es_fmt->encoding = ent->enc_type;
++    es_fmt->encoding_variant = 0;
++
++    v_fmt->width = ent->width;
++    v_fmt->height = ent->height;
++    v_fmt->crop.x = 0;  // the crop covers the whole entry
++    v_fmt->crop.y = 0;
++    v_fmt->crop.width = ent->width;
++    v_fmt->crop.height = ent->height;
++
++    return true;
++}
++
++void hw_mmal_vzc_buf_frame_size(MMAL_BUFFER_HEADER_T * const buf,
++                                uint32_t * const pWidth, uint32_t * const pHeight)
++{
++    const vzc_subbuf_ent_t * const sb = buf->user_data;  // sub-buffer record for this header
++    *pWidth = sb->ent->width;
++    *pHeight = sb->ent->height;
++}
++
++
++MMAL_DISPLAYREGION_T * hw_mmal_vzc_buf_region(MMAL_BUFFER_HEADER_T * const buf)
++{
++    // The display region is a member of the sub-buffer record in user_data
++    return &((vzc_subbuf_ent_t *)buf->user_data)->dreg;
++}
++
++static inline int rescale_x(int x, int mul, int div)
++{
++    return div != 0 ? (x * mul + div/2) / div : x * mul;  // div/2 added before dividing; div == 0 skips the divide
++}
++
++static void rescale_rect(MMAL_RECT_T * const d, const MMAL_RECT_T * const s, const MMAL_RECT_T * mul_rect, const MMAL_RECT_T * div_rect)
++{
++    d->x      = rescale_x(s->x - div_rect->x, mul_rect->width,  div_rect->width)  + mul_rect->x;
++    d->y      = rescale_x(s->y - div_rect->y, mul_rect->height, div_rect->height) + mul_rect->y;
++    d->width  = rescale_x(s->width,           mul_rect->width,  div_rect->width);
++    d->height = rescale_x(s->height,          mul_rect->height, div_rect->height);
++#if TRACE_TRANSFORMS
++    fprintf(stderr, "(%d,%d %dx%d) * (%d,%d %dx%d) / (%d,%d %dx%d) -> (%d,%d %dx%d)\n",
++            s->x, s->y, s->width, s->height,
++            mul_rect->x, mul_rect->y, mul_rect->width, mul_rect->height,
++            div_rect->x, div_rect->y, div_rect->width, div_rect->height,
++            d->x, d->y, d->width, d->height);
++#endif
++}
++
++static MMAL_RECT_T
++rect_untransform(MMAL_RECT_T s, const MMAL_RECT_T c, const MMAL_DISPLAYTRANSFORM_T t)
++{
++#if TRACE_TRANSFORMS
++    fprintf(stderr, "t=%d, s=%d,%d:%dx%d, c=%d,%d:%dx%d -> ", (int)t,
++           s.x,s.y,s.width,s.height,
++           c.x,c.y,c.width,c.height);
++#endif
++    if (is_transform_hflip(t))
++        s = rect_hflip(s, c);
++    if (is_transform_vflip(t) != 0)
++        s = rect_vflip(s, c);
++    if (is_transform_transpose(t) != 0)
++        s = rect_transpose(s);
++#if TRACE_TRANSFORMS
++    fprintf(stderr, "s=%d,%d:%dx%d\n",
++           s.x,s.y,s.width,s.height);
++#endif
++    return s;
++}
++
++void hw_mmal_vzc_buf_scale_dest_rect(MMAL_BUFFER_HEADER_T * const buf, const MMAL_RECT_T * const scale_rect, const MMAL_DISPLAYTRANSFORM_T scale_transform)
++{
++    vzc_subbuf_ent_t * const sub = buf->user_data;
++
++    // No scale rect: use the original dest rect with no transform
++    if (scale_rect == NULL) {
++        sub->dreg.dest_rect = sub->orig_dest_rect;
++        sub->dreg.transform = MMAL_DISPLAY_ROT0;
++        return;
++    }
++
++    // The scale rect has been transposed if we have a transposing
++    // transform - untranspose so we are the same way up as the source
++    const MMAL_RECT_T c = (scale_transform & 4) != 0 ? rect_transpose(*scale_rect) : *scale_rect;
++    rescale_rect(&sub->dreg.dest_rect, &sub->orig_dest_rect, &c, &sub->pic_rect);
++    sub->dreg.dest_rect = rect_untransform(sub->dreg.dest_rect, c, scale_transform);
++    sub->dreg.transform = scale_transform;
++}
++
++unsigned int hw_mmal_vzc_buf_seq(MMAL_BUFFER_HEADER_T * const buf)
++{
++    // Sequence number held by the ent behind this buffer's sub-buffer record
++    return ((vzc_subbuf_ent_t *)buf->user_data)->ent->seq;
++}
++
++
++// The intent with the ents_cur & ents_last stuff is to remember the buffers
++// we used on the last frame and reuse them on the current one if they are the
++// same.  Unfortunately detection of "is_first" is only a heuristic (there are
++// no rules governing the order in which things are blended) so we must deal
++// (fairly) gracefully with it never (or always) being set.
++
++// dst_fmt gives the number space in which the destination pixels are specified
++
++MMAL_BUFFER_HEADER_T * hw_mmal_vzc_buf_from_pic(vzc_pool_ctl_t * const pc,
++                                                picture_t * const pic,
++                                                const MMAL_RECT_T dst_pic_rect,
++                                                const int x_offset, const int y_offset,
++                                                const unsigned int alpha,
++                                                const bool is_first)
++{
++    MMAL_BUFFER_HEADER_T * const buf = mmal_queue_get(pc->buf_pool->queue);
++    vzc_subbuf_ent_t * sb;
++
++    if (buf == NULL)
++        return NULL;
++
++    if ((sb = calloc(1, sizeof(*sb))) == NULL)
++        goto fail1;
++
++    // If first or we've had a lot of stuff move everything to the last list
++    // (we could deal more gracefully with the "too many" case but it shouldn't
++    // really happen)
++    if (is_first || pc->ents_cur.n >= CTX_BUFS_MAX) {
++        pool_recycle_list(pc, &pc->ents_prev);
++        ent_list_move(&pc->ents_prev, &pc->ents_cur);
++    }
++
++    sb->dreg.hdr.id = MMAL_PARAMETER_DISPLAYREGION;
++    sb->dreg.hdr.size = sizeof(sb->dreg);
++    buf->user_data = sb;
++
++    {
++        // ?? Round start offset as well as length
++        const video_format_t *const fmt = &pic->format;
++
++        const unsigned int bpp = (fmt->i_bits_per_pixel + 7) >> 3;
++        const unsigned int xl = (fmt->i_x_offset & ~15);
++        const unsigned int xr = (fmt->i_x_offset + fmt->i_visible_width + 15) & ~15;
++        const size_t dst_stride = (xr - xl) * bpp;
++        const size_t dst_lines = ((fmt->i_visible_height + 15) & ~15);
++        const size_t dst_size = dst_stride * dst_lines;
++
++        pool_ent_t * ent = ent_list_extract_pic_ent(&pc->ents_prev, pic);
++        bool needs_copy = false;
++
++        // If we didn't find ent in last then look in cur in case is_first
++        // isn't working
++        if (ent == NULL)
++            ent = ent_list_extract_pic_ent(&pc->ents_cur, pic);
++
++//        printf("ent_found: %p\n", ent);
+
+-int mmal_picture_lock(picture_t *picture)
++        if (ent == NULL)
++        {
++            // Need a new ent
++            needs_copy = true;
++            // Validate the format before taking an ent so that a failure
++            // here cannot leak one (fail2 does not recycle ents)
++            const MMAL_FOURCC_T enc_type = vlc_to_mmal_video_fourcc(&pic->format);
++            if (enc_type == 0 || (ent = pool_best_fit(pc, dst_size)) == NULL)
++                goto fail2;
++            ent->enc_type = enc_type;
++            ent->pic = picture_Hold(pic);
++        }
++
++        ent_add_head(&pc->ents_cur, ent);
++
++        sb->ent = pool_ent_ref(ent);
++        hw_mmal_vzc_pool_ref(pc);
++
++        // Copy data
++        buf->next = NULL;
++        buf->cmd = 0;
++        buf->data = (uint8_t *)(ent->vc_hdl);
++        buf->alloc_size = buf->length = dst_size;
++        buf->offset = 0;
++        buf->flags = MMAL_BUFFER_HEADER_FLAG_FRAME_END;
++        buf->pts = buf->dts = pic->date != VLC_TICK_INVALID ? pic->date : MMAL_TIME_UNKNOWN;
++        buf->type->video = (MMAL_BUFFER_HEADER_VIDEO_SPECIFIC_T){
++            .planes = 1,
++            .pitch = { dst_stride }
++        };
++
++        // Remember offsets
++        sb->dreg.set = MMAL_DISPLAY_SET_SRC_RECT |
++            MMAL_DISPLAY_SET_DEST_RECT |
++            MMAL_DISPLAY_SET_FULLSCREEN |
++            MMAL_DISPLAY_SET_TRANSFORM |
++            MMAL_DISPLAY_SET_ALPHA;
++
++        sb->dreg.fullscreen = 0;
++
++        // Will be set later - zero now to avoid any confusion
++        sb->dreg.transform = MMAL_DISPLAY_ROT0;
++        sb->dreg.dest_rect = (MMAL_RECT_T){0, 0, 0, 0};
++
++        sb->dreg.alpha = (uint32_t)(alpha & 0xff) | MMAL_DISPLAY_ALPHA_FLAGS_MIX;
++
++//        printf("+++ bpp:%d, vis:%dx%d wxh:%dx%d, d:%dx%d\n", bpp, fmt->i_visible_width, fmt->i_visible_height, fmt->i_width, fmt->i_height, dst_stride, dst_lines);
++
++        sb->dreg.src_rect = (MMAL_RECT_T){
++            .x      = (fmt->i_x_offset - xl),
++            .y      = 0,
++            .width  = fmt->i_visible_width,
++            .height = fmt->i_visible_height
++        };
++
++        sb->pic_rect = dst_pic_rect;
++
++        sb->orig_dest_rect = (MMAL_RECT_T){
++            .x      = x_offset,
++            .y      = y_offset,
++            .width  = fmt->i_visible_width,
++            .height = fmt->i_visible_height
++        };
++
++        if (needs_copy)
++        {
++            ent->width = dst_stride / bpp;
++            ent->height = dst_lines;
++
++            // 2D copy
++            {
++                uint8_t *d = ent->buf;
++                const uint8_t *s = pic->p[0].p_pixels + xl * bpp + fmt->i_y_offset * pic->p[0].i_pitch;
++
++                mem_copy_2d(d, dst_stride, s, pic->p[0].i_pitch, fmt->i_visible_height, dst_stride);
++
++                // And make sure it is actually in memory
++                if (pc->vcsm_init_type != VCSM_INIT_CMA) {  // ** CMA is currently always uncached
++                    flush_range(ent->buf, dst_stride * fmt->i_visible_height);
++                }
++            }
++        }
++    }
++
++    return buf;
++fail2:
++    buf->user_data = NULL;  // sb is about to be freed: the pool release cb must not see it
++    free(sb);
++fail1:
++    mmal_buffer_header_release(buf);
++    return NULL;
++}
++
++void hw_mmal_vzc_pool_flush(vzc_pool_ctl_t * const pc)
++{
++    pool_recycle_list(pc, &pc->ents_prev);
++    pool_recycle_list(pc, &pc->ents_cur);
++}
++
++static void hw_mmal_vzc_pool_delete(vzc_pool_ctl_t * const pc)
++{
++
++//    printf("<<< %s\n", __func__);
++
++    hw_mmal_vzc_pool_flush(pc);
++
++    ent_free_list(&pc->ent_pool);
++
++    if (pc->buf_pool != NULL)
++        mmal_pool_destroy(pc->buf_pool);
++
++    vlc_mutex_destroy(&pc->lock);
++
++    cma_vcsm_exit(pc->vcsm_init_type);
++
++//    memset(pc, 0xba, sizeof(*pc)); // Zap for (hopefully) faster crash
++    free (pc);
++
++    //    printf(">>> %s\n", __func__);
++}
++
++void hw_mmal_vzc_pool_release(vzc_pool_ctl_t * const pc)
++{
++    // Releasing a NULL pool is a no-op
++    if (pc == NULL)
++        return;
++
++    // atomic_fetch_sub returns the count *before* the decrement, so any
++    // previous value other than 1 means other references remain
++    if (atomic_fetch_sub(&pc->ref_count, 1) != 1)
++        return;
++
++    // That was the last reference
++    hw_mmal_vzc_pool_delete(pc);
++}
++
++void hw_mmal_vzc_pool_ref(vzc_pool_ctl_t * const pc)
++{
++    atomic_fetch_add(&pc->ref_count, 1);
++}
++
++static MMAL_BOOL_T vcz_pool_release_cb(MMAL_POOL_T * buf_pool, MMAL_BUFFER_HEADER_T *buf, void *userdata)
++{
++    vzc_pool_ctl_t * const pc = userdata;
++    vzc_subbuf_ent_t * const sb = buf->user_data;
++
++    VLC_UNUSED(buf_pool);
++
++//    printf("<<< %s\n", __func__);
++
++    if (sb != NULL) {
++        buf->user_data = NULL;
++        pool_recycle(pc, sb->ent);
++        hw_mmal_vzc_pool_release(pc);
++        free(sb);
++    }
++
++//    printf(">>> %s\n", __func__);
++
++    return MMAL_TRUE;
++}
++
++vzc_pool_ctl_t * hw_mmal_vzc_pool_new(void)
++{
++    vzc_pool_ctl_t * const pc = calloc(1, sizeof(*pc));
++
++    if (pc == NULL)
++        return NULL;
++
++    if ((pc->vcsm_init_type = cma_vcsm_init()) == VCSM_INIT_NONE)
++    {
++        free(pc);
++        return NULL;
++    }
++
++    pc->max_n = 8;
++    vlc_mutex_init(&pc->lock);  // Must init before potential destruction
++
++    if ((pc->buf_pool = mmal_pool_create(64, 0)) == NULL)
++    {
++        hw_mmal_vzc_pool_delete(pc);  // Also undoes the vcsm init above
++        return NULL;
++    }
++
++    atomic_store(&pc->ref_count, 1);
++    // Headers released back to buf_pool are passed to vcz_pool_release_cb
++    mmal_pool_callback_set(pc->buf_pool, vcz_pool_release_cb, pc);
++
++    return pc;
++}
++
++//----------------------------------------------------------------------------
++
++
++static const uint8_t shift_00[] = {0,0,0,0};
++static const uint8_t shift_01[] = {0,1,1,1};
++
++int cma_pic_set_data(picture_t * const pic,
++                            const MMAL_ES_FORMAT_T * const mm_esfmt,
++                            const MMAL_BUFFER_HEADER_T * const buf)
+ {
+-    picture_sys_t *pic_sys = picture->p_sys;
+-    MMAL_BUFFER_HEADER_T *buffer = pic_sys->buffer;
++    const MMAL_VIDEO_FORMAT_T * const mm_fmt = &mm_esfmt->es->video;
++    const MMAL_BUFFER_HEADER_VIDEO_SPECIFIC_T *const buf_vid = (buf == NULL) ? NULL : &buf->type->video;
++    cma_buf_t *const cb = cma_buf_pic_get(pic);
++    unsigned int planes = 1;
++
++    uint8_t * const data = cma_buf_addr(cb);
++    if (data == NULL) {
++        return VLC_ENOMEM;
++    }
++
++    const uint8_t * ws = shift_00;
++    const uint8_t * hs = shift_00;
++    int pb = 1;
++
++    switch (mm_esfmt->encoding)
++    {
++        case MMAL_ENCODING_ARGB:
++        case MMAL_ENCODING_ABGR:
++        case MMAL_ENCODING_RGBA:
++        case MMAL_ENCODING_BGRA:
++        case MMAL_ENCODING_RGB32:
++        case MMAL_ENCODING_BGR32:
++            pb = 4;
++            break;
++        case MMAL_ENCODING_RGB16:
++            pb = 2;
++            break;
+
+-    int offset = 0;
+-    picture->p[0].p_pixels = buffer->data;
+-    for (int i = 1; i < picture->i_planes; i++) {
+-        offset = offset + picture->p[i - 1].i_pitch * picture->p[i - 1].i_lines;
+-        picture->p[i].p_pixels = (ptrdiff_t)buffer->data + offset;
++        case MMAL_ENCODING_I420:
++            ws = shift_01;
++            hs = shift_01;
++            planes = 3;
++            break;
++
++        case MMAL_ENCODING_YUVUV128:
++            hs = shift_01;
++            planes = 2;
++            break;
++
++        default:
++//            msg_Err(p_filter, "%s: Unexpected format", __func__);
++            return VLC_EGENERIC;
+     }
+
+-    pic_sys->displayed = false;
++    // Fix up SAR if unset
++    if (pic->format.i_sar_den == 0 || pic->format.i_sar_num == 0) {
++        pic->format.i_sar_den = mm_fmt->par.den;
++        pic->format.i_sar_num = mm_fmt->par.num;
++    }
+
++    pic->i_planes = planes;
++    unsigned int offset = 0;
++    for (unsigned int i = 0; i != planes; ++i) {
++        pic->p[i] = (plane_t){
++            .p_pixels = data + (buf_vid != NULL ? buf_vid->offset[i] : offset),
++            .i_lines = mm_fmt->height >> hs[i],
++            .i_pitch = buf_vid != NULL ? buf_vid->pitch[i] : mm_fmt->width * pb,
++            .i_pixel_pitch = pb,
++            .i_visible_lines = mm_fmt->crop.height >> hs[i],
++            .i_visible_pitch = mm_fmt->crop.width >> ws[i]
++        };
++        offset += pic->p[i].i_pitch * pic->p[i].i_lines;
++    }
+     return VLC_SUCCESS;
+ }
++
++int cma_buf_pic_attach(cma_buf_t * const cb, picture_t * const pic)
++{
++    if (!is_cma_buf_pic_chroma(pic->format.i_chroma))
++        return VLC_EGENERIC;
++    if (pic->context != NULL)
++        return VLC_EBADVAR;
++
++    pic_ctx_mmal_t * const ctx = calloc(1, sizeof(pic_ctx_mmal_t));
++
++    if (ctx == NULL)
++        return VLC_ENOMEM;
++
++    ctx->cmn.copy = hw_mmal_pic_ctx_copy;
++    ctx->cmn.destroy = hw_mmal_pic_ctx_destroy;
++    ctx->buf_count = 1; // cb takes the place of the 1st buf
++    ctx->cb = cb;
++
++    cma_buf_in_flight(cb);
++
++    pic->context = &ctx->cmn;
++    return VLC_SUCCESS;
++}
++
++cma_buf_t * cma_buf_pic_get(picture_t * const pic)
++{
++    pic_ctx_mmal_t * const mmal_ctx = (pic_ctx_mmal_t *)pic->context;  // only trusted if the chroma check passes
++    return is_cma_buf_pic_chroma(pic->format.i_chroma) && mmal_ctx != NULL ? mmal_ctx->cb : NULL;
++}
++
++
++//----------------------------------------------------------------------------
++
++/* Returns true if the Pi being used is a Pi 4 (as reported by bcm_host)
++*/
++bool rpi_is_model_pi4(void) {
++    return bcm_host_is_model_pi4();
++}
++
++// Preferred mode - none->cma if fkms is active otherwise legacy
++static volatile vcsm_init_type_t last_vcsm_type = VCSM_INIT_NONE;
++
++vcsm_init_type_t cma_vcsm_type(void)
++{
++    return last_vcsm_type;
++}
++
++vcsm_init_type_t cma_vcsm_init(void)
++{
++    vcsm_init_type_t rv = VCSM_INIT_NONE;
++    // We don't bother locking - taking a copy here should be good enough
++    vcsm_init_type_t try_type = last_vcsm_type;
++
++    // Nothing recorded from an earlier call: prefer CMA if fkms is active,
++    // legacy otherwise
++    if (try_type == VCSM_INIT_NONE)
++        try_type = bcm_host_is_fkms_active() ? VCSM_INIT_CMA : VCSM_INIT_LEGACY;
++
++    // Try the preferred flavour first and fall back to the other one.
++    // The first argument of vcsm_init_ex is 1 for the CMA attempt and 0 for
++    // the legacy attempt.
++    {
++        const bool cma_first = (try_type == VCSM_INIT_CMA);
++        const int first_arg = cma_first ? 1 : 0;
++        const int second_arg = cma_first ? 0 : 1;
++
++        if (vcsm_init_ex(first_arg, -1) == 0)
++            rv = cma_first ? VCSM_INIT_CMA : VCSM_INIT_LEGACY;
++        else if (vcsm_init_ex(second_arg, -1) == 0)
++            rv = cma_first ? VCSM_INIT_LEGACY : VCSM_INIT_CMA;
++    }
++
++    // Just in case this affects vcsm init do after that
++    if (rv != VCSM_INIT_NONE)
++        bcm_host_init();
++
++    // Record the outcome (possibly NONE); the next call starts from it
++    // and cma_vcsm_type() reports it
++    last_vcsm_type = rv;
++    return rv;
++}
++
++void cma_vcsm_exit(const vcsm_init_type_t init_mode)
++{
++    // NONE means init never succeeded so there is nothing to undo
++    if (init_mode == VCSM_INIT_NONE)
++        return;
++    vcsm_exit();
++    bcm_host_deinit();  // Does nothing but add in case it ever does
++}
++
++const char * cma_vcsm_init_str(const vcsm_init_type_t init_mode)
++{
++    // Printable name for an init mode; anything unrecognised falls
++    // through to "???"
++    if (init_mode == VCSM_INIT_CMA)
++        return "CMA";
++
++    if (init_mode == VCSM_INIT_LEGACY)
++        return "Legacy";
++
++    if (init_mode == VCSM_INIT_NONE)
++        return "none";
++
++    return "???";
++}
++
++
+--- a/modules/hw/mmal/mmal_picture.h
++++ b/modules/hw/mmal/mmal_picture.h
+@@ -24,19 +24,298 @@
+ #ifndef VLC_MMAL_MMAL_PICTURE_H_
+ #define VLC_MMAL_MMAL_PICTURE_H_
+
++#include <stdatomic.h>
++
+ #include <vlc_common.h>
+ #include <interface/mmal/mmal.h>
+
++#include "mmal_cma.h"
++
+ /* Think twice before changing this. Incorrect values cause havoc. */
+ #define NUM_ACTUAL_OPAQUE_BUFFERS 30
+
+-struct picture_sys_t {
+-    vlc_object_t *owner;
++#ifndef VLC_TICK_INVALID
++#define VLC_TICK_INVALID VLC_TS_INVALID
++#define VLC_VER_3 1
++#else
++#define VLC_VER_3 0
++#endif
++
++typedef struct mmal_port_pool_ref_s
++{
++    atomic_uint refs;
++    MMAL_POOL_T * pool;
++    MMAL_PORT_T * port;
++} hw_mmal_port_pool_ref_t;
++
++typedef struct pic_ctx_subpic_s {
++    picture_t * subpic;
++    int x, y;
++    int alpha;
++} pic_ctx_subpic_t;
++
++
++#define CTX_BUFS_MAX 4
++typedef struct pic_ctx_mmal_s {
++    picture_context_t cmn;  // PARENT: Common els at start
++
++    cma_buf_t * cb;
++
++    unsigned int buf_count;
++    MMAL_BUFFER_HEADER_T * bufs[CTX_BUFS_MAX];
++
++} pic_ctx_mmal_t;
++
++const char * str_fourcc(char * const buf, const unsigned int fcc);
++
++MMAL_FOURCC_T vlc_to_mmal_video_fourcc(const video_frame_format_t * const vf_vlc);
++MMAL_FOURCC_T vlc_to_mmal_color_space(const video_color_space_t vlc_cs);
++void hw_mmal_vlc_fmt_to_mmal_fmt(MMAL_ES_FORMAT_T *const es_fmt, const video_frame_format_t * const vf_vlc);
++// Returns true if fmt_changed
++// frame_rate ignored for compare, but is set if something else is updated
++bool hw_mmal_vlc_pic_to_mmal_fmt_update(MMAL_ES_FORMAT_T *const es_fmt, const picture_t * const pic);
++
++// Copy pic contents into an existing buffer
++int hw_mmal_copy_pic_to_buf(void * const buf_data, uint32_t * const pLength,
++                            const MMAL_ES_FORMAT_T * const fmt, const picture_t * const pic);
++
++hw_mmal_port_pool_ref_t * hw_mmal_port_pool_ref_create(MMAL_PORT_T * const port,
++   const unsigned int headers, const uint32_t payload_size);
++void hw_mmal_port_pool_ref_release(hw_mmal_port_pool_ref_t * const ppr, const bool in_cb);
++bool hw_mmal_port_pool_ref_recycle(hw_mmal_port_pool_ref_t * const ppr, MMAL_BUFFER_HEADER_T * const buf);
++MMAL_STATUS_T hw_mmal_port_pool_ref_fill(hw_mmal_port_pool_ref_t * const ppr);
++static inline void hw_mmal_port_pool_ref_acquire(hw_mmal_port_pool_ref_t * const ppr)
++{
++    atomic_fetch_add(&ppr->refs, 1);
++}
++MMAL_STATUS_T hw_mmal_opaque_output(vlc_object_t * const obj,
++                                    hw_mmal_port_pool_ref_t ** pppr,
++                                    MMAL_PORT_T * const port,
++                                    const unsigned int extra_buffers, MMAL_PORT_BH_CB_T callback);
++
++static inline int hw_mmal_pic_has_sub_bufs(picture_t * const pic)
++{
++    pic_ctx_mmal_t * const ctx = (pic_ctx_mmal_t *)pic->context;
++    return ctx->buf_count > 1;
++}
++
++static inline void hw_mmal_pic_sub_buf_add(picture_t * const pic, MMAL_BUFFER_HEADER_T * const sub)
++{
++    pic_ctx_mmal_t * const mmal_ctx = (pic_ctx_mmal_t *)pic->context;
++
++    if (mmal_ctx->buf_count < CTX_BUFS_MAX) {
++        mmal_ctx->bufs[mmal_ctx->buf_count++] = sub;  // room left: store in the next free slot
++        return;
++    }
++    // Table full: release the buffer rather than overflow bufs[]
++    mmal_buffer_header_release(sub);
++}
++
++static inline MMAL_BUFFER_HEADER_T * hw_mmal_pic_sub_buf_get(picture_t * const pic, const unsigned int n)
++{
++    pic_ctx_mmal_t * const ctx = (pic_ctx_mmal_t *)pic->context;
++    // Sub-buffer n is bufs[n + 1]; only indices below buf_count are valid (NULL past the end)
++    return n + 1 >= ctx->buf_count ? NULL : ctx->bufs[n + 1];
++}
++
++static inline bool hw_mmal_chroma_is_mmal(const vlc_fourcc_t chroma)
++{
++    return
++        chroma == VLC_CODEC_MMAL_OPAQUE ||
++        chroma == VLC_CODEC_MMAL_ZC_SAND8 ||
++        chroma == VLC_CODEC_MMAL_ZC_SAND10 ||
++        chroma == VLC_CODEC_MMAL_ZC_SAND30 ||
++        chroma == VLC_CODEC_MMAL_ZC_I420 ||
++        chroma == VLC_CODEC_MMAL_ZC_RGB32;
++}
++
++static inline bool hw_mmal_pic_is_mmal(const picture_t * const pic)
++{
++    return hw_mmal_chroma_is_mmal(pic->format.i_chroma);
++}
++
++picture_context_t * hw_mmal_pic_ctx_copy(picture_context_t * pic_ctx_cmn);
++void hw_mmal_pic_ctx_destroy(picture_context_t * pic_ctx_cmn);
++picture_context_t * hw_mmal_gen_context(
++    MMAL_BUFFER_HEADER_T * buf, hw_mmal_port_pool_ref_t * const ppr);
++
++int hw_mmal_get_gpu_mem(void);
++
++
++static inline MMAL_STATUS_T port_parameter_set_uint32(MMAL_PORT_T * port, uint32_t id, uint32_t val)
++{
++    const MMAL_PARAMETER_UINT32_T param = {
++        .hdr = {.id = id, .size = sizeof(MMAL_PARAMETER_UINT32_T)},
++        .value = val
++    };
++    return mmal_port_parameter_set(port, &param.hdr);
++}
++
++static inline MMAL_STATUS_T port_parameter_set_bool(MMAL_PORT_T * const port, const uint32_t id, const bool val)
++{
++    const MMAL_PARAMETER_BOOLEAN_T param = {
++        .hdr = {.id = id, .size = sizeof(MMAL_PARAMETER_BOOLEAN_T)},
++        .enable = val
++    };
++    return mmal_port_parameter_set(port, &param.hdr);
++}
++
++static inline MMAL_STATUS_T port_send_replicated(MMAL_PORT_T * const port, MMAL_POOL_T * const rep_pool,
++                                          MMAL_BUFFER_HEADER_T * const src_buf,
++                                          const uint64_t seq)
++{
++    MMAL_STATUS_T err;
++    MMAL_BUFFER_HEADER_T *const rep_buf = mmal_queue_wait(rep_pool->queue);
++
++    if (rep_buf == NULL)
++        return MMAL_ENOSPC;
++
++    if ((err = mmal_buffer_header_replicate(rep_buf, src_buf)) == MMAL_SUCCESS)
++    {
++        rep_buf->pts = seq;
++        err = mmal_port_send_buffer(port, rep_buf);
++    }
++
++    // On any failure rep_buf is still ours (taken from rep_pool's queue
++    // above) so release it - the replicate failure path used to leak it
++    if (err != MMAL_SUCCESS)
++        mmal_buffer_header_release(rep_buf);
++
++    return err;
++}
++
++
++static inline void pic_to_buf_copy_props(MMAL_BUFFER_HEADER_T * const buf, const picture_t * const pic)
++{
++    if (!pic->b_progressive)
++    {
++        buf->flags |= MMAL_BUFFER_HEADER_VIDEO_FLAG_INTERLACED;
++        buf->type->video.flags |= MMAL_BUFFER_HEADER_VIDEO_FLAG_INTERLACED;
++    }
++    else
++    {
++        buf->flags &= ~MMAL_BUFFER_HEADER_VIDEO_FLAG_INTERLACED;
++        buf->type->video.flags &= ~MMAL_BUFFER_HEADER_VIDEO_FLAG_INTERLACED;
++    }
++    if (pic->b_top_field_first)
++    {
++        buf->flags |= MMAL_BUFFER_HEADER_VIDEO_FLAG_TOP_FIELD_FIRST;
++        buf->type->video.flags |= MMAL_BUFFER_HEADER_VIDEO_FLAG_TOP_FIELD_FIRST;
++    }
++    else
++    {
++        buf->flags &= ~MMAL_BUFFER_HEADER_VIDEO_FLAG_TOP_FIELD_FIRST;
++        buf->type->video.flags &= ~MMAL_BUFFER_HEADER_VIDEO_FLAG_TOP_FIELD_FIRST;
++    }
++    buf->pts = pic->date != VLC_TICK_INVALID ? pic->date : MMAL_TIME_UNKNOWN;
++    buf->dts = buf->pts;
++}
++
++static inline void buf_to_pic_copy_props(picture_t * const pic, const MMAL_BUFFER_HEADER_T * const buf)
++{
++    // Contrary to docn the interlace & tff flags turn up in the header flags rather than the
++    // video specific flags (which appear to be currently unused).
++    pic->b_progressive = (buf->flags & MMAL_BUFFER_HEADER_VIDEO_FLAG_INTERLACED) == 0;
++    pic->b_top_field_first = (buf->flags & MMAL_BUFFER_HEADER_VIDEO_FLAG_TOP_FIELD_FIRST) != 0;
++
++    pic->date = buf->pts != MMAL_TIME_UNKNOWN ? buf->pts :
++        buf->dts != MMAL_TIME_UNKNOWN ? buf->dts :
++            VLC_TICK_INVALID;
++}
++
++MMAL_BUFFER_HEADER_T * hw_mmal_pic_buf_copied(const picture_t *const pic,
++                                              MMAL_POOL_T * const rep_pool,
++                                              MMAL_PORT_T * const port,
++                                              cma_buf_pool_t * const cbp);
++
++MMAL_BUFFER_HEADER_T * hw_mmal_pic_buf_replicated(const picture_t *const pic, MMAL_POOL_T * const rep_pool);
++
++struct vzc_pool_ctl_s;
++typedef struct vzc_pool_ctl_s vzc_pool_ctl_t;
++
++// At the moment we cope with any mono-planar RGBA thing
++// We could cope with many other things but they currently don't occur
++extern const vlc_fourcc_t hw_mmal_vzc_subpicture_chromas[];
++static inline bool hw_mmal_vzc_subpic_fmt_valid(const video_frame_format_t * const vf_vlc)
++{
++    const vlc_fourcc_t vfcc_src = vf_vlc->i_chroma;
++    for (const vlc_fourcc_t * p = hw_mmal_vzc_subpicture_chromas; *p != 0; ++p)
++        if (*p == vfcc_src)
++            return true;
++
++    return false;
++}
++
++bool hw_mmal_vzc_buf_set_format(MMAL_BUFFER_HEADER_T * const buf, MMAL_ES_FORMAT_T * const es_fmt);
++MMAL_DISPLAYREGION_T * hw_mmal_vzc_buf_region(MMAL_BUFFER_HEADER_T * const buf);
++void hw_mmal_vzc_buf_scale_dest_rect(MMAL_BUFFER_HEADER_T * const buf, const MMAL_RECT_T * const scale_rect, const MMAL_DISPLAYTRANSFORM_T scale_transform);
++void hw_mmal_vzc_buf_get_wh(MMAL_BUFFER_HEADER_T * const buf, int * const pW, int * const pH);
++unsigned int hw_mmal_vzc_buf_seq(MMAL_BUFFER_HEADER_T * const buf);
++MMAL_BUFFER_HEADER_T * hw_mmal_vzc_buf_from_pic(vzc_pool_ctl_t * const pc, picture_t * const pic,
++                                                const MMAL_RECT_T dst_pic_rect,
++                                                const int x_offset, const int y_offset,
++                                                const unsigned int alpha, const bool is_first);
++void hw_mmal_vzc_buf_frame_size(MMAL_BUFFER_HEADER_T * const buf,
++                                uint32_t * const pWidth, uint32_t * const pHeight);
++
++void hw_mmal_vzc_pool_flush(vzc_pool_ctl_t * const pc);
++void hw_mmal_vzc_pool_release(vzc_pool_ctl_t * const pc);
++void hw_mmal_vzc_pool_ref(vzc_pool_ctl_t * const pc);
++vzc_pool_ctl_t * hw_mmal_vzc_pool_new(void);
++
++
++static inline MMAL_RECT_T vis_mmal_rect(const video_format_t * const fmt)
++{
++    return (MMAL_RECT_T){
++        .x      = fmt->i_x_offset,
++        .y      = fmt->i_y_offset,
++        .width  = fmt->i_visible_width,
++        .height = fmt->i_visible_height
++    };
++}
++
++int cma_pic_set_data(picture_t * const pic,
++                    const MMAL_ES_FORMAT_T * const mm_esfmt,
++                    const MMAL_BUFFER_HEADER_T * const buf);
++
++// Attaches cma buf to pic
++// Marks in_flight if not all_in_flight anyway
++int cma_buf_pic_attach(cma_buf_t * const cb, picture_t * const pic);
++// Returns a pointer to the cma_buf attached to the pic
++// Just a pointer - doesn't add a ref
++cma_buf_t * cma_buf_pic_get(picture_t * const pic);
++
++static inline bool is_cma_buf_pic_chroma(const uint32_t chroma)
++{
++    return chroma == VLC_CODEC_MMAL_ZC_RGB32 ||
++        chroma == VLC_CODEC_MMAL_ZC_SAND8 ||
++        chroma == VLC_CODEC_MMAL_ZC_SAND10 ||
++        chroma == VLC_CODEC_MMAL_ZC_SAND30 ||
++        chroma == VLC_CODEC_MMAL_ZC_I420;
++}
++
++
++int rpi_get_model_type(void);
++bool rpi_is_model_pi4(void);
++bool rpi_is_fkms_active(void);
++
++typedef enum vcsm_init_type_e {
++    VCSM_INIT_NONE = 0,
++    VCSM_INIT_LEGACY,
++    VCSM_INIT_CMA
++} vcsm_init_type_t;
++
++vcsm_init_type_t cma_vcsm_init(void);
++void cma_vcsm_exit(const vcsm_init_type_t init_mode);
++vcsm_init_type_t cma_vcsm_type(void);
++const char * cma_vcsm_init_str(const vcsm_init_type_t init_mode);
++
+
+-    MMAL_BUFFER_HEADER_T *buffer;
+-    bool displayed;
+-};
++#define VOUT_DISPLAY_CHANGE_MMAL_BASE 1024
++#define VOUT_DISPLAY_CHANGE_MMAL_HIDE (VOUT_DISPLAY_CHANGE_MMAL_BASE + 0)
+
+-int mmal_picture_lock(picture_t *picture);
++#define MMAL_COMPONENT_DEFAULT_RESIZER "vc.ril.resize"
++#define MMAL_COMPONENT_ISP_RESIZER     "vc.ril.isp"
++#define MMAL_COMPONENT_HVS             "vc.ril.hvs"
+
+ #endif
+--- /dev/null
++++ b/modules/hw/mmal/rpi_prof.h
+@@ -0,0 +1,110 @@
++#ifndef RPI_PROFILE_H
++#define RPI_PROFILE_H
++
++#include <stdint.h>
++#include <inttypes.h>
++
++#ifndef RPI_PROFILE
++#define RPI_PROFILE 0
++#endif
++
++#if RPI_PROFILE
++
++#include "v7_pmu.h"
++
++#ifdef RPI_PROC_ALLOC
++#define X volatile
++#define Z =0
++#else
++#define X extern volatile
++#define Z
++#endif
++
++X uint64_t av_rpi_prof0_cycles Z;
++X unsigned int av_rpi_prof0_cnt Z;
++#define RPI_prof0_MAX_DURATION 100000
++
++X uint64_t av_rpi_prof1_cycles Z;
++X unsigned int av_rpi_prof1_cnt Z;
++#define RPI_prof1_MAX_DURATION 100000
++
++X uint64_t av_rpi_prof2_cycles Z;
++X unsigned int av_rpi_prof2_cnt Z;
++#define RPI_prof2_MAX_DURATION 10000
++
++X uint64_t av_rpi_prof_n_cycles[128];
++X unsigned int av_rpi_prof_n_cnt[128];
++#define RPI_prof_n_MAX_DURATION 10000
++
++
++#undef X
++#undef Z
++
++#define PROFILE_INIT()\
++do {\
++    enable_pmu();\
++    enable_ccnt();\
++} while (0)
++
++#define PROFILE_START()\
++do {\
++    volatile uint32_t perf_1 = read_ccnt();\
++    volatile uint32_t perf_2
++
++
++#define PROFILE_ACC(x)\
++    perf_2 = read_ccnt();\
++    {\
++        const uint32_t duration = perf_2 - perf_1;\
++        if (duration < RPI_##x##_MAX_DURATION)\
++        {\
++            av_rpi_##x##_cycles += duration;\
++            av_rpi_##x##_cnt += 1;\
++        }\
++    }\
++} while(0)
++
++
++#define PROFILE_ACC_N(n)\
++    if ((n) >= 0) {\
++        perf_2 = read_ccnt();\
++        {\
++            const uint32_t duration = perf_2 - perf_1;\
++            if (duration < RPI_prof_n_MAX_DURATION)\
++            {\
++                av_rpi_prof_n_cycles[n] += duration;\
++                av_rpi_prof_n_cnt[n] += 1;\
++            }\
++        }\
++    }\
++} while(0)
++
++#define PROFILE_PRINTF(x)\
++    printf("%-20s cycles=%14" PRIu64 ";  cnt=%8u;  avg=%5" PRIu64 "\n", #x, av_rpi_##x##_cycles, av_rpi_##x##_cnt,\
++        av_rpi_##x##_cnt == 0 ? (uint64_t)0 : av_rpi_##x##_cycles / (uint64_t)av_rpi_##x##_cnt)
++
++#define PROFILE_PRINTF_N(n)\
++    printf("prof[%d] cycles=%14" PRIu64 ";  cnt=%8u;  avg=%5" PRIu64 "\n", (n), av_rpi_prof_n_cycles[n], av_rpi_prof_n_cnt[n],\
++        av_rpi_prof_n_cnt[n] == 0 ? (uint64_t)0 : av_rpi_prof_n_cycles[n] / (uint64_t)av_rpi_prof_n_cnt[n])
++
++#define PROFILE_CLEAR_N(n) \
++do {\
++    av_rpi_prof_n_cycles[n] = 0;\
++    av_rpi_prof_n_cnt[n] = 0;\
++} while(0)
++
++#else
++
++// No profile
++#define PROFILE_INIT()
++#define PROFILE_START()
++#define PROFILE_ACC(x)
++#define PROFILE_ACC_N(x)
++#define PROFILE_PRINTF(x)
++#define PROFILE_PRINTF_N(x)
++#define PROFILE_CLEAR_N(n)
++
++#endif
++
++#endif
++
+--- /dev/null
++++ b/modules/hw/mmal/subpic.c
+@@ -0,0 +1,257 @@
++/*****************************************************************************
++ * subpic.c: MMAL subpicture helpers for Raspberry Pi
++ *****************************************************************************
++ * Authors: jc@kynesim.co.uk
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU Lesser General Public License as published by
++ * the Free Software Foundation; either version 2.1 of the License, or
++ * (at your option) any later version.
++ *
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * GNU Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public License
++ * along with this program; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
++ *****************************************************************************/
++
++#ifdef HAVE_CONFIG_H
++#include "config.h"
++#endif
++
++#include <stdatomic.h>
++
++#include <vlc_common.h>
++#include <vlc_plugin.h>
++#include <vlc_codec.h>
++#include <vlc_filter.h>
++#include <vlc_threads.h>
++
++#include <bcm_host.h>
++#include <interface/mmal/mmal.h>
++#include <interface/mmal/util/mmal_util.h>
++#include <interface/mmal/util/mmal_default_components.h>
++
++#include "mmal_picture.h"
++#include "subpic.h"
++
++
++#define TRACE_ALL 0
++
++static inline bool cmp_rect(const MMAL_RECT_T * const a, const MMAL_RECT_T * const b)
++{   // Returns true when a and b have identical x, y, width and height (true means "equal")
++    return a->x == b->x && a->y == b->y && a->width == b->width && a->height == b->height;
++}
++
++void hw_mmal_subpic_flush(vlc_object_t * const p_filter, subpic_reg_stash_t * const sub)
++{   // Disable the subpic port (if one is attached and enabled) and forget the last sequence number
++    VLC_UNUSED(p_filter);
++    if (sub->port != NULL && sub->port->is_enabled)
++        mmal_port_disable(sub->port);  // Return status is not checked
++    sub->seq = 0;  // With seq == 0 hw_mmal_subpic_update() will not send an empty buffer for a NULL sub_buf
++}
++
++void hw_mmal_subpic_close(vlc_object_t * const p_filter, subpic_reg_stash_t * const spe)
++{   // Flush, destroy the buffer header pool and clear the stash; the port itself is not released here
++    hw_mmal_subpic_flush(p_filter, spe);
++
++    if (spe->pool != NULL)
++        mmal_pool_destroy(spe->pool);
++
++    // Zap to avoid any accidental reuse
++    *spe = (subpic_reg_stash_t){NULL};
++}
++
++MMAL_STATUS_T hw_mmal_subpic_open(vlc_object_t * const p_filter, subpic_reg_stash_t * const spe, MMAL_PORT_T * const port,
++                                  const int display_id, const unsigned int layer)
++{   // Prepare *spe for port; returns MMAL_SUCCESS, the zero-copy set error, or MMAL_ENOMEM
++    MMAL_STATUS_T err;
++
++    // Start by zapping all to zero
++    *spe = (subpic_reg_stash_t){NULL};
++
++    if ((err = port_parameter_set_bool(port, MMAL_PARAMETER_ZERO_COPY, true)) != MMAL_SUCCESS)
++    {
++        msg_Err(p_filter, "Failed to set sub port zero copy");
++        return err;
++    }
++
++    if ((spe->pool = mmal_pool_create(30, 0)) == NULL)  // NOTE(review): presumably 30 headers with payload size 0 - confirm against the MMAL API
++    {
++        msg_Err(p_filter, "Failed to create sub pool");
++        return MMAL_ENOMEM;
++    }
++
++    port->userdata = (void *)p_filter;  // Read back by conv_subpic_cb when TRACE_ALL is set
++    spe->port = port;
++    spe->display_id = display_id;
++    spe->layer = layer;
++
++    return MMAL_SUCCESS;
++}
++
++static void conv_subpic_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)
++{   // Callback given to mmal_port_enable() for the subpic port: only releases the returned header
++#if TRACE_ALL
++    msg_Dbg((filter_t *)port->userdata, "<<< %s cmd=%d, user=%p, buf=%p, flags=%#x, len=%d/%d, pts=%lld",
++            __func__, buf->cmd, buf->user_data, buf, buf->flags, buf->length, buf->alloc_size, (long long)buf->pts);
++#else
++    VLC_UNUSED(port);  // port is only needed for the trace message
++#endif
++
++    mmal_buffer_header_release(buf);  // Will extract & release pic in pool callback
++}
++
++static int
++subpic_send_empty(vlc_object_t * const p_filter, subpic_reg_stash_t * const spe, const uint64_t pts)
++{   // Send a data-less buffer header to the subpic port; returns 0 on success, -1 on failure
++    MMAL_BUFFER_HEADER_T *const buf = mmal_queue_wait(spe->pool->queue);  // NOTE(review): presumably blocks until a header is free - confirm
++    MMAL_STATUS_T err;
++
++    if (buf == NULL) {
++        msg_Err(p_filter, "Buffer get for subpic failed");
++        return -1;
++    }
++#if TRACE_ALL
++    msg_Dbg(p_filter, "Remove pic for sub %d", spe->seq);
++#endif
++    buf->cmd = 0;
++    buf->data = NULL;  // No payload attached to this header
++    buf->alloc_size = 0;
++    buf->offset = 0;
++    buf->flags = MMAL_BUFFER_HEADER_FLAG_FRAME_END;
++    buf->pts = pts;
++    buf->dts = MMAL_TIME_UNKNOWN;
++    buf->user_data = NULL;
++
++    if ((err = mmal_port_send_buffer(spe->port, buf)) != MMAL_SUCCESS)
++    {
++        msg_Err(p_filter, "Send buffer to subput failed");
++        mmal_buffer_header_release(buf);  // Send failed, so release the header here
++        return -1;
++    }
++    return 0;
++}
++
++// Returns: < 0 Error
++//            0 Done & stop (never returned by the code below)
++//            1 Done & continue
++
++int hw_mmal_subpic_update(vlc_object_t * const p_filter,
++    MMAL_BUFFER_HEADER_T * const sub_buf,
++    subpic_reg_stash_t * const spe,
++    const video_format_t * const fmt,
++    const MMAL_RECT_T * const scale_out,
++    const MMAL_DISPLAYTRANSFORM_T transform_out,
++    const uint64_t pts)
++{
++    MMAL_STATUS_T err;
++
++    if (sub_buf == NULL)  // No subpic supplied: clear whatever was sent previously
++    {
++        if (spe->port->is_enabled && spe->seq != 0)
++        {
++            subpic_send_empty(p_filter, spe, pts);  // Result ignored; seq is cleared regardless
++            spe->seq = 0;
++        }
++    }
++    else
++    {
++        const unsigned int seq = hw_mmal_vzc_buf_seq(sub_buf);
++        bool needs_update = (spe->seq != seq);  // A different sequence number means the buffer must be sent
++
++        hw_mmal_vzc_buf_scale_dest_rect(sub_buf, scale_out, transform_out);
++
++        if (hw_mmal_vzc_buf_set_format(sub_buf, spe->port->format))  // NOTE(review): meaning of the return value is defined elsewhere - confirm
++        {
++            MMAL_DISPLAYREGION_T * const dreg = hw_mmal_vzc_buf_region(sub_buf);
++            MMAL_VIDEO_FORMAT_T *const v_fmt = &spe->port->format->es->video;
++
++            v_fmt->frame_rate.den = fmt->i_frame_rate_base;
++            v_fmt->frame_rate.num = fmt->i_frame_rate;
++            v_fmt->par.den        = fmt->i_sar_den;
++            v_fmt->par.num        = fmt->i_sar_num;
++            v_fmt->color_space = MMAL_COLOR_SPACE_UNKNOWN;
++
++            if (needs_update || dreg->alpha != spe->alpha || !cmp_rect(&dreg->dest_rect, &spe->dest_rect)) {
++
++                spe->alpha = dreg->alpha;  // Update the shadow copies used for change detection
++                spe->dest_rect = dreg->dest_rect;
++                needs_update = true;  // Region changed, so the buffer is (re)sent below as well
++
++                if (spe->display_id >= 0)
++                {
++                    dreg->display_num = spe->display_id;
++                    dreg->set |= MMAL_DISPLAY_SET_NUM;
++                }
++                dreg->layer = spe->layer;
++                dreg->set |= MMAL_DISPLAY_SET_LAYER;
++
++#if TRACE_ALL
++                msg_Dbg(p_filter, "%s: Update region: Set=%x, dest=%dx%d @ (%d,%d), src=%dx%d @ (%d,%d), layer=%d, alpha=%#x",
++                        __func__, dreg->set,
++                        dreg->dest_rect.width, dreg->dest_rect.height, dreg->dest_rect.x, dreg->dest_rect.y,
++                        dreg->src_rect.width, dreg->src_rect.height, dreg->src_rect.x, dreg->src_rect.y,
++                        dreg->layer, dreg->alpha);
++#endif
++
++                // If now completely offscreen just flush this & return
++                // We only do -ve as (a) that is easy and (b) it seems to be
++                // something that can confuse mmal
++                if (dreg->dest_rect.y + dreg->dest_rect.height <= 0 ||
++                    dreg->dest_rect.x + dreg->dest_rect.width <= 0)
++                {
++                    if (spe->port->is_enabled)
++                        subpic_send_empty(p_filter, spe, pts);  // Result ignored
++                    spe->seq = seq;  // Record seq so an unchanged buffer does not trigger another update
++                    return 1;
++                }
++
++                if ((err = mmal_port_parameter_set(spe->port, &dreg->hdr)) != MMAL_SUCCESS)
++                {
++                    msg_Err(p_filter, "Set display region on subput failed");
++                    return -1;
++                }
++
++                if ((err = mmal_port_format_commit(spe->port)) != MMAL_SUCCESS)
++                {
++                    msg_Dbg(p_filter, "%s: Subpic commit fail: %d", __func__, err);  // Logged at debug level only
++                    return -1;
++                }
++            }
++        }
++
++        if (!spe->port->is_enabled)
++        {
++            spe->port->buffer_num = 30;  // Same count as the header pool created in hw_mmal_subpic_open
++            spe->port->buffer_size = spe->port->buffer_size_recommended;  // Not used but shuts up the error checking
++
++            if ((err = mmal_port_enable(spe->port, conv_subpic_cb)) != MMAL_SUCCESS)
++            {
++                msg_Dbg(p_filter, "%s: Subpic enable fail: %d", __func__, err);  // Logged at debug level only
++                return -1;
++            }
++        }
++
++        if (needs_update)
++        {
++#if TRACE_ALL
++            msg_Dbg(p_filter, "Update pic for sub %d", spe->seq);
++#endif
++            if ((err = port_send_replicated(spe->port, spe->pool, sub_buf, pts)) != MMAL_SUCCESS)
++            {
++                msg_Err(p_filter, "Send buffer to subput failed");
++                return -1;
++            }
++
++            spe->seq = seq;  // Only recorded after a successful send
++        }
++    }
++    return 1;
++}
++
++
++
+--- /dev/null
++++ b/modules/hw/mmal/subpic.h
+@@ -0,0 +1,33 @@
++#ifndef VLC_HW_MMAL_SUBPIC_H_
++#define VLC_HW_MMAL_SUBPIC_H_
++
++typedef struct subpic_reg_stash_s
++{   // Per-port state kept between hw_mmal_subpic_open() and hw_mmal_subpic_close()
++    MMAL_PORT_T * port;  // Port the subpic buffers are sent to
++    MMAL_POOL_T * pool;  // Buffer header pool created by open, destroyed by close
++    int display_id;  // Display number; < 0 => do not set
++    unsigned int layer;  // Layer written into the display region
++    // Shadow  vars so we can tell if stuff has changed
++    MMAL_RECT_T dest_rect;  // Last destination rect applied
++    unsigned int alpha;  // Last alpha applied
++    unsigned int seq;  // Sequence number of the last buffer sent; reset to 0 by flush
++} subpic_reg_stash_t;
++
++int hw_mmal_subpic_update(vlc_object_t * const p_filter,
++                          MMAL_BUFFER_HEADER_T * const sub_buf,
++                          subpic_reg_stash_t * const spe,
++                          const video_format_t * const fmt,
++                          const MMAL_RECT_T * const scale_out,
++                          const MMAL_DISPLAYTRANSFORM_T transform_out,
++                          const uint64_t pts);
++
++void hw_mmal_subpic_flush(vlc_object_t * const p_filter, subpic_reg_stash_t * const spe);
++
++void hw_mmal_subpic_close(vlc_object_t * const p_filter, subpic_reg_stash_t * const spe);
++
++// If display id is -1 it will be unset
++MMAL_STATUS_T hw_mmal_subpic_open(vlc_object_t * const p_filter, subpic_reg_stash_t * const spe, MMAL_PORT_T * const port,
++                                  const int display_id, const unsigned int layer);
++
++#endif
++
+--- /dev/null
++++ b/modules/hw/mmal/transform_ops.h
+@@ -0,0 +1,99 @@
++#ifndef VLC_MMAL_TRANSFORM_OPS_H
++#define VLC_MMAL_TRANSFORM_OPS_H
++
++#include <vlc_common.h>
++#include <vlc_picture.h>
++#include <interface/mmal/mmal.h>
++
++
++// Convert a VLC orientation to an MMAL transform: the enums have the same order so simply coerce
++static inline MMAL_DISPLAYTRANSFORM_T vlc_to_mmal_transform(const video_orientation_t orientation){
++    return (MMAL_DISPLAYTRANSFORM_T)orientation;
++}
++
++// MMAL headers comment these (getting 2 a bit wrong) but do not give
++// defines
++#define XFORM_H_SHIFT 0  // Hflip
++#define XFORM_V_SHIFT 1  // Vflip
++#define XFORM_T_SHIFT 2  // Transpose
++#define XFORM_H_BIT   (1 << XFORM_H_SHIFT)
++#define XFORM_V_BIT   (1 << XFORM_V_SHIFT)
++#define XFORM_T_BIT   (1 << XFORM_T_SHIFT)
++
++static inline bool
++is_transform_transpose(const MMAL_DISPLAYTRANSFORM_T t)
++{   // True when the transpose bit (XFORM_T_BIT, bit 2) is set in t
++    return ((unsigned int)t & XFORM_T_BIT) != 0;
++}
++
++static inline bool
++is_transform_hflip(const MMAL_DISPLAYTRANSFORM_T t)
++{   // True when the horizontal flip bit (XFORM_H_BIT, bit 0) is set in t
++    return ((unsigned int)t & XFORM_H_BIT) != 0;
++}
++
++static inline bool
++is_transform_vflip(const MMAL_DISPLAYTRANSFORM_T t)
++{   // True when the vertical flip bit (XFORM_V_BIT, bit 1) is set in t
++    return ((unsigned int)t & XFORM_V_BIT) != 0;
++}
++
++static inline MMAL_DISPLAYTRANSFORM_T
++swap_transform_hv(const MMAL_DISPLAYTRANSFORM_T x)
++{   // Exchange the H-flip and V-flip bits of x; the transpose bit is passed through unchanged
++    return (((x >> XFORM_H_SHIFT) & 1) << XFORM_V_SHIFT) |
++           (((x >> XFORM_V_SHIFT) & 1) << XFORM_H_SHIFT) |
++           (x & XFORM_T_BIT);
++}
++
++static inline MMAL_DISPLAYTRANSFORM_T
++transform_inverse(const MMAL_DISPLAYTRANSFORM_T x)
++{   // Returns x with its H/V flip bits swapped when x includes a transpose, otherwise x unchanged
++    return is_transform_transpose(x) ? swap_transform_hv(x) : x;
++}
++
++// Transform generated by A then B
++// All ops are self inverse so can simply be XORed on their own
++// H & V flips after a transpose need to be swapped
++static inline MMAL_DISPLAYTRANSFORM_T
++combine_transform(const MMAL_DISPLAYTRANSFORM_T a, const MMAL_DISPLAYTRANSFORM_T b)
++{   // XOR a with b, first swapping b's H/V flip bits when a includes a transpose
++    return a ^ (is_transform_transpose(a) ? swap_transform_hv(b) : b);
++}
++
++static inline MMAL_RECT_T
++rect_transpose(const MMAL_RECT_T s)
++{   // Returns s with x/y exchanged and width/height exchanged
++    return (MMAL_RECT_T){
++        .x      = s.y,
++        .y      = s.x,
++        .width  = s.height,
++        .height = s.width
++    };
++}
++
++// hflip s in c: mirror s horizontally within the containing rect c (size unchanged)
++static inline MMAL_RECT_T rect_hflip(const MMAL_RECT_T s, const MMAL_RECT_T c)
++{
++    return (MMAL_RECT_T){
++        .x = c.x + (c.x + c.width) - (s.x + s.width),  // c.x + gap between the right edges of c and s
++        .y = s.y,
++        .width = s.width,
++        .height = s.height
++    };
++}
++
++// vflip s in c: mirror s vertically within the containing rect c (size unchanged)
++static inline MMAL_RECT_T rect_vflip(const MMAL_RECT_T s, const MMAL_RECT_T c)
++{
++    return (MMAL_RECT_T){
++        .x = s.x,
++        .y = (c.y + c.height) - (s.y - c.y) - s.height,  // equals c.y + gap between the bottom edges of c and s
++        .width = s.width,
++        .height = s.height
++    };
++}
++
++
++#endif
++
+--- /dev/null
++++ b/modules/hw/mmal/v7_pmu.S
+@@ -0,0 +1,263 @@
++/*------------------------------------------------------------
++Performance Monitor Block
++------------------------------------------------------------*/
++    .arm  @ Make sure we are in ARM mode.
++    .text
++    .align 2
++    .global getPMN @ export this function for the linker
++
++/*  Returns the number of programmable counters: uint32_t getPMN(void) */
++
++getPMN:
++  MRC     p15, 0, r0, c9, c12, 0 /* Read PMNC Register	*/
++  MOV     r0, r0, LSR #11        /* Shift N field down to bit 0	*/
++  AND     r0, r0, #0x1F          /* Mask to leave just the 5 N bits	*/
++  BX      lr
++
++
++
++    .global pmn_config @ export this function for the linker
++  /* Sets the event for a programmable counter to record	*/
++  /* void pmn_config(unsigned counter, uint32_t event)	*/
++  /* counter = r0 = Which counter to program  (e.g. 0 for PMN0, 1 for PMN1 */
++  /* event   = r1 = The event code	*/
++pmn_config:
++  AND     r0, r0, #0x1F          /* Mask to leave only bits 4:0	*/
++  MCR     p15, 0, r0, c9, c12, 5 /* Write PMNXSEL Register	*/
++  MCR     p15, 0, r1, c9, c13, 1 /* Write EVTSELx Register	*/
++  BX      lr
++
++
++
++    .global ccnt_divider @ export this function for the linker
++  /* Enables/disables the divider (1/64) on CCNT	*/
++  /* void ccnt_divider(int divider)	*/
++  /* divider = r0 = If 0 disable divider, else enable divider	*/
++ccnt_divider:
++  MRC     p15, 0, r1, c9, c12, 0  /* Read PMNC	*/
++
++  CMP     r0, #0x0                /* IF (r0 == 0)	*/
++  BICEQ   r1, r1, #0x08           /* THEN: Clear the D bit (disables the divider) */
++  ORRNE   r1, r1, #0x08           /* ELSE: Set the D bit (enables the divider) */
++
++  MCR     p15, 0, r1, c9, c12, 0  /* Write PMNC	*/
++  BX      lr
++
++
++  /* ---------------------------------------------------------------	*/
++  /* Enable/Disable	*/
++  /* ---------------------------------------------------------------	*/
++
++    .global enable_pmu @ export this function for the linker
++  /* Global PMU enable	*/
++  /* void enable_pmu(void)	*/
++enable_pmu:
++  MRC     p15, 0, r0, c9, c12, 0  /* Read PMNC	*/
++  ORR     r0, r0, #0x01           /* Set E bit	*/
++  MCR     p15, 0, r0, c9, c12, 0  /* Write PMNC	*/
++  BX      lr
++
++
++
++    .global disable_pmu @ export this function for the linker
++  /* Global PMU disable	*/
++  /* void disable_pmu(void)	*/
++disable_pmu:
++  MRC     p15, 0, r0, c9, c12, 0  /* Read PMNC	*/
++  BIC     r0, r0, #0x01           /* Clear E bit	*/
++  MCR     p15, 0, r0, c9, c12, 0  /* Write PMNC	*/
++  BX      lr
++
++
++
++    .global enable_ccnt @ export this function for the linker
++  /* Enable the CCNT	*/
++  /* void enable_ccnt(void)	*/
++enable_ccnt:
++  MOV     r0, #0x80000000         /* Set C bit	*/
++  MCR     p15, 0, r0, c9, c12, 1  /* Write CNTENS Register	*/
++  BX      lr
++
++
++
++    .global disable_ccnt @ export this function for the linker
++  /* Disable the CCNT	*/
++  /* void disable_ccnt(void)	*/
++disable_ccnt:
++  MOV     r0, #0x80000000         /* Clear C bit	*/
++  MCR     p15, 0, r0, c9, c12, 2  /* Write CNTENC Register	*/
++  BX      lr
++
++
++
++    .global enable_pmn @ export this function for the linker
++  /* Enable PMN{n}	*/
++  /* void enable_pmn(uint32_t counter)	*/
++  /* counter = r0 = The counter to enable (e.g. 0 for PMN0, 1 for PMN1) */
++enable_pmn:
++  MOV     r1, #0x1                /* Use arg (r0) to select the counter bit */
++  MOV     r1, r1, LSL r0
++
++  MCR     p15, 0, r1, c9, c12, 1  /* Write CNTENS Register	*/
++  BX      lr
++
++
++
++    .global disable_pmn @ export this function for the linker
++  /* Disable PMN{n}	*/
++  /* void disable_pmn(uint32_t counter)	*/
++  /* counter = r0 = The counter to disable (e.g. 0 for PMN0, 1 for PMN1) */
++disable_pmn:
++  MOV     r1, #0x1                /* Use arg (r0) to select the counter bit */
++  MOV     r1, r1, LSL r0
++
++  MCR     p15, 0, r1, c9, c12, 2  /* Write CNTENC Register (clear enable) */
++  BX      lr
++
++
++
++    .global enable_pmu_user_access @ export this function for the linker
++  /* Enables User mode access to the PMU (must be called in a privileged mode) */
++  /* void enable_pmu_user_access(void)	*/
++enable_pmu_user_access:
++  MRC     p15, 0, r0, c9, c14, 0  /* Read PMUSERENR Register	*/
++  ORR     r0, r0, #0x01           /* Set EN bit (bit 0)	*/
++  MCR     p15, 0, r0, c9, c14, 0  /* Write PMUSERENR Register	*/
++  BX      lr
++
++
++
++    .global disable_pmu_user_access @ export this function for the linker
++  /* Disables User mode access to the PMU (must be called in a privileged mode) */
++  /* void disable_pmu_user_access(void)	*/
++disable_pmu_user_access:
++  MRC     p15, 0, r0, c9, c14, 0  /* Read PMUSERENR Register	*/
++  BIC     r0, r0, #0x01           /* Clear EN bit (bit 0)	*/
++  MCR     p15, 0, r0, c9, c14, 0  /* Write PMUSERENR Register	*/
++  BX      lr
++
++
++  /* ---------------------------------------------------------------	*/
++  /* Counter read registers	*/
++  /* ---------------------------------------------------------------	*/
++
++    .global read_ccnt @ export this function for the linker
++  /* Returns the value of CCNT	*/
++  /* uint32_t read_ccnt(void)	*/
++read_ccnt:
++  MRC     p15, 0, r0, c9, c13, 0 /* Read CCNT Register	*/
++  BX      lr
++
++
++    .global read_pmn @ export this function for the linker
++  /* Returns the value of PMN{n}	*/
++  /* uint32_t read_pmn(uint32_t counter)	*/
++  /* counter = r0 =  The counter to read (e.g. 0 for PMN0, 1 for PMN1) */
++read_pmn:
++  AND     r0, r0, #0x1F          /* Mask to leave only bits 4:0	*/
++  MCR     p15, 0, r0, c9, c12, 5 /* Write PMNXSEL Register	*/
++  MRC     p15, 0, r0, c9, c13, 2 /* Read current PMNx Register	*/
++  BX      lr
++
++
++  /* ---------------------------------------------------------------	*/
++  /* Software Increment	*/
++  /* ---------------------------------------------------------------	*/
++
++    .global pmu_software_increment @ export this function for the linker
++  /* Writes to software increment register	*/
++  /* void pmu_software_increment(uint32_t counter)	*/
++  /* counter = r0 =  The counter to increment (e.g. 0 for PMN0, 1 for PMN1) */
++pmu_software_increment:
++  MOV     r1, #0x01              /* Use arg (r0) to select the counter bit */
++  MOV     r1, r1, LSL r0
++  MCR     p15, 0, r1, c9, c12, 4 /* Write SWINCR Register	*/
++  BX      lr
++
++  /* ---------------------------------------------------------------	*/
++  /* Overflow & Interrupt Generation	*/
++  /* ---------------------------------------------------------------	*/
++
++    .global read_flags @ export this function for the linker
++  /* Returns the value of the overflow flags	*/
++  /* uint32_t read_flags(void)	*/
++read_flags:
++  MRC     p15, 0, r0, c9, c12, 3 /* Read FLAG Register	*/
++  BX      lr
++
++
++    .global write_flags @ export this function for the linker
++  /* Writes the overflow flags	*/
++  /* void write_flags(uint32_t flags)	*/
++write_flags:
++  MCR     p15, 0, r0, c9, c12, 3 /* Write FLAG Register	*/
++  BX      lr
++
++
++    .global enable_ccnt_irq @ export this function for the linker
++  /* Enables interrupt generation on overflow of the CCNT	*/
++  /* void enable_ccnt_irq(void)	*/
++enable_ccnt_irq:
++  MOV     r0, #0x80000000
++  MCR     p15, 0, r0, c9, c14, 1  /* Write INTENS Register	*/
++  BX      lr
++
++    .global disable_ccnt_irq @ export this function for the linker
++  /* Disables interrupt generation on overflow of the CCNT	*/
++  /* void disable_ccnt_irq(void)	*/
++disable_ccnt_irq:
++  MOV     r0, #0x80000000
++  MCR     p15, 0, r0, c9, c14, 2   /* Write INTENC Register	*/
++  BX      lr
++
++
++    .global enable_pmn_irq @ export this function for the linker
++  /* Enables interrupt generation on overflow of PMN{x}	*/
++  /* void enable_pmn_irq(uint32_t counter)	*/
++  /* counter = r0 =  The counter to enable the interrupt for (e.g. 0 for PMN0, 1 for PMN1) */
++enable_pmn_irq:
++  MOV     r1, #0x1                 /* Use arg (r0) to select the counter bit */
++  MOV     r0, r1, LSL r0
++  MCR     p15, 0, r0, c9, c14, 1   /* Write INTENS Register	*/
++  BX      lr
++
++    .global disable_pmn_irq @ export this function for the linker
++  /* Disables interrupt generation on overflow of PMN{x}	*/
++  /* void disable_pmn_irq(uint32_t counter)	*/
++  /* counter = r0 =  The counter to disable the interrupt for (e.g. 0 for PMN0, 1 for PMN1) */
++disable_pmn_irq:
++  MOV     r1, #0x1                /* Use arg (r0) to select the counter bit */
++  MOV     r0, r1, LSL r0
++  MCR     p15, 0, r0, c9, c14, 2  /* Write INTENC Register	*/
++  BX      lr
++
++  /* ---------------------------------------------------------------	*/
++  /* Reset Functions	*/
++  /* ---------------------------------------------------------------	*/
++
++    .global reset_pmn @ export this function for the linker
++  /* Resets the programmable counters	*/
++  /* void reset_pmn(void)	*/
++reset_pmn:
++  MRC     p15, 0, r0, c9, c12, 0  /* Read PMNC	*/
++  ORR     r0, r0, #0x02           /* Set P bit (Event Counter Reset)	*/
++  MCR     p15, 0, r0, c9, c12, 0  /* Write PMNC	*/
++  BX      lr
++
++
++	.global reset_ccnt @ export this function for the linker
++  /* Resets the CCNT	*/
++  /* void reset_ccnt(void)	*/
++reset_ccnt:
++  MRC     p15, 0, r0, c9, c12, 0  /* Read PMNC	*/
++  ORR     r0, r0, #0x04           /* Set C bit (Cycle Counter Reset)	*/
++  MCR     p15, 0, r0, c9, c12, 0  /* Write PMNC	*/
++  BX      lr
++
++
++    .end @end of code, this line is optional.
++/* ------------------------------------------------------------	*/
++/* End of v7_pmu.s	*/
++/* ------------------------------------------------------------	*/
++
++
+--- /dev/null
++++ b/modules/hw/mmal/v7_pmu.h
+@@ -0,0 +1,113 @@
++// ------------------------------------------------------------
++// PMU for Cortex-A/R (v7-A/R)
++// ------------------------------------------------------------
++
++#ifndef _V7_PMU_H
++#define _V7_PMU_H
++
++// Returns the number of programmable counters
++unsigned int getPMN(void);
++
++// Sets the event for a programmable counter to record
++// counter = r0 = Which counter to program  (e.g. 0 for PMN0, 1 for PMN1)
++// event   = r1 = The event code (from appropriate TRM or ARM Architecture Reference Manual)
++void pmn_config(unsigned int counter, unsigned int event);
++
++// Enables/disables the divider (1/64) on CCNT
++// divider = r0 = If 0 disable divider, else enable divider
++void ccnt_divider(int divider);
++
++//
++// Enables and disables
++//
++
++// Global PMU enable
++// On ARM11 this enables the PMU, and the counters start immediately
++// On Cortex this enables the PMU, there are individual enables for the counters
++void enable_pmu(void);
++
++// Global PMU disable
++// On Cortex, this overrides the enable state of the individual counters
++void disable_pmu(void);
++
++// Enable the CCNT
++void enable_ccnt(void);
++
++// Disable the CCNT
++void disable_ccnt(void);
++
++// Enable PMN{n}
++// counter = The counter to enable (e.g. 0 for PMN0, 1 for PMN1)
++void enable_pmn(unsigned int counter);
++
++// Disable PMN{n}
++// counter = The counter to disable (e.g. 0 for PMN0, 1 for PMN1)
++void disable_pmn(unsigned int counter);
++
++//
++// Read counter values
++//
++
++// Returns the value of CCNT
++unsigned int read_ccnt(void);
++
++// Returns the value of PMN{n}
++// counter = The counter to read (e.g. 0 for PMN0, 1 for PMN1)
++unsigned int read_pmn(unsigned int counter);
++
++//
++// Overflow and interrupts
++//
++
++// Returns the value of the overflow flags
++unsigned int read_flags(void);
++
++// Writes the overflow flags
++void write_flags(unsigned int flags);
++
++// Enables interrupt generation on overflow of the CCNT
++void enable_ccnt_irq(void);
++
++// Disables interrupt generation on overflow of the CCNT
++void disable_ccnt_irq(void);
++
++// Enables interrupt generation on overflow of PMN{x}
++// counter = The counter to enable the interrupt for (e.g. 0 for PMN0, 1 for PMN1)
++void enable_pmn_irq(unsigned int counter);
++
++// Disables interrupt generation on overflow of PMN{x}
++// counter = r0 =  The counter to disable the interrupt for (e.g. 0 for PMN0, 1 for PMN1)
++void disable_pmn_irq(unsigned int counter);
++
++//
++// Counter reset functions
++//
++
++// Resets the programmable counters
++void reset_pmn(void);
++
++// Resets the CCNT
++void reset_ccnt(void);
++
++//
++// Software Increment
++
++// Writes to software increment register
++// counter = The counter to increment (e.g. 0 for PMN0, 1 for PMN1)
++void pmu_software_increment(unsigned int counter);
++
++//
++// User mode access
++//
++
++// Enables User mode access to the PMU (must be called in a privileged mode)
++void enable_pmu_user_access(void);
++
++// Disables User mode access to the PMU (must be called in a privileged mode)
++void disable_pmu_user_access(void);
++
++#endif
++// ------------------------------------------------------------
++// End of v7_pmu.h
++// ------------------------------------------------------------
++
+--- a/modules/hw/mmal/vout.c
++++ b/modules/hw/mmal/vout.c
+@@ -27,21 +27,28 @@
+ #endif
+
+ #include <math.h>
++#include <stdatomic.h>
+
+ #include <vlc_common.h>
+-#include <vlc_atomic.h>
+ #include <vlc_plugin.h>
+ #include <vlc_threads.h>
+ #include <vlc_vout_display.h>
++#include <vlc_modules.h>
+
+-#include "mmal_picture.h"
+-
++#pragma GCC diagnostic push
++#pragma GCC diagnostic ignored "-Wbad-function-cast"
+ #include <bcm_host.h>
++#pragma GCC diagnostic pop
+ #include <interface/mmal/mmal.h>
+ #include <interface/mmal/util/mmal_util.h>
+ #include <interface/mmal/util/mmal_default_components.h>
+ #include <interface/vmcs_host/vc_tvservice.h>
+-#include <interface/vmcs_host/vc_dispmanx.h>
++
++#include "mmal_picture.h"
++#include "subpic.h"
++#include "transform_ops.h"
++
++#define TRACE_ALL 0
+
+ #define MAX_BUFFERS_IN_TRANSIT 1
+ #define VC_TV_MAX_MODE_IDS 127
+@@ -50,10 +57,28 @@
+ #define MMAL_LAYER_TEXT N_("VideoCore layer where the video is displayed.")
+ #define MMAL_LAYER_LONGTEXT N_("VideoCore layer where the video is displayed. Subpictures are displayed directly above and a black background directly below.")
+
+-#define MMAL_BLANK_BACKGROUND_NAME "mmal-blank-background"
+-#define MMAL_BLANK_BACKGROUND_TEXT N_("Blank screen below video.")
+-#define MMAL_BLANK_BACKGROUND_LONGTEXT N_("Render blank screen below video. " \
+-        "Increases VideoCore load.")
++#define MMAL_DISPLAY_NAME "mmal-display"
++#define MMAL_DISPLAY_TEXT N_("Output device for Rpi fullscreen.")
++#define MMAL_DISPLAY_LONGTEXT N_("Output device for Rpi fullscreen. " \
++"Valid values are HDMI-1,HDMI-2.  By default if qt-fullscreen-screennumber " \
++"is specified (or set by Fullscreen Output Device in Preferences) " \
++"HDMI-<qt-fullscreen-screennumber+1> will be used, otherwise HDMI-1.")
++
++#define MMAL_VOUT_TRANSFORM_NAME "mmal-vout-transform"  // Option name; TEXT/LONGTEXT are its help strings
++#define MMAL_VOUT_TRANSFORM_TEXT N_("Video transform for Rpi fullscreen.")
++#define MMAL_VOUT_TRANSFORM_LONGTEXT N_("Video transform for Rpi fullscreen. "\
++"Transforms available: auto, 0, 90, 180, 270, hflip, vflip, transpose, antitranspose")
++
++#define MMAL_VOUT_WINDOW_NAME "mmal-vout-window"  // Option name; TEXT/LONGTEXT are its help strings
++#define MMAL_VOUT_WINDOW_TEXT N_("Display window for Rpi fullscreen")
++#define MMAL_VOUT_WINDOW_LONGTEXT N_("Display window for Rpi fullscreen. "\
++"fullscreen|<width>x<height>+<x>+<y>")
++
++#define MMAL_VOUT_TRANSPARENT_NAME "mmal-vout-transparent"  // Option name; TEXT/LONGTEXT are its help strings
++#define MMAL_VOUT_TRANSPARENT_TEXT N_("Enable layers beneath the video layer.")
++#define MMAL_VOUT_TRANSPARENT_LONGTEXT N_("Enable layers beneath the video layer."\
++" By default these are disabled."\
++" Having the lower layers enabled can impact video performance")
+
+ #define MMAL_ADJUST_REFRESHRATE_NAME "mmal-adjust-refreshrate"
+ #define MMAL_ADJUST_REFRESHRATE_TEXT N_("Adjust HDMI refresh rate to the video.")
+@@ -68,332 +93,628 @@
+ #define PHASE_OFFSET_TARGET ((double)0.25)
+ #define PHASE_CHECK_INTERVAL 100
+
+-static int Open(vlc_object_t *);
+-static void Close(vlc_object_t *);
+-
+-vlc_module_begin()
+-    set_shortname(N_("MMAL vout"))
+-    set_description(N_("MMAL-based vout plugin for Raspberry Pi"))
+-    set_capability("vout display", 90)
+-    add_shortcut("mmal_vout")
+-    add_integer(MMAL_LAYER_NAME, 1, MMAL_LAYER_TEXT, MMAL_LAYER_LONGTEXT, false)
+-    add_bool(MMAL_BLANK_BACKGROUND_NAME, true, MMAL_BLANK_BACKGROUND_TEXT,
+-                    MMAL_BLANK_BACKGROUND_LONGTEXT, true);
+-    add_bool(MMAL_ADJUST_REFRESHRATE_NAME, false, MMAL_ADJUST_REFRESHRATE_TEXT,
+-                    MMAL_ADJUST_REFRESHRATE_LONGTEXT, false)
+-    add_bool(MMAL_NATIVE_INTERLACED, false, MMAL_NATIVE_INTERLACE_TEXT,
+-                    MMAL_NATIVE_INTERLACE_LONGTEXT, false)
+-    set_callbacks(Open, Close)
+-vlc_module_end()
++#define SUBS_MAX 4
+
+-struct dmx_region_t {
+-    struct dmx_region_t *next;
+-    picture_t *picture;
+-    VC_RECT_T bmp_rect;
+-    VC_RECT_T src_rect;
+-    VC_RECT_T dst_rect;
+-    VC_DISPMANX_ALPHA_T alpha;
+-    DISPMANX_ELEMENT_HANDLE_T element;
+-    DISPMANX_RESOURCE_HANDLE_T resource;
+-    int32_t pos_x;
+-    int32_t pos_y;
+-};
++typedef struct vout_subpic_s {
++    MMAL_COMPONENT_T *component;
++    subpic_reg_stash_t sub;
++} vout_subpic_t;
+
+ struct vout_display_sys_t {
+-    vlc_cond_t buffer_cond;
+-    vlc_mutex_t buffer_mutex;
+     vlc_mutex_t manage_mutex;
+
+-    plane_t planes[3]; /* Depending on video format up to 3 planes are used */
+-    picture_t **pictures; /* Actual list of alloced pictures passed into picture_pool */
+-    picture_pool_t *picture_pool;
+-
++    vcsm_init_type_t init_type;
+     MMAL_COMPONENT_T *component;
+     MMAL_PORT_T *input;
+     MMAL_POOL_T *pool; /* mmal buffer headers, used for pushing pictures to component*/
+-    struct dmx_region_t *dmx_region;
+     int i_planes; /* Number of actually used planes, 1 for opaque, 3 for i420 */
+
+-    uint32_t buffer_size; /* size of actual mmal buffers */
+     int buffers_in_transit; /* number of buffers currently pushed to mmal component */
+     unsigned num_buffers; /* number of buffers allocated at mmal port */
+
+-    DISPMANX_DISPLAY_HANDLE_T dmx_handle;
+-    DISPMANX_ELEMENT_HANDLE_T bkg_element;
+-    DISPMANX_RESOURCE_HANDLE_T bkg_resource;
+-    unsigned display_width;
+-    unsigned display_height;
++    int display_id;
++    MMAL_RECT_T win_rect;       // Window rect after transform(s)
++    MMAL_RECT_T display_rect;   // Actual shape of display (x, y always 0)
++    MMAL_RECT_T req_win;        // User requested window (w=0 => fullscreen)
++
++    MMAL_RECT_T spu_rect;       // Output rectangle in cfg coords (for subpic placement)
++    MMAL_RECT_T dest_rect;      // Output rectangle in display coords
++    MMAL_DISPLAYTRANSFORM_T dest_transform;      // Dest window coord transform
++    MMAL_DISPLAYTRANSFORM_T display_transform;  // "Native" display transform
++    MMAL_DISPLAYTRANSFORM_T video_transform;     // Combined config+native transform
+
+-    int i_frame_rate_base; /* cached framerate to detect changes for rate adjustment */
+-    int i_frame_rate;
++    unsigned int i_frame_rate_base; /* cached framerate to detect changes for rate adjustment */
++    unsigned int i_frame_rate;
+
+     int next_phase_check; /* lowpass for phase check frequency */
+     int phase_offset; /* currently applied offset to presentation time in ns */
+     int layer; /* the dispman layer (z-index) used for video rendering */
++    bool transparent;           // Do not disable layers beneath ours
+
+     bool need_configure_display; /* indicates a required display reconfigure to main thread */
+     bool adjust_refresh_rate;
+     bool native_interlaced;
+     bool b_top_field_first; /* cached interlaced settings to detect changes for native mode */
+     bool b_progressive;
+-    bool opaque; /* indicated use of opaque picture format (zerocopy) */
+-};
++    bool force_config;
+
+-static const vlc_fourcc_t subpicture_chromas[] = {
+-    VLC_CODEC_RGBA,
+-    0
+-};
++    vout_subpic_t subs[SUBS_MAX];
++    // Stash for subpics derived from the passed subpicture rather than
++    // included with the main pic
++    MMAL_BUFFER_HEADER_T * subpic_bufs[SUBS_MAX];
++
++    picture_pool_t * pic_pool;
++
++    struct vout_isp_conf_s {
++        MMAL_COMPONENT_T *component;
++        MMAL_PORT_T * input;
++        MMAL_PORT_T * output;
++        MMAL_QUEUE_T * out_q;
++        MMAL_POOL_T * in_pool;
++        MMAL_POOL_T * out_pool;
++        bool pending;
++    } isp;
+
+-/* Utility functions */
+-static inline uint32_t align(uint32_t x, uint32_t y);
+-static int configure_display(vout_display_t *vd, const vout_display_cfg_t *cfg,
+-                const video_format_t *fmt);
++    MMAL_POOL_T * copy_pool;
++    MMAL_BUFFER_HEADER_T * copy_buf;
+
+-/* VLC vout display callbacks */
+-static picture_pool_t *vd_pool(vout_display_t *vd, unsigned count);
+-static void vd_prepare(vout_display_t *vd, picture_t *picture,
+-                subpicture_t *subpicture);
+-static void vd_display(vout_display_t *vd, picture_t *picture,
+-                subpicture_t *subpicture);
+-static int vd_control(vout_display_t *vd, int query, va_list args);
+-static void vd_manage(vout_display_t *vd);
+-
+-/* MMAL callbacks */
+-static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
+-static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer);
++    // Subpic blend if we have to do it here
++    vzc_pool_ctl_t * vzc;
++};
+
+-/* TV service */
+-static int query_resolution(vout_display_t *vd, unsigned *width, unsigned *height);
+-static void tvservice_cb(void *callback_data, uint32_t reason, uint32_t param1,
+-                uint32_t param2);
+-static void adjust_refresh_rate(vout_display_t *vd, const video_format_t *fmt);
+-static int set_latency_target(vout_display_t *vd, bool enable);
+
+-/* DispManX */
+-static void display_subpicture(vout_display_t *vd, subpicture_t *subpicture);
+-static void close_dmx(vout_display_t *vd);
+-static struct dmx_region_t *dmx_region_new(vout_display_t *vd,
+-                DISPMANX_UPDATE_HANDLE_T update, subpicture_region_t *region);
+-static void dmx_region_update(struct dmx_region_t *dmx_region,
+-                DISPMANX_UPDATE_HANDLE_T update, picture_t *picture);
+-static void dmx_region_delete(struct dmx_region_t *dmx_region,
+-                DISPMANX_UPDATE_HANDLE_T update);
+-static void show_background(vout_display_t *vd, bool enable);
+-static void maintain_phase_sync(vout_display_t *vd);
++// ISP setup
+
+-static int Open(vlc_object_t *object)
++static inline bool want_isp(const vout_display_t * const vd)  // True when the ISP resizer stage is needed for vd's format
+ {
+-    vout_display_t *vd = (vout_display_t *)object;
+-    vout_display_sys_t *sys;
+-    uint32_t buffer_pitch, buffer_height;
+-    vout_display_place_t place;
+-    MMAL_DISPLAYREGION_T display_region;
+-    MMAL_STATUS_T status;
+-    int ret = VLC_SUCCESS;
+-    unsigned i;
++    return (vd->fmt.i_chroma == VLC_CODEC_MMAL_ZC_SAND10);  // only the zero-copy SAND10 chroma takes the ISP path
++}
+
+-    if (vout_display_IsWindowed(vd))
+-        return VLC_EGENERIC;
++static inline bool want_copy(const vout_display_t * const vd)  // True when vd's chroma is VLC_CODEC_I420 or VLC_CODEC_I420_10L
++{
++    return (vd->fmt.i_chroma == VLC_CODEC_I420 || vd->fmt.i_chroma == VLC_CODEC_I420_10L);  // presumably these are copied into MMAL buffers (copy_pool/copy_buf) - TODO confirm
++}
+
+-    sys = calloc(1, sizeof(struct vout_display_sys_t));
+-    if (!sys)
+-        return VLC_ENOMEM;
+-    vd->sys = sys;
++static inline vlc_fourcc_t req_chroma(const vout_display_t * const vd)  // Chroma to ask for: keep vd's chroma if MMAL or copyable, else I420
++{
++    return !hw_mmal_chroma_is_mmal(vd->fmt.i_chroma) && !want_copy(vd) ?
++        VLC_CODEC_I420 :  // neither an MMAL chroma nor one accepted by want_copy()
++        vd->fmt.i_chroma;
++}
+
+-    sys->layer = var_InheritInteger(vd, MMAL_LAYER_NAME);
+-    bcm_host_init();
++static MMAL_FOURCC_T vout_vlc_to_mmal_pic_fourcc(const unsigned int fcc)  // Map a VLC chroma fourcc to the matching MMAL encoding
++{
++    switch (fcc){
++    case VLC_CODEC_MMAL_OPAQUE:
++        return MMAL_ENCODING_OPAQUE;
++    case VLC_CODEC_MMAL_ZC_SAND8:
++        return MMAL_ENCODING_YUVUV128;
++    case VLC_CODEC_MMAL_ZC_SAND10:
++        return MMAL_ENCODING_YUVUV64_10;
++    case VLC_CODEC_MMAL_ZC_SAND30:
++        return MMAL_ENCODING_YUV10_COL;
++    case VLC_CODEC_MMAL_ZC_I420:
++    case VLC_CODEC_I420:
++        return MMAL_ENCODING_I420;
++    default:
++        break;  // unrecognised chroma: falls through to the I420 return below
++    }
++    return MMAL_ENCODING_I420;
++}
+
+-    sys->opaque = vd->fmt.i_chroma == VLC_CODEC_MMAL_OPAQUE;
++static void display_set_format(const vout_display_t * const vd, MMAL_ES_FORMAT_T *const es_fmt, const bool is_intermediate)  // Fill es_fmt from vd->fmt; is_intermediate selects I420 at visible size (used for the ISP output port)
++{
++    const unsigned int w = is_intermediate ? vd->fmt.i_visible_width  : vd->fmt.i_width ;
++    const unsigned int h = is_intermediate ? vd->fmt.i_visible_height : vd->fmt.i_height;
++    MMAL_VIDEO_FORMAT_T * const v_fmt = &es_fmt->es->video;
+
+-    status = mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &sys->component);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(vd, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
+-                        MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
++    es_fmt->type = MMAL_ES_TYPE_VIDEO;
++    es_fmt->encoding = is_intermediate ? MMAL_ENCODING_I420 : vout_vlc_to_mmal_pic_fourcc(vd->fmt.i_chroma);
++    es_fmt->encoding_variant = 0;
++
++    v_fmt->width  = (w + 31) & ~31;  // round width up to a multiple of 32
++    v_fmt->height = (h + 15) & ~15;  // round height up to a multiple of 16
++    v_fmt->crop.x = 0;
++    v_fmt->crop.y = 0;
++    v_fmt->crop.width = w;  // crop carries the unpadded size
++    v_fmt->crop.height = h;
++    if (vd->fmt.i_sar_num == 0 || vd->fmt.i_sar_den == 0) {  // unknown SAR: use 1:1
++        v_fmt->par.num        = 1;
++        v_fmt->par.den        = 1;
++    } else {
++        v_fmt->par.num        = vd->fmt.i_sar_num;
++        v_fmt->par.den        = vd->fmt.i_sar_den;
+     }
++    v_fmt->frame_rate.num = vd->fmt.i_frame_rate;
++    v_fmt->frame_rate.den = vd->fmt.i_frame_rate_base;
++    v_fmt->color_space    = vlc_to_mmal_color_space(vd->fmt.space);
+
+-    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)vd;
+-    status = mmal_port_enable(sys->component->control, control_port_cb);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(vd, "Failed to enable control port %s (status=%"PRIx32" %s)",
+-                        sys->component->control->name, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
++    msg_Dbg(vd, "WxH: %dx%d, Crop: %dx%d", v_fmt->width, v_fmt->height, v_fmt->crop.width, v_fmt->crop.height);
++}
++
++static MMAL_RECT_T  // Source rectangle for the display region, derived from src's visible area
++display_src_rect(const vout_display_t * const vd, const video_format_t * const src)
++{
++    const bool wants_isp = want_isp(vd);  // with the ISP the x/y offsets are forced to 0 (presumably its output is already cropped - TODO confirm)
++
++    // Scale source derived cropping to actual picture shape
++    return (MMAL_RECT_T){
++        .x = wants_isp ? 0 : src->i_x_offset * vd->fmt.i_width / src->i_width,
++        .y = wants_isp ? 0 : src->i_y_offset * vd->fmt.i_height / src->i_height,
++        .width  = src->i_visible_width  * vd->fmt.i_width / src->i_width,
++        .height = src->i_visible_height * vd->fmt.i_height / src->i_height
++    };
++}
++
++static void isp_input_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)  // ISP input port callback: releases the returned buffer header
++{
++#if TRACE_ALL
++    vout_display_t * const vd = (vout_display_t *)port->userdata;
++    pic_ctx_mmal_t * ctx = buf->user_data;
++    msg_Dbg(vd, "<<< %s: cmd=%d, ctx=%p, buf=%p, flags=%#x, pts=%lld", __func__, buf->cmd, ctx, buf,
++            buf->flags, (long long)buf->pts);
++#else
++    VLC_UNUSED(port);  // port is only needed for trace output
++#endif
++
++    mmal_buffer_header_release(buf);
++
++#if TRACE_ALL
++    msg_Dbg(vd, ">>> %s", __func__);
++#endif
++}
++
++static void isp_control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)  // ISP control port callback: logs error events, then releases the event buffer
++{
++    vout_display_t *vd = (vout_display_t *)port->userdata;
++    MMAL_STATUS_T status;
++
++    if (buffer->cmd == MMAL_EVENT_ERROR) {
++        status = *(uint32_t *)buffer->data;  // the error event payload is read as the status code
++        msg_Err(vd, "MMAL error %"PRIx32" \"%s\"", status, mmal_status_to_string(status));
+     }
+
+-    sys->input = sys->component->input[0];
+-    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)vd;
++    mmal_buffer_header_release(buffer);
++}
+
+-    if (sys->opaque) {
+-        sys->input->format->encoding = MMAL_ENCODING_OPAQUE;
+-        sys->i_planes = 1;
+-        sys->buffer_size = sys->input->buffer_size_recommended;
+-    } else {
+-        sys->input->format->encoding = MMAL_ENCODING_I420;
+-        vd->fmt.i_chroma = VLC_CODEC_I420;
+-        buffer_pitch = align(vd->fmt.i_width, 32);
+-        buffer_height = align(vd->fmt.i_height, 16);
+-        sys->i_planes = 3;
+-        sys->buffer_size = 3 * buffer_pitch * buffer_height / 2;
+-    }
+-
+-    sys->input->format->es->video.width = vd->fmt.i_width;
+-    sys->input->format->es->video.height = vd->fmt.i_height;
+-    sys->input->format->es->video.crop.x = 0;
+-    sys->input->format->es->video.crop.y = 0;
+-    sys->input->format->es->video.crop.width = vd->fmt.i_width;
+-    sys->input->format->es->video.crop.height = vd->fmt.i_height;
+-    sys->input->format->es->video.par.num = vd->source.i_sar_num;
+-    sys->input->format->es->video.par.den = vd->source.i_sar_den;
++static void isp_output_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)  // ISP output port callback: queue filled buffers on isp->out_q, release anything else
++{
++    if (buf->cmd == 0 && buf->length != 0)  // a data buffer (cmd == 0) that actually has contents
++    {
++        // The filter structure etc. should always exist if we have contents
++        // but might not on later flushes as we shut down
++        vout_display_t * const vd = (vout_display_t *)port->userdata;
++        struct vout_isp_conf_s *const isp = &vd->sys->isp;
+
+-    status = mmal_port_format_commit(sys->input);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(vd, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
+-                        sys->input->name, status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
++#if TRACE_ALL
++        msg_Dbg(vd, "<<< %s: cmd=%d; flags=%#x, pts=%lld", __func__, buf->cmd, buf->flags, (long long) buf->pts);
++#endif
++        mmal_queue_put(isp->out_q, buf);
++#if TRACE_ALL
++        msg_Dbg(vd, ">>> %s: out Q len=%d", __func__, mmal_queue_length(isp->out_q));
++#endif
+     }
+-    sys->input->buffer_size = sys->input->buffer_size_recommended;
++    else
++    {
++        mmal_buffer_header_reset(buf);  // events and empty buffers are reset and released instead of queued
++        mmal_buffer_header_release(buf);
++    }
++}
+
+-    vout_display_PlacePicture(&place, &vd->source, vd->cfg, false);
+-    display_region.hdr.id = MMAL_PARAMETER_DISPLAYREGION;
+-    display_region.hdr.size = sizeof(MMAL_DISPLAYREGION_T);
+-    display_region.fullscreen = MMAL_FALSE;
+-    display_region.src_rect.x = vd->fmt.i_x_offset;
+-    display_region.src_rect.y = vd->fmt.i_y_offset;
+-    display_region.src_rect.width = vd->fmt.i_visible_width;
+-    display_region.src_rect.height = vd->fmt.i_visible_height;
+-    display_region.dest_rect.x = place.x;
+-    display_region.dest_rect.y = place.y;
+-    display_region.dest_rect.width = place.width;
+-    display_region.dest_rect.height = place.height;
+-    display_region.layer = sys->layer;
+-    display_region.set = MMAL_DISPLAY_SET_FULLSCREEN | MMAL_DISPLAY_SET_SRC_RECT |
+-            MMAL_DISPLAY_SET_DEST_RECT | MMAL_DISPLAY_SET_LAYER;
+-    status = mmal_port_parameter_set(sys->input, &display_region.hdr);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(vd, "Failed to set display region (status=%"PRIx32" %s)",
+-                        status, mmal_status_to_string(status));
+-        ret = VLC_EGENERIC;
+-        goto out;
++static void isp_empty_out_q(struct vout_isp_conf_s * const isp)  // Release every buffer currently waiting on isp->out_q
++{
++    MMAL_BUFFER_HEADER_T * buf;
++    // We can be called as part of error recovery so allow for missing Q
++    if (isp->out_q == NULL)
++        return;
++
++    while ((buf = mmal_queue_get(isp->out_q)) != NULL)  // loop until the queue yields no more buffers
++        mmal_buffer_header_release(buf);
++}
++
++static void isp_flush(struct vout_isp_conf_s * const isp)  // Disable both ISP ports, drop queued output and clear the pending flag
++{
++    if (isp->input->is_enabled)  // fixed inverted test: only an enabled port can (and must) be disabled
++        mmal_port_disable(isp->input);
++
++    if (isp->output->is_enabled)
++        mmal_port_disable(isp->output);
++
++    isp_empty_out_q(isp);  // discard frames already produced but not yet consumed
++    isp->pending = false;
++}
++
++static MMAL_STATUS_T isp_prepare(vout_display_t * const vd, struct vout_isp_conf_s * const isp)  // Enable ISP output, give it all free out_pool buffers, then enable ISP input; returns first MMAL error
++{
++    MMAL_STATUS_T err;
++    MMAL_BUFFER_HEADER_T * buf;
++
++    if (!isp->output->is_enabled) {
++        if ((err = mmal_port_enable(isp->output, isp_output_cb)) != MMAL_SUCCESS)
++        {
++            msg_Err(vd, "ISP output port enable failed");
++            return err;
++        }
+     }
+
+-    for (i = 0; i < sys->i_planes; ++i) {
+-        sys->planes[i].i_lines = buffer_height;
+-        sys->planes[i].i_pitch = buffer_pitch;
+-        sys->planes[i].i_visible_lines = vd->fmt.i_visible_height;
+-        sys->planes[i].i_visible_pitch = vd->fmt.i_visible_width;
++    while ((buf = mmal_queue_get(isp->out_pool->queue)) != NULL) {
++        if ((err = mmal_port_send_buffer(isp->output, buf)) != MMAL_SUCCESS) {
++            msg_Err(vd, "ISP output port stuff failed");
++            mmal_buffer_header_release(buf);  // the port did not take the buffer, so drop our reference rather than leak it
++            return err;
++        }
++    }
+
+-        if (i > 0) {
+-            sys->planes[i].i_lines /= 2;
+-            sys->planes[i].i_pitch /= 2;
+-            sys->planes[i].i_visible_lines /= 2;
+-            sys->planes[i].i_visible_pitch /= 2;
++    if (!isp->input->is_enabled) {
++        if ((err = mmal_port_enable(isp->input, isp_input_cb)) != MMAL_SUCCESS)
++        {
++            msg_Err(vd, "ISP input port enable failed");
++            return err;
+         }
+     }
++    return MMAL_SUCCESS;
++}
+
+-    vlc_mutex_init(&sys->buffer_mutex);
+-    vlc_cond_init(&sys->buffer_cond);
+-    vlc_mutex_init(&sys->manage_mutex);
++static void isp_close(vout_display_t * const vd, vout_display_sys_t * const vd_sys)  // Tear down the ISP component and its queue/pool; no-op if it was never created
++{
++    struct vout_isp_conf_s * const isp = &vd_sys->isp;
++    VLC_UNUSED(vd);
+
+-    vd->pool = vd_pool;
+-    vd->prepare = vd_prepare;
+-    vd->display = vd_display;
+-    vd->control = vd_control;
+-    vd->manage = vd_manage;
++    if (isp->component == NULL)
++        return;
+
+-    vc_tv_register_callback(tvservice_cb, vd);
++    isp_flush(isp);  // disables the ports and empties out_q first
+
+-    if (query_resolution(vd, &sys->display_width, &sys->display_height) >= 0) {
+-        vout_display_SendEventDisplaySize(vd, sys->display_width, sys->display_height);
+-    } else {
+-        sys->display_width = vd->cfg->display.width;
+-        sys->display_height = vd->cfg->display.height;
++    if (isp->component->control->is_enabled)
++        mmal_port_disable(isp->component->control);
++
++    if (isp->out_q != NULL) {
++        // 1st junk anything lying around
++        isp_empty_out_q(isp);
++
++        mmal_queue_destroy(isp->out_q);
++        isp->out_q = NULL;
+     }
+
+-    sys->dmx_handle = vc_dispmanx_display_open(0);
+-    vd->info.subpicture_chromas = subpicture_chromas;
++    if (isp->out_pool != NULL) {  // NOTE(review): isp->in_pool (created in isp_setup) is not destroyed here - confirm it is freed elsewhere
++        mmal_port_pool_destroy(isp->output, isp->out_pool);
++        isp->out_pool = NULL;
++    }
+
+-    vout_display_DeleteWindow(vd, NULL);
++    isp->input = NULL;
++    isp->output = NULL;
+
+-out:
+-    if (ret != VLC_SUCCESS)
+-        Close(object);
++    mmal_component_release(isp->component);
++    isp->component = NULL;  // a NULL component marks the ISP as closed (see the check at the top)
+
+-    return ret;
++    return;
+ }
+
+-static void Close(vlc_object_t *object)
++// Restuff into output rather than return to pool if we can
++static MMAL_BOOL_T isp_out_pool_cb(MMAL_POOL_T *pool, MMAL_BUFFER_HEADER_T *buffer, void *userdata)  // userdata is the struct vout_isp_conf_s registered in isp_setup
+ {
+-    vout_display_t *vd = (vout_display_t *)object;
+-    vout_display_sys_t *sys = vd->sys;
+-    char response[20]; /* answer is hvs_update_fields=%1d */
+-    unsigned i;
++    struct vout_isp_conf_s * const isp = userdata;
++    VLC_UNUSED(pool);
++    if (isp->output->is_enabled) {
++        mmal_buffer_header_reset(buffer);
++        if (mmal_port_send_buffer(isp->output, buffer) == MMAL_SUCCESS)
++            return MMAL_FALSE;  // buffer was handed straight back to the output port
++    }
++    return MMAL_TRUE;  // port disabled or send failed: let the buffer return to the pool
++}
+
+-    vc_tv_unregister_callback_full(tvservice_cb, vd);
++static MMAL_STATUS_T isp_setup(vout_display_t * const vd, vout_display_sys_t * const vd_sys)  // Create and configure the ISP resizer; on any failure closes it again and returns the error
++{
++    struct vout_isp_conf_s * const isp = &vd_sys->isp;
++    MMAL_STATUS_T err;
+
+-    if (sys->dmx_handle)
+-        close_dmx(vd);
++    if ((err = mmal_component_create(MMAL_COMPONENT_ISP_RESIZER, &isp->component)) != MMAL_SUCCESS) {
++        msg_Err(vd, "Cannot create ISP component");
++        return err;
++    }
++    isp->input = isp->component->input[0];
++    isp->output = isp->component->output[0];
+
+-    if (sys->component && sys->component->control->is_enabled)
+-        mmal_port_disable(sys->component->control);
++    isp->component->control->userdata = (void *)vd;
++    if ((err = mmal_port_enable(isp->component->control, isp_control_port_cb)) != MMAL_SUCCESS) {
++        msg_Err(vd, "Failed to enable ISP control port");
++        goto fail;
++    }
+
+-    if (sys->input && sys->input->is_enabled)
+-        mmal_port_disable(sys->input);
++    isp->input->userdata = (void *)vd;
++    display_set_format(vd, isp->input->format, false);  // input takes the incoming picture format
+
+-    if (sys->component && sys->component->is_enabled)
+-        mmal_component_disable(sys->component);
++    if ((err = port_parameter_set_bool(isp->input, MMAL_PARAMETER_ZERO_COPY, true)) != MMAL_SUCCESS)
++        goto fail;
+
+-    if (sys->pool)
+-        mmal_port_pool_destroy(sys->input, sys->pool);
++    if ((err = mmal_port_format_commit(isp->input)) != MMAL_SUCCESS) {
++        msg_Err(vd, "Failed to set ISP input format");
++        goto fail;
++    }
+
+-    if (sys->component)
+-        mmal_component_release(sys->component);
++    isp->input->buffer_size = isp->input->buffer_size_recommended;
++    isp->input->buffer_num = 30;
+
+-    if (sys->picture_pool)
+-        picture_pool_Release(sys->picture_pool);
+-    else
+-        for (i = 0; i < sys->num_buffers; ++i)
+-            if (sys->pictures[i]) {
+-                mmal_buffer_header_release(sys->pictures[i]->p_sys->buffer);
+-                picture_Release(sys->pictures[i]);
+-            }
++    if ((isp->in_pool = mmal_pool_create(isp->input->buffer_num, 0)) == NULL) {
++        msg_Err(vd, "Failed to create input pool");
++        err = MMAL_ENOMEM;  // err was still MMAL_SUCCESS here, so the failure used to be reported as success
++        goto fail;
++    }
+
+-    vlc_mutex_destroy(&sys->buffer_mutex);
+-    vlc_cond_destroy(&sys->buffer_cond);
+-    vlc_mutex_destroy(&sys->manage_mutex);
++    if ((isp->out_q = mmal_queue_create()) == NULL)
++    {
++        err = MMAL_ENOMEM;
++        goto fail;
++    }
+
+-    if (sys->native_interlaced) {
+-        if (vc_gencmd(response, sizeof(response), "hvs_update_fields 0") < 0 ||
+-                response[18] != '0')
+-            msg_Warn(vd, "Could not reset hvs field mode");
++    display_set_format(vd, isp->output->format, true);  // output is the intermediate I420 format
++
++    if ((err = port_parameter_set_bool(isp->output, MMAL_PARAMETER_ZERO_COPY, true)) != MMAL_SUCCESS)
++        goto fail;
++
++    if ((err = mmal_port_format_commit(isp->output)) != MMAL_SUCCESS) {
++        msg_Err(vd, "Failed to set ISP output format");
++        goto fail;
+     }
+
+-    free(sys->pictures);
+-    free(sys);
++    isp->output->buffer_size = isp->output->buffer_size_recommended;
++    isp->output->buffer_num = 2;
++    isp->output->userdata = (void *)vd;
++
++    if ((isp->out_pool = mmal_port_pool_create(isp->output, isp->output->buffer_num, isp->output->buffer_size)) == NULL) {
++        msg_Err(vd, "Failed to make ISP port pool");
++        err = MMAL_ENOMEM;  // same as above: make sure the caller sees the failure
++        goto fail;
++    }
++
++    mmal_pool_callback_set(isp->out_pool, isp_out_pool_cb, isp);  // released output buffers get re-sent to the port when possible
++
++    if ((err = isp_prepare(vd, isp)) != MMAL_SUCCESS)
++        goto fail;
++
++    return MMAL_SUCCESS;
+
+-    bcm_host_deinit();
++fail:
++    isp_close(vd, vd_sys);
++    return err;
+ }
+
+-static inline uint32_t align(uint32_t x, uint32_t y) {
+-    uint32_t mod = x % y;
+-    if (mod == 0)
+-        return x;
++static MMAL_STATUS_T isp_check(vout_display_t * const vd, vout_display_sys_t * const vd_sys)  // Make the ISP state match want_isp(): set it up, shut it down, or leave it alone
++{
++    struct vout_isp_conf_s *const isp = &vd_sys->isp;
++    const bool has_isp = (isp->component != NULL);  // a non-NULL component means the ISP is currently set up
++    const bool wants_isp = want_isp(vd);
++
++    if (has_isp == wants_isp)
++    {
++        // All OK - do nothing
++    }
++    else if (has_isp)
++    {
++        // ISP active but we don't want it
++        isp_flush(isp);
++
++        // Check we have everything back and then kill it
++        if (mmal_queue_length(isp->out_pool->queue) == isp->output->buffer_num)  // otherwise the ISP stays flushed but open
++            isp_close(vd, vd_sys);
++    }
+     else
+-        return x + y - mod;
++    {
++        // ISP closed but we want it
++        return isp_setup(vd, vd_sys);
++    }
++
++    return MMAL_SUCCESS;
++}
++
++/* TV service */
++static void tvservice_cb(void *callback_data, uint32_t reason, uint32_t param1,
++                uint32_t param2);
++static void adjust_refresh_rate(vout_display_t *vd, const video_format_t *fmt);
++static int set_latency_target(vout_display_t *vd, bool enable);
++
++// Mmal
++static void maintain_phase_sync(vout_display_t *vd);
++
++
++
++static void vd_input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buf)  // Renderer input port callback: releases the returned buffer header
++{
++#if TRACE_ALL
++    vout_display_t * const vd = (vout_display_t *)port->userdata;
++    pic_ctx_mmal_t * ctx = buf->user_data;
++    msg_Dbg(vd, "<<< %s: cmd=%d, ctx=%p, buf=%p, flags=%#x, pts=%lld", __func__, buf->cmd, ctx, buf,
++            buf->flags, (long long)buf->pts);
++#else
++    VLC_UNUSED(port);  // port is only needed for trace output
++#endif
++
++    mmal_buffer_header_release(buf);
++
++#if TRACE_ALL
++    msg_Dbg(vd, ">>> %s", __func__);
++#endif
++}
++
++static int query_resolution(vout_display_t *vd, const int display_id, unsigned *width, unsigned *height)  // Fetch the size of display display_id; returns 0 on success, -1 (outputs untouched) on failure
++{
++    TV_DISPLAY_STATE_T display_state = {0};
++    int ret = 0;
++
++    if (vc_tv_get_display_state_id(display_id, &display_state) == 0) {
++        msg_Dbg(vd, "State=%#x", display_state.state);
++        if (display_state.state & 0xFF) {  // any bit in the low byte: use the HDMI dimensions
++            msg_Dbg(vd, "HDMI: %dx%d", display_state.display.hdmi.width, display_state.display.hdmi.height);
++            *width = display_state.display.hdmi.width;
++            *height = display_state.display.hdmi.height;
++        } else if (display_state.state & 0xFF00) {  // any bit in the second byte: use the SDTV dimensions
++            msg_Dbg(vd, "SDTV: %dx%d", display_state.display.sdtv.width, display_state.display.sdtv.height);
++            *width = display_state.display.sdtv.width;
++            *height = display_state.display.sdtv.height;
++        } else {
++            msg_Warn(vd, "Invalid display state %"PRIx32, display_state.state);
++            ret = -1;
++        }
++    } else {
++        msg_Warn(vd, "Failed to query display resolution");
++        ret = -1;
++    }
++
++    return ret;
++}
++
++static inline MMAL_RECT_T  // Copy x/y/width/height of a vout_display_place_t into an MMAL_RECT_T
++place_to_mmal_rect(const vout_display_place_t place)
++{
++    return (MMAL_RECT_T){
++        .x      = place.x,
++        .y      = place.y,
++        .width  = place.width,
++        .height = place.height
++    };
++}
++
++static MMAL_RECT_T  // Place fmt inside rect r (or the cfg display size if r has no area); result is offset by r's origin
++place_out(const vout_display_cfg_t * cfg,
++          const video_format_t * fmt,
++          const MMAL_RECT_T r)
++{
++    video_format_t tfmt;  // local copy of fmt, used only when SAR needs fixing
++    vout_display_cfg_t tcfg;  // local copy of cfg, used only when r overrides the display size
++    vout_display_place_t place;
++
++    // Fix SAR if unknown
++    if (fmt->i_sar_den == 0 || fmt->i_sar_num == 0) {
++        tfmt = *fmt;
++        tfmt.i_sar_den = 1;
++        tfmt.i_sar_num = 1;
++        fmt = &tfmt;
++    }
++
++    // Override what VLC thinks might be going on with display size
++    // if we know better
++    if (r.width != 0 && r.height != 0)
++    {
++        tcfg = *cfg;
++        tcfg.display.width = r.width;
++        tcfg.display.height = r.height;
++        cfg = &tcfg;
++    }
++
++    vout_display_PlacePicture(&place, fmt, cfg, false);
++
++    place.x += r.x;  // shift the placement by the origin of r
++    place.y += r.y;
++
++    return place_to_mmal_rect(place);
++}
++
++static MMAL_RECT_T  // Apply transform t to rect s: transpose first, then h-flip and v-flip relative to rect c
++rect_transform(MMAL_RECT_T s, const MMAL_RECT_T c, const MMAL_DISPLAYTRANSFORM_T t)
++{
++    if (is_transform_transpose(t))
++        s = rect_transpose(s);
++    if (is_transform_hflip(t))
++        s = rect_hflip(s, c);
++    if (is_transform_vflip(t) != 0)
++        s = rect_vflip(s, c);
++    return s;
++}
++
++static void  // Compute sys->dest_rect: picture placed in sys->win_rect, then transformed by sys->dest_transform
++place_dest_rect(vout_display_t * const vd,
++          const vout_display_cfg_t * const cfg,
++          const video_format_t * fmt)
++{
++    vout_display_sys_t * const sys = vd->sys;
++    sys->dest_rect = rect_transform(place_out(cfg, fmt, sys->win_rect),
++                                    sys->display_rect, sys->dest_transform);
++}
++
++static void  // Compute sys->spu_rect (origin always 0,0) used for subpicture placement
++place_spu_rect(vout_display_t * const vd,
++          const vout_display_cfg_t * const cfg,
++          const video_format_t * fmt)
++{
++    vout_display_sys_t * const sys = vd->sys;
++    static const MMAL_RECT_T r0 = {0};  // empty rect: place_out() then uses the display size from cfg
++
++    sys->spu_rect = place_out(cfg, fmt, r0);
++    sys->spu_rect.x = 0;
++    sys->spu_rect.y = 0;
++
++    // Copy place override logic for spu pos from video_output.c
++    // This info doesn't appear to reside anywhere natively
++
++    if (fmt->i_width * fmt->i_height >= (unsigned int)(sys->spu_rect.width * sys->spu_rect.height)) {  // picture area is at least the placed area
++        sys->spu_rect.width  = fmt->i_visible_width;
++        sys->spu_rect.height = fmt->i_visible_height;
++    }
++
++    if (ORIENT_IS_SWAP(fmt->orientation))
++        sys->spu_rect = rect_transpose(sys->spu_rect);
++}
++
++static void  // Recompute both sys->dest_rect and sys->spu_rect for the given cfg/fmt
++place_rects(vout_display_t * const vd,
++          const vout_display_cfg_t * const cfg,
++          const video_format_t * fmt)
++{
++    place_dest_rect(vd, cfg, fmt);
++    place_spu_rect(vd, cfg, fmt);
++}
++
++static int  // Push the current display region settings to sys->input; returns 0 or -EINVAL
++set_input_region(vout_display_t * const vd, const video_format_t * const fmt)
++{
++    const vout_display_sys_t * const sys = vd->sys;
++    MMAL_DISPLAYREGION_T display_region = {
++        .hdr = {
++            .id = MMAL_PARAMETER_DISPLAYREGION,
++            .size = sizeof(MMAL_DISPLAYREGION_T)
++        },
++        .display_num = sys->display_id,
++        .fullscreen = MMAL_FALSE,
++        .transform = sys->video_transform,
++        .dest_rect = sys->dest_rect,
++        .src_rect = display_src_rect(vd, fmt),
++        .noaspect = MMAL_TRUE,
++        .mode = MMAL_DISPLAY_MODE_FILL,
++        .layer = sys->layer,
++        .alpha = 0xff | (sys->transparent ? 0 : (1 << 29)),  // bit 29 is set unless transparent; presumably it disables the layers beneath - TODO confirm
++        .set =
++            MMAL_DISPLAY_SET_NUM |
++            MMAL_DISPLAY_SET_FULLSCREEN |
++            MMAL_DISPLAY_SET_TRANSFORM |
++            MMAL_DISPLAY_SET_DEST_RECT |
++            MMAL_DISPLAY_SET_SRC_RECT |
++            MMAL_DISPLAY_SET_NOASPECT |
++            MMAL_DISPLAY_SET_MODE |
++            MMAL_DISPLAY_SET_LAYER |
++            MMAL_DISPLAY_SET_ALPHA
++    };
++    MMAL_STATUS_T status = mmal_port_parameter_set(sys->input, &display_region.hdr);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(vd, "Failed to set display region (status=%"PRIx32" %s)",
++                        status, mmal_status_to_string(status));
++        return -EINVAL;
++    }
++    return 0;
+ }
+
+ static int configure_display(vout_display_t *vd, const vout_display_cfg_t *cfg,
+                 const video_format_t *fmt)
+ {
+-    vout_display_sys_t *sys = vd->sys;
+-    vout_display_place_t place;
+-    MMAL_DISPLAYREGION_T display_region;
++    vout_display_sys_t * const sys = vd->sys;
+     MMAL_STATUS_T status;
+
+     if (!cfg && !fmt)
++    {
++        msg_Err(vd, "%s: Missing cfg & fmt", __func__);
+         return -EINVAL;
++    }
++
++    isp_check(vd, sys);
+
+     if (fmt) {
+         sys->input->format->es->video.par.num = fmt->i_sar_num;
+@@ -412,30 +733,14 @@ static int configure_display(vout_displa
+     if (!cfg)
+         cfg = vd->cfg;
+
+-    vout_display_PlacePicture(&place, fmt, cfg, false);
++    sys->video_transform = combine_transform(
++        vlc_to_mmal_transform(fmt->orientation), sys->display_transform);
+
+-    display_region.hdr.id = MMAL_PARAMETER_DISPLAYREGION;
+-    display_region.hdr.size = sizeof(MMAL_DISPLAYREGION_T);
+-    display_region.fullscreen = MMAL_FALSE;
+-    display_region.src_rect.x = fmt->i_x_offset;
+-    display_region.src_rect.y = fmt->i_y_offset;
+-    display_region.src_rect.width = fmt->i_visible_width;
+-    display_region.src_rect.height = fmt->i_visible_height;
+-    display_region.dest_rect.x = place.x;
+-    display_region.dest_rect.y = place.y;
+-    display_region.dest_rect.width = place.width;
+-    display_region.dest_rect.height = place.height;
+-    display_region.layer = sys->layer;
+-    display_region.set = MMAL_DISPLAY_SET_FULLSCREEN | MMAL_DISPLAY_SET_SRC_RECT |
+-            MMAL_DISPLAY_SET_DEST_RECT | MMAL_DISPLAY_SET_LAYER;
+-    status = mmal_port_parameter_set(sys->input, &display_region.hdr);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(vd, "Failed to set display region (status=%"PRIx32" %s)",
+-                        status, mmal_status_to_string(status));
++    place_rects(vd, cfg, fmt);
++
++    if (set_input_region(vd, fmt) != 0)
+         return -EINVAL;
+-    }
+
+-    show_background(vd, var_InheritBool(vd, MMAL_BLANK_BACKGROUND_NAME));
+     sys->adjust_refresh_rate = var_InheritBool(vd, MMAL_ADJUST_REFRESHRATE_NAME);
+     sys->native_interlaced = var_InheritBool(vd, MMAL_NATIVE_INTERLACED);
+     if (sys->adjust_refresh_rate) {
+@@ -446,204 +751,217 @@ static int configure_display(vout_displa
+     return 0;
+ }
+
++static void kill_pool(vout_display_sys_t * const sys)  // Release sys->pic_pool if one exists and clear the pointer
++{
++    if (sys->pic_pool != NULL) {
++        picture_pool_Release(sys->pic_pool);
++        sys->pic_pool = NULL;  // lets vd_pool() create a fresh pool on the next request
++    }
++}
++
++// Actual picture pool for MMAL opaques is just a set of trivial containers
+ static picture_pool_t *vd_pool(vout_display_t *vd, unsigned count)
+ {
+-    vout_display_sys_t *sys = vd->sys;
+-    picture_resource_t picture_res;
+-    picture_pool_configuration_t picture_pool_cfg;
+-    video_format_t fmt = vd->fmt;
+-    MMAL_STATUS_T status;
+-    unsigned i;
++    vout_display_sys_t * const sys = vd->sys;
+
+-    if (sys->picture_pool) {
+-        if (sys->num_buffers < count)
+-            msg_Warn(vd, "Picture pool with %u pictures requested, but we already have one with %u pictures",
+-                            count, sys->num_buffers);
++    msg_Dbg(vd, "%s: fmt:%dx%d,sar:%d/%d; source:%dx%d", __func__,
++            vd->fmt.i_width, vd->fmt.i_height, vd->fmt.i_sar_num, vd->fmt.i_sar_den, vd->source.i_width, vd->source.i_height);
+
+-        goto out;
++    if (sys->pic_pool == NULL) {  // created lazily; later calls return the existing pool whatever count is
++        sys->pic_pool = picture_pool_NewFromFormat(&vd->fmt, count);
+     }
++    return sys->pic_pool;  // may be NULL if pool creation failed
++}
+
+-    if (sys->opaque) {
+-        if (count <= NUM_ACTUAL_OPAQUE_BUFFERS)
+-            count = NUM_ACTUAL_OPAQUE_BUFFERS;
++static inline bool  // True when p_pic has the same i_width and i_height as vd->fmt
++check_shape(vout_display_t * const vd, const picture_t * const p_pic)
++{
++    if (vd->fmt.i_width == p_pic->format.i_width &&
++        vd->fmt.i_height == p_pic->format.i_height)
++        return true;
++    return false;
++}
+
+-        MMAL_PARAMETER_BOOLEAN_T zero_copy = {
+-            { MMAL_PARAMETER_ZERO_COPY, sizeof(MMAL_PARAMETER_BOOLEAN_T) },
+-            1
+-        };
++static void vd_display(vout_display_t *vd, picture_t *p_pic,
++                subpicture_t *subpicture)
++{
++    vout_display_sys_t * const sys = vd->sys;
++    MMAL_STATUS_T err;
+
+-        status = mmal_port_parameter_set(sys->input, &zero_copy.hdr);
+-        if (status != MMAL_SUCCESS) {
+-           msg_Err(vd, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
+-                    sys->input->name, status, mmal_status_to_string(status));
+-           goto out;
+-        }
++#if TRACE_ALL
++    {
++        char dbuf0[5];
++        msg_Dbg(vd, "<<< %s: %s,%dx%d [(%d,%d) %d/%d] sar:%d/%d -> %dx%d@%d,%d", __func__,
++                str_fourcc(dbuf0, p_pic->format.i_chroma), p_pic->format.i_width, p_pic->format.i_height,
++                p_pic->format.i_x_offset, p_pic->format.i_y_offset,
++                p_pic->format.i_visible_width, p_pic->format.i_visible_height,
++                p_pic->format.i_sar_num, p_pic->format.i_sar_den,
++                sys->dest_rect.width, sys->dest_rect.height, sys->dest_rect.x, sys->dest_rect.y);
+     }
+-
+-    if (count < sys->input->buffer_num_recommended)
+-        count = sys->input->buffer_num_recommended;
+-
+-#ifndef NDEBUG
+-    msg_Dbg(vd, "Creating picture pool with %u pictures", count);
+ #endif
+
+-    sys->input->buffer_num = count;
+-    status = mmal_port_enable(sys->input, input_port_cb);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(vd, "Failed to enable input port %s (status=%"PRIx32" %s)",
+-                        sys->input->name, status, mmal_status_to_string(status));
+-        goto out;
++    // If we had subpics then we have attached them to the main pic in prepare
++    // so all we have to do here is delete the refs
++    if (subpicture != NULL) {
++        subpicture_Delete(subpicture);
+     }
+
+-    status = mmal_component_enable(sys->component);
+-    if (status != MMAL_SUCCESS) {
+-        msg_Err(vd, "Failed to enable component %s (status=%"PRIx32" %s)",
+-                        sys->component->name, status, mmal_status_to_string(status));
+-        goto out;
++    if (!check_shape(vd, p_pic))
++    {
++        msg_Err(vd, "Pic/fmt shape mismatch");
++        goto fail;
++    }
++
++    if (!sys->input->is_enabled &&
++        (err = mmal_port_enable(sys->input, vd_input_port_cb)) != MMAL_SUCCESS)
++    {
++        msg_Err(vd, "Input port enable failed");
++        goto fail;
++    }
++    // Stuff into input
++    // We assume the BH is already set up with values reflecting pic date etc.
++    if (sys->copy_buf != NULL) {
++        MMAL_BUFFER_HEADER_T *const buf = sys->copy_buf;
++        sys->copy_buf = NULL;
++#if TRACE_ALL
++        msg_Dbg(vd, "--- %s: Copy stuff", __func__);
++#endif
++        if (mmal_port_send_buffer(sys->input, buf) != MMAL_SUCCESS)
++        {
++            mmal_buffer_header_release(buf);
++            msg_Err(vd, "Send copy buffer to render input failed");
++            goto fail;
++        }
+     }
+-
+-    sys->num_buffers = count;
+-    sys->pool = mmal_port_pool_create(sys->input, sys->num_buffers,
+-            sys->input->buffer_size);
+-    if (!sys->pool) {
+-        msg_Err(vd, "Failed to create MMAL pool for %u buffers of size %"PRIu32,
+-                        count, sys->input->buffer_size);
+-        goto out;
++    else if (sys->isp.pending) {
++        MMAL_BUFFER_HEADER_T *const buf = mmal_queue_wait(sys->isp.out_q);
++        sys->isp.pending = false;
++#if TRACE_ALL
++        msg_Dbg(vd, "--- %s: ISP stuff", __func__);
++#endif
++        if (mmal_port_send_buffer(sys->input, buf) != MMAL_SUCCESS)
++        {
++            mmal_buffer_header_release(buf);
++            msg_Err(vd, "Send ISP buffer to render input failed");
++            goto fail;
++        }
+     }
+-
+-    memset(&picture_res, 0, sizeof(picture_resource_t));
+-    sys->pictures = calloc(sys->num_buffers, sizeof(picture_t *));
+-    for (i = 0; i < sys->num_buffers; ++i) {
+-        picture_res.p_sys = calloc(1, sizeof(picture_sys_t));
+-        picture_res.p_sys->owner = (vlc_object_t *)vd;
+-        picture_res.p_sys->buffer = mmal_queue_get(sys->pool->queue);
+-
+-        sys->pictures[i] = picture_NewFromResource(&fmt, &picture_res);
+-        if (!sys->pictures[i]) {
+-            msg_Err(vd, "Failed to create picture");
+-            free(picture_res.p_sys);
+-            goto out;
++    else
++    {
++        MMAL_BUFFER_HEADER_T *const pic_buf = hw_mmal_pic_buf_replicated(p_pic, sys->pool);
++        if (pic_buf == NULL)
++        {
++            msg_Err(vd, "Replicated buffer get fail");
++            goto fail;
+         }
+
+-        sys->pictures[i]->i_planes = sys->i_planes;
+-        memcpy(sys->pictures[i]->p, sys->planes, sys->i_planes * sizeof(plane_t));
+-    }
+
+-    memset(&picture_pool_cfg, 0, sizeof(picture_pool_configuration_t));
+-    picture_pool_cfg.picture_count = sys->num_buffers;
+-    picture_pool_cfg.picture = sys->pictures;
+-    picture_pool_cfg.lock = mmal_picture_lock;
++        // If dimensions have changed then fix that
++        if (hw_mmal_vlc_pic_to_mmal_fmt_update(sys->input->format, p_pic))
++        {
++            msg_Dbg(vd, "Reset port format");
++
++            // HVS can deal with on-line dimension changes
++            if (mmal_port_format_commit(sys->input) != MMAL_SUCCESS)
++                msg_Warn(vd, "Input format commit failed");
++        }
+
+-    sys->picture_pool = picture_pool_NewExtended(&picture_pool_cfg);
+-    if (!sys->picture_pool) {
+-        msg_Err(vd, "Failed to create picture pool");
+-        goto out;
++        if ((err = mmal_port_send_buffer(sys->input, pic_buf)) != MMAL_SUCCESS)
++        {
++            mmal_buffer_header_release(pic_buf);
++            msg_Err(vd, "Send buffer to input failed");
++            goto fail;
++        }
+     }
+
+-out:
+-    return sys->picture_pool;
+-}
+-
+-static void vd_prepare(vout_display_t *vd, picture_t *picture,
+-                subpicture_t *subpicture)
+-{
+-    vout_display_sys_t *sys = vd->sys;
+-    picture_sys_t *pic_sys = picture->p_sys;
+-
+-    if (!sys->adjust_refresh_rate || pic_sys->displayed)
+-        return;
+-
+-    /* Apply the required phase_offset to the picture, so that vd_display()
+-     * will be called at the corrected time from the core */
+-    picture->date += sys->phase_offset;
+-}
+-
+-static void vd_display(vout_display_t *vd, picture_t *picture,
+-                subpicture_t *subpicture)
+-{
+-    vout_display_sys_t *sys = vd->sys;
+-    picture_sys_t *pic_sys = picture->p_sys;
+-    MMAL_BUFFER_HEADER_T *buffer = pic_sys->buffer;
+-    MMAL_STATUS_T status;
+-
+-    if (picture->format.i_frame_rate != sys->i_frame_rate ||
+-        picture->format.i_frame_rate_base != sys->i_frame_rate_base ||
+-        picture->b_progressive != sys->b_progressive ||
+-        picture->b_top_field_first != sys->b_top_field_first) {
+-        sys->b_top_field_first = picture->b_top_field_first;
+-        sys->b_progressive = picture->b_progressive;
+-        sys->i_frame_rate = picture->format.i_frame_rate;
+-        sys->i_frame_rate_base = picture->format.i_frame_rate_base;
+-        configure_display(vd, NULL, &picture->format);
+-    }
+-
+-    if (!pic_sys->displayed || !sys->opaque) {
+-        buffer->cmd = 0;
+-        buffer->length = sys->input->buffer_size;
+-        buffer->user_data = picture;
+-
+-        status = mmal_port_send_buffer(sys->input, buffer);
+-        if (status == MMAL_SUCCESS)
+-            atomic_fetch_add(&sys->buffers_in_transit, 1);
+-
+-        if (status != MMAL_SUCCESS) {
+-            msg_Err(vd, "Failed to send buffer to input port. Frame dropped");
+-            picture_Release(picture);
++    {
++        unsigned int sub_no = 0;
++        MMAL_BUFFER_HEADER_T **psub_bufs2 = sys->subpic_bufs;
++        const bool is_mmal_pic = hw_mmal_pic_is_mmal(p_pic);
++
++        for (sub_no = 0; sub_no != SUBS_MAX; ++sub_no) {
++            int rv;
++            MMAL_BUFFER_HEADER_T * const sub_buf = !is_mmal_pic ? NULL :
++                hw_mmal_pic_sub_buf_get(p_pic, sub_no);
++
++            if ((rv = hw_mmal_subpic_update(VLC_OBJECT(vd),
++                                            sub_buf != NULL ? sub_buf : *psub_bufs2++,
++                                            &sys->subs[sub_no].sub,
++                                            &p_pic->format,
++                                            &sys->dest_rect,
++                                            sys->display_transform,
++                                            p_pic->date)) == 0)
++                break;
++            else if (rv < 0)
++                goto fail;
+         }
+-
+-        pic_sys->displayed = true;
+-    } else {
+-        picture_Release(picture);
+     }
+
+-    display_subpicture(vd, subpicture);
++fail:
++    for (unsigned int i = 0; i != SUBS_MAX && sys->subpic_bufs[i] != NULL; ++i) {
++        mmal_buffer_header_release(sys->subpic_bufs[i]);
++        sys->subpic_bufs[i] = NULL;
++    }
+
+-    if (subpicture)
+-        subpicture_Delete(subpicture);
++    picture_Release(p_pic);
+
+     if (sys->next_phase_check == 0 && sys->adjust_refresh_rate)
+         maintain_phase_sync(vd);
+     sys->next_phase_check = (sys->next_phase_check + 1) % PHASE_CHECK_INTERVAL;
+-
+-    if (sys->opaque) {
+-        vlc_mutex_lock(&sys->buffer_mutex);
+-        while (atomic_load(&sys->buffers_in_transit) >= MAX_BUFFERS_IN_TRANSIT)
+-            vlc_cond_wait(&sys->buffer_cond, &sys->buffer_mutex);
+-        vlc_mutex_unlock(&sys->buffer_mutex);
+-    }
+ }
+
+ static int vd_control(vout_display_t *vd, int query, va_list args)
+ {
+-    vout_display_sys_t *sys = vd->sys;
+-    vout_display_cfg_t cfg;
+-    const vout_display_cfg_t *tmp_cfg;
++    vout_display_sys_t * const sys = vd->sys;
+     int ret = VLC_EGENERIC;
++    VLC_UNUSED(args);
+
+     switch (query) {
+-        case VOUT_DISPLAY_CHANGE_DISPLAY_SIZE:
+-            tmp_cfg = va_arg(args, const vout_display_cfg_t *);
+-            if (tmp_cfg->display.width == sys->display_width &&
+-                            tmp_cfg->display.height == sys->display_height) {
+-                cfg = *vd->cfg;
+-                cfg.display.width = sys->display_width;
+-                cfg.display.height = sys->display_height;
+-                if (configure_display(vd, &cfg, NULL) >= 0)
+-                    ret = VLC_SUCCESS;
+-            }
+-            break;
+-
+         case VOUT_DISPLAY_CHANGE_SOURCE_ASPECT:
+         case VOUT_DISPLAY_CHANGE_SOURCE_CROP:
+-            if (configure_display(vd, NULL, &vd->source) >= 0)
++            if (configure_display(vd, vd->cfg, &vd->source) >= 0)
+                 ret = VLC_SUCCESS;
+             break;
+
+-        case VOUT_DISPLAY_RESET_PICTURES:
+-            vlc_assert_unreachable();
+         case VOUT_DISPLAY_CHANGE_ZOOM:
+-            msg_Warn(vd, "Unsupported control query %d", query);
++        case VOUT_DISPLAY_CHANGE_DISPLAY_SIZE:
++        case VOUT_DISPLAY_CHANGE_DISPLAY_FILLED:
++        {
++            const vout_display_cfg_t * const cfg = va_arg(args, const vout_display_cfg_t *);
++
++            if (configure_display(vd, cfg, &vd->source) >= 0)
++                ret = VLC_SUCCESS;
++            break;
++        }
++
++        case VOUT_DISPLAY_RESET_PICTURES:
++            msg_Warn(vd, "Reset Pictures");
++            kill_pool(sys);
++            vd->fmt = vd->source; // Take (nearly) whatever source wants to give us
++            vd->fmt.i_chroma = req_chroma(vd);  // Adjust chroma to something we can actually deal with
++            ret = VLC_SUCCESS;
++            break;
++
++        case VOUT_DISPLAY_CHANGE_MMAL_HIDE:
++        {
++            MMAL_STATUS_T err;
++            unsigned int i;
++
++            msg_Dbg(vd, "Hide display");
++
++            for (i = 0; i != SUBS_MAX; ++i)
++                hw_mmal_subpic_flush(VLC_OBJECT(vd), &sys->subs[i].sub);
++
++            if (sys->input->is_enabled &&
++                (err = mmal_port_disable(sys->input)) != MMAL_SUCCESS)
++            {
++                msg_Err(vd, "Unable to disable port: err=%d", err);
++                break;
++            }
++            sys->force_config = true;
++            ret = VLC_SUCCESS;
+             break;
++        }
+
+         default:
+             msg_Warn(vd, "Unknown control query %d", query);
+@@ -653,79 +971,207 @@ static int vd_control(vout_display_t *vd
+     return ret;
+ }
+
++static void set_display_windows(vout_display_t *const vd, vout_display_sys_t *const sys)
++{
++    unsigned int width, height;
++    if (query_resolution(vd, sys->display_id, &width, &height) < 0) {
++        width = vd->cfg->display.width;
++        height = vd->cfg->display.height;
++    }
++    sys->display_rect = (MMAL_RECT_T){0, 0, width, height};
++
++    sys->win_rect = (sys->req_win.width != 0) ?
++            sys->req_win :
++         is_transform_transpose(sys->display_transform) ?
++            rect_transpose(sys->display_rect) : sys->display_rect;
++}
++
+ static void vd_manage(vout_display_t *vd)
+ {
+-    vout_display_sys_t *sys = vd->sys;
+-    unsigned width, height;
++    vout_display_sys_t *const sys = vd->sys;
+
+     vlc_mutex_lock(&sys->manage_mutex);
+
+     if (sys->need_configure_display) {
+-        close_dmx(vd);
+-        sys->dmx_handle = vc_dispmanx_display_open(0);
+-
+-        if (query_resolution(vd, &width, &height) >= 0) {
+-            sys->display_width = width;
+-            sys->display_height = height;
+-            vout_display_SendEventDisplaySize(vd, width, height);
+-        }
+-
+         sys->need_configure_display = false;
++        set_display_windows(vd, sys);
+     }
+
+     vlc_mutex_unlock(&sys->manage_mutex);
+ }
+
+-static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
++
++static int attach_subpics(vout_display_t * const vd, vout_display_sys_t * const sys,
++                          subpicture_t * const subpicture)
+ {
+-    vout_display_t *vd = (vout_display_t *)port->userdata;
+-    MMAL_STATUS_T status;
++    unsigned int n = 0;
+
+-    if (buffer->cmd == MMAL_EVENT_ERROR) {
+-        status = *(uint32_t *)buffer->data;
+-        msg_Err(vd, "MMAL error %"PRIx32" \"%s\"", status, mmal_status_to_string(status));
++    if (sys->vzc == NULL) {
++        if ((sys->vzc = hw_mmal_vzc_pool_new()) == NULL)
++        {
++            msg_Err(vd, "Failed to allocate VZC");
++            return VLC_ENOMEM;
++        }
+     }
+
+-    mmal_buffer_header_release(buffer);
++    // Attempt to import the subpics
++    for (subpicture_t * spic = subpicture; spic != NULL; spic = spic->p_next)
++    {
++        for (subpicture_region_t *sreg = spic->p_region; sreg != NULL; sreg = sreg->p_next) {
++            picture_t *const src = sreg->p_picture;
++
++#if TRACE_ALL
++            char dbuf0[5];
++            msg_Dbg(vd, "  [%p:%p] Pos=%d,%d max=%dx%d, src=%dx%d/%dx%d o:%d, spu=%d,%d:%dx%d, vd->fmt=%dx%d/%dx%d, vd->source=%dx%d/%dx%d, cfg=%dx%d, zoom=%d/%d, Alpha=%d, Fmt=%s", src, src->p[0].p_pixels,
++                    sreg->i_x, sreg->i_y,
++                    sreg->i_max_width, sreg->i_max_height,
++                    src->format.i_visible_width, src->format.i_visible_height,
++                    src->format.i_width, src->format.i_height,
++                    src->format.orientation,
++                    sys->spu_rect.x, sys->spu_rect.y, sys->spu_rect.width, sys->spu_rect.height,
++                    vd->fmt.i_visible_width, vd->fmt.i_visible_height,
++                    vd->fmt.i_width, vd->fmt.i_height,
++                    vd->source.i_visible_width, vd->source.i_visible_height,
++                    vd->source.i_width, vd->source.i_height,
++                    vd->cfg->display.width, vd->cfg->display.height,
++                    vd->cfg->zoom.num, vd->cfg->zoom.den,
++                    sreg->i_alpha,
++                    str_fourcc(dbuf0, src->format.i_chroma));
++#endif
++
++            // At this point I think the subtitles are being placed in the
++            // coord space of the placed rectangle in the cfg display space
++            if ((sys->subpic_bufs[n] = hw_mmal_vzc_buf_from_pic(sys->vzc,
++                src,
++                (MMAL_RECT_T){.width = sys->spu_rect.width, .height=sys->spu_rect.height},
++                sreg->i_x, sreg->i_y,
++                sreg->i_alpha,
++                n == 0)) == NULL)
++            {
++                msg_Err(vd, "Failed to allocate vzc buffer for subpic");
++                return VLC_ENOMEM;
++            }
++
++            if (++n == SUBS_MAX)
++                return VLC_SUCCESS;
++        }
++    }
++    return VLC_SUCCESS;
+ }
+
+-static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
++
++static void vd_prepare(vout_display_t *vd, picture_t *p_pic,
++#if VLC_VER_3
++                       subpicture_t *subpicture
++#else
++                       subpicture_t *subpicture, vlc_tick_t date
++#endif
++                       )
+ {
+-    vout_display_t *vd = (vout_display_t *)port->userdata;
++    MMAL_STATUS_T err;
++    vout_display_sys_t * const sys = vd->sys;
++
++    vd_manage(vd);
++
++    if (!check_shape(vd, p_pic))
++        return;
++
++    if (sys->force_config ||
++        p_pic->format.i_frame_rate != sys->i_frame_rate ||
++        p_pic->format.i_frame_rate_base != sys->i_frame_rate_base ||
++        p_pic->b_progressive != sys->b_progressive ||
++        p_pic->b_top_field_first != sys->b_top_field_first)
++    {
++        sys->force_config = false;
++        sys->b_top_field_first = p_pic->b_top_field_first;
++        sys->b_progressive = p_pic->b_progressive;
++        sys->i_frame_rate = p_pic->format.i_frame_rate;
++        sys->i_frame_rate_base = p_pic->format.i_frame_rate_base;
++        configure_display(vd, NULL, &vd->source);
++    }
++
++    // Subpics can either turn up attached to the main pic or in the
++    // subpic list here  - if they turn up here then process into temp
++    // buffers
++    if (subpicture != NULL) {
++        attach_subpics(vd, sys, subpicture);
++    }
++
++    // *****
++    if (want_copy(vd)) {
++        if (sys->copy_buf != NULL) {
++            msg_Err(vd, "Copy buf not NULL");
++            mmal_buffer_header_release(sys->copy_buf);
++            sys->copy_buf = NULL;
++        }
++
++        MMAL_BUFFER_HEADER_T * const buf = mmal_queue_wait(sys->copy_pool->queue);
++        // Copy 2d
++        hw_mmal_copy_pic_to_buf(buf->data, &buf->length, sys->input->format, p_pic);
++        buf->flags = MMAL_BUFFER_HEADER_FLAG_FRAME_END;
++
++        sys->copy_buf = buf;
++    }
++
++    if (isp_check(vd, sys) != MMAL_SUCCESS) {
++        return;
++    }
++
++    if (want_isp(vd))
++    {
++        struct vout_isp_conf_s * const isp = &sys->isp;
++        MMAL_BUFFER_HEADER_T * buf;
++
++        // This should be empty - make it so if it isn't
++        isp_empty_out_q(isp);
++        isp->pending = false;
++
++        // Stuff output
++        if (isp_prepare(vd, isp) != MMAL_SUCCESS)
++            return;
++
++        if ((buf = hw_mmal_pic_buf_replicated(p_pic, isp->in_pool)) == NULL)
++        {
++            msg_Err(vd, "Pic has no attached buffer");
++            return;
++        }
++
++        if ((err = mmal_port_send_buffer(isp->input, buf)) != MMAL_SUCCESS)
++        {
++            msg_Err(vd, "Send buffer to input failed");
++            mmal_buffer_header_release(buf);
++            return;
++        }
++
++        isp->pending = true;
++    }
++
++#if 0
++    VLC_UNUSED(date);
+     vout_display_sys_t *sys = vd->sys;
+-    picture_t *picture = (picture_t *)buffer->user_data;
++    picture_sys_t *pic_sys = picture->p_sys;
+
+-    if (picture)
+-        picture_Release(picture);
++    if (!sys->adjust_refresh_rate || pic_sys->displayed)
++        return;
+
+-    vlc_mutex_lock(&sys->buffer_mutex);
+-    atomic_fetch_sub(&sys->buffers_in_transit, 1);
+-    vlc_cond_signal(&sys->buffer_cond);
+-    vlc_mutex_unlock(&sys->buffer_mutex);
++    /* Apply the required phase_offset to the picture, so that vd_display()
++     * will be called at the corrected time from the core */
++    picture->date += sys->phase_offset;
++#endif
+ }
+
+-static int query_resolution(vout_display_t *vd, unsigned *width, unsigned *height)
++
++static void vd_control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+ {
+-    TV_DISPLAY_STATE_T display_state;
+-    int ret = 0;
++    vout_display_t *vd = (vout_display_t *)port->userdata;
++    MMAL_STATUS_T status;
+
+-    if (vc_tv_get_display_state(&display_state) == 0) {
+-        if (display_state.state & 0xFF) {
+-            *width = display_state.display.hdmi.width;
+-            *height = display_state.display.hdmi.height;
+-        } else if (display_state.state & 0xFF00) {
+-            *width = display_state.display.sdtv.width;
+-            *height = display_state.display.sdtv.height;
+-        } else {
+-            msg_Warn(vd, "Invalid display state %"PRIx32, display_state.state);
+-            ret = -1;
+-        }
+-    } else {
+-        msg_Warn(vd, "Failed to query display resolution");
+-        ret = -1;
++    if (buffer->cmd == MMAL_EVENT_ERROR) {
++        status = *(uint32_t *)buffer->data;
++        msg_Err(vd, "MMAL error %"PRIx32" \"%s\"", status, mmal_status_to_string(status));
+     }
+
+-    return ret;
++    mmal_buffer_header_release(buffer);
+ }
+
+ static void tvservice_cb(void *callback_data, uint32_t reason, uint32_t param1, uint32_t param2)
+@@ -780,9 +1226,9 @@ static void adjust_refresh_rate(vout_dis
+     double best_score, score;
+     int i;
+
+-    vc_tv_get_display_state(&display_state);
++    vc_tv_get_display_state_id(sys->display_id, &display_state);
+     if(display_state.display.hdmi.mode != HDMI_MODE_OFF) {
+-        num_modes = vc_tv_hdmi_get_supported_modes_new(display_state.display.hdmi.group,
++        num_modes = vc_tv_hdmi_get_supported_modes_new_id(sys->display_id, display_state.display.hdmi.group,
+                         supported_modes, VC_TV_MAX_MODE_IDS, NULL, NULL);
+
+         for (i = 0; i < num_modes; ++i) {
+@@ -810,7 +1256,7 @@ static void adjust_refresh_rate(vout_dis
+         if((best_id >= 0) && (display_state.display.hdmi.mode != supported_modes[best_id].code)) {
+             msg_Info(vd, "Setting HDMI refresh rate to %"PRIu32,
+                             supported_modes[best_id].frame_rate);
+-            vc_tv_hdmi_power_on_explicit_new(HDMI_MODE_HDMI,
++            vc_tv_hdmi_power_on_explicit_new_id(sys->display_id, HDMI_MODE_HDMI,
+                             supported_modes[best_id].group,
+                             supported_modes[best_id].code);
+         }
+@@ -828,148 +1274,12 @@ static void adjust_refresh_rate(vout_dis
+     }
+ }
+
+-static void display_subpicture(vout_display_t *vd, subpicture_t *subpicture)
+-{
+-    vout_display_sys_t *sys = vd->sys;
+-    struct dmx_region_t **dmx_region = &sys->dmx_region;
+-    struct dmx_region_t *unused_dmx_region;
+-    DISPMANX_UPDATE_HANDLE_T update = 0;
+-    picture_t *picture;
+-    video_format_t *fmt;
+-    struct dmx_region_t *dmx_region_next;
+-
+-    if(subpicture) {
+-        subpicture_region_t *region = subpicture->p_region;
+-        while(region) {
+-            picture = region->p_picture;
+-            fmt = &region->fmt;
+-
+-            if(!*dmx_region) {
+-                if(!update)
+-                    update = vc_dispmanx_update_start(10);
+-                *dmx_region = dmx_region_new(vd, update, region);
+-            } else if(((*dmx_region)->bmp_rect.width != (int32_t)fmt->i_visible_width) ||
+-                    ((*dmx_region)->bmp_rect.height != (int32_t)fmt->i_visible_height) ||
+-                    ((*dmx_region)->pos_x != region->i_x) ||
+-                    ((*dmx_region)->pos_y != region->i_y) ||
+-                    ((*dmx_region)->alpha.opacity != (uint32_t)region->i_alpha)) {
+-                dmx_region_next = (*dmx_region)->next;
+-                if(!update)
+-                    update = vc_dispmanx_update_start(10);
+-                dmx_region_delete(*dmx_region, update);
+-                *dmx_region = dmx_region_new(vd, update, region);
+-                (*dmx_region)->next = dmx_region_next;
+-            } else if((*dmx_region)->picture != picture) {
+-                if(!update)
+-                    update = vc_dispmanx_update_start(10);
+-                dmx_region_update(*dmx_region, update, picture);
+-            }
+-
+-            dmx_region = &(*dmx_region)->next;
+-            region = region->p_next;
+-        }
+-    }
+-
+-    /* Remove remaining regions */
+-    unused_dmx_region = *dmx_region;
+-    while(unused_dmx_region) {
+-        dmx_region_next = unused_dmx_region->next;
+-        if(!update)
+-            update = vc_dispmanx_update_start(10);
+-        dmx_region_delete(unused_dmx_region, update);
+-        unused_dmx_region = dmx_region_next;
+-    }
+-    *dmx_region = NULL;
+-
+-    if(update)
+-        vc_dispmanx_update_submit_sync(update);
+-}
+-
+-static void close_dmx(vout_display_t *vd)
+-{
+-    vout_display_sys_t *sys = vd->sys;
+-    DISPMANX_UPDATE_HANDLE_T update = vc_dispmanx_update_start(10);
+-    struct dmx_region_t *dmx_region = sys->dmx_region;
+-    struct dmx_region_t *dmx_region_next;
+-
+-    while(dmx_region) {
+-        dmx_region_next = dmx_region->next;
+-        dmx_region_delete(dmx_region, update);
+-        dmx_region = dmx_region_next;
+-    }
+-
+-    vc_dispmanx_update_submit_sync(update);
+-    sys->dmx_region = NULL;
+-
+-    show_background(vd, false);
+-
+-    vc_dispmanx_display_close(sys->dmx_handle);
+-    sys->dmx_handle = DISPMANX_NO_HANDLE;
+-}
+-
+-static struct dmx_region_t *dmx_region_new(vout_display_t *vd,
+-                DISPMANX_UPDATE_HANDLE_T update, subpicture_region_t *region)
+-{
+-    vout_display_sys_t *sys = vd->sys;
+-    video_format_t *fmt = &region->fmt;
+-    struct dmx_region_t *dmx_region = malloc(sizeof(struct dmx_region_t));
+-    uint32_t image_handle;
+-
+-    dmx_region->pos_x = region->i_x;
+-    dmx_region->pos_y = region->i_y;
+-
+-    vc_dispmanx_rect_set(&dmx_region->bmp_rect, 0, 0, fmt->i_visible_width,
+-                    fmt->i_visible_height);
+-    vc_dispmanx_rect_set(&dmx_region->src_rect, 0, 0, fmt->i_visible_width << 16,
+-                    fmt->i_visible_height << 16);
+-    vc_dispmanx_rect_set(&dmx_region->dst_rect, region->i_x, region->i_y,
+-                    fmt->i_visible_width, fmt->i_visible_height);
+-
+-    dmx_region->resource = vc_dispmanx_resource_create(VC_IMAGE_RGBA32,
+-                    dmx_region->bmp_rect.width | (region->p_picture->p[0].i_pitch << 16),
+-                    dmx_region->bmp_rect.height | (dmx_region->bmp_rect.height << 16),
+-                    &image_handle);
+-    vc_dispmanx_resource_write_data(dmx_region->resource, VC_IMAGE_RGBA32,
+-                    region->p_picture->p[0].i_pitch,
+-                    region->p_picture->p[0].p_pixels, &dmx_region->bmp_rect);
+-
+-    dmx_region->alpha.flags = DISPMANX_FLAGS_ALPHA_FROM_SOURCE | DISPMANX_FLAGS_ALPHA_MIX;
+-    dmx_region->alpha.opacity = region->i_alpha;
+-    dmx_region->alpha.mask = DISPMANX_NO_HANDLE;
+-    dmx_region->element = vc_dispmanx_element_add(update, sys->dmx_handle,
+-                    sys->layer + 1, &dmx_region->dst_rect, dmx_region->resource,
+-                    &dmx_region->src_rect, DISPMANX_PROTECTION_NONE,
+-                    &dmx_region->alpha, NULL, VC_IMAGE_ROT0);
+-
+-    dmx_region->next = NULL;
+-    dmx_region->picture = region->p_picture;
+-
+-    return dmx_region;
+-}
+-
+-static void dmx_region_update(struct dmx_region_t *dmx_region,
+-                DISPMANX_UPDATE_HANDLE_T update, picture_t *picture)
+-{
+-    vc_dispmanx_resource_write_data(dmx_region->resource, VC_IMAGE_RGBA32,
+-                    picture->p[0].i_pitch, picture->p[0].p_pixels, &dmx_region->bmp_rect);
+-    vc_dispmanx_element_change_source(update, dmx_region->element, dmx_region->resource);
+-    dmx_region->picture = picture;
+-}
+-
+-static void dmx_region_delete(struct dmx_region_t *dmx_region,
+-                DISPMANX_UPDATE_HANDLE_T update)
+-{
+-    vc_dispmanx_element_remove(update, dmx_region->element);
+-    vc_dispmanx_resource_delete(dmx_region->resource);
+-    free(dmx_region);
+-}
+-
+ static void maintain_phase_sync(vout_display_t *vd)
+ {
+     MMAL_PARAMETER_VIDEO_RENDER_STATS_T render_stats = {
+         .hdr = { MMAL_PARAMETER_VIDEO_RENDER_STATS, sizeof(render_stats) },
+     };
+-    int32_t frame_duration = 1000000 /
++    int32_t frame_duration = CLOCK_FREQ /
+         ((double)vd->sys->i_frame_rate /
+         vd->sys->i_frame_rate_base);
+     vout_display_sys_t *sys = vd->sys;
+@@ -1012,32 +1322,436 @@ static void maintain_phase_sync(vout_dis
+     }
+ }
+
+-static void show_background(vout_display_t *vd, bool enable)
++static void CloseMmalVout(vlc_object_t *object)
+ {
+-    vout_display_sys_t *sys = vd->sys;
+-    uint32_t image_ptr, color = 0xFF000000;
+-    VC_RECT_T dst_rect, src_rect;
+-    DISPMANX_UPDATE_HANDLE_T update;
+-
+-    if (enable && !sys->bkg_element) {
+-        sys->bkg_resource = vc_dispmanx_resource_create(VC_IMAGE_RGBA32, 1, 1,
+-                        &image_ptr);
+-        vc_dispmanx_rect_set(&dst_rect, 0, 0, 1, 1);
+-        vc_dispmanx_resource_write_data(sys->bkg_resource, VC_IMAGE_RGBA32,
+-                        sizeof(color), &color, &dst_rect);
+-        vc_dispmanx_rect_set(&src_rect, 0, 0, 1 << 16, 1 << 16);
+-        vc_dispmanx_rect_set(&dst_rect, 0, 0, 0, 0);
+-        update = vc_dispmanx_update_start(0);
+-        sys->bkg_element = vc_dispmanx_element_add(update, sys->dmx_handle,
+-                        sys->layer - 1, &dst_rect, sys->bkg_resource, &src_rect,
+-                        DISPMANX_PROTECTION_NONE, NULL, NULL, VC_IMAGE_ROT0);
+-        vc_dispmanx_update_submit_sync(update);
+-    } else if (!enable && sys->bkg_element) {
+-        update = vc_dispmanx_update_start(0);
+-        vc_dispmanx_element_remove(update, sys->bkg_element);
+-        vc_dispmanx_resource_delete(sys->bkg_resource);
+-        vc_dispmanx_update_submit_sync(update);
+-        sys->bkg_element = DISPMANX_NO_HANDLE;
+-        sys->bkg_resource = DISPMANX_NO_HANDLE;
++    vout_display_t * const vd = (vout_display_t *)object;
++    vout_display_sys_t * const sys = vd->sys;
++    char response[20]; /* answer is hvs_update_fields=%1d */
++
++#if TRACE_ALL
++    msg_Dbg(vd, "<<< %s", __func__);
++#endif
++
++    kill_pool(sys);
++
++    vc_tv_unregister_callback_full(tvservice_cb, vd);
++
++    // Shouldn't be anything here - but just in case
++    for (unsigned int i = 0; i != SUBS_MAX; ++i)
++        if (sys->subpic_bufs[i] != NULL)
++            mmal_buffer_header_release(sys->subpic_bufs[i]);
++
++    for (unsigned int i = 0; i != SUBS_MAX; ++i) {
++        vout_subpic_t * const sub = sys->subs + i;
++        if (sub->component != NULL) {
++            hw_mmal_subpic_close(VLC_OBJECT(vd), &sub->sub);
++            if (sub->component->control->is_enabled)
++                mmal_port_disable(sub->component->control);
++            if (sub->component->is_enabled)
++                mmal_component_disable(sub->component);
++            mmal_component_release(sub->component);
++            sub->component = NULL;
++        }
+     }
++
++    if (sys->input && sys->input->is_enabled)
++        mmal_port_disable(sys->input);
++
++    if (sys->component && sys->component->control->is_enabled)
++        mmal_port_disable(sys->component->control);
++
++    if (sys->copy_buf != NULL)
++        mmal_buffer_header_release(sys->copy_buf);
++
++    if (sys->input != NULL && sys->copy_pool != NULL)
++        mmal_port_pool_destroy(sys->input, sys->copy_pool);
++
++    if (sys->component && sys->component->is_enabled)
++        mmal_component_disable(sys->component);
++
++    if (sys->pool)
++        mmal_pool_destroy(sys->pool);
++
++    if (sys->component)
++        mmal_component_release(sys->component);
++
++    isp_close(vd, sys);
++
++    hw_mmal_vzc_pool_release(sys->vzc);
++
++    vlc_mutex_destroy(&sys->manage_mutex);
++
++    if (sys->native_interlaced) {
++        if (vc_gencmd(response, sizeof(response), "hvs_update_fields 0") < 0 ||
++                response[18] != '0')
++            msg_Warn(vd, "Could not reset hvs field mode");
++    }
++
++    cma_vcsm_exit(sys->init_type);;
++
++    free(sys);
++
++#if TRACE_ALL
++    msg_Dbg(vd, ">>> %s", __func__);
++#endif
++}
++
++
++static const struct {   // Maps display option names to display ids (searched by find_display_num)
++    const char * name;  // Option value, compared case-insensitively
++    int num;            // Display id; negative values are sentinels
++} display_name_to_num[] = {
++    {"auto",    -1},    // -1: no explicit display requested
++    {"hdmi-1",  DISPMANX_ID_HDMI0},
++    {"hdmi-2",  DISPMANX_ID_HDMI1},
++    {NULL,      -2}     // Terminator; -2 is what a lookup returns for an unrecognised name
++};
++
++static const struct {   // Maps transform option names to MMAL display transforms (searched by find_transform_num)
++    const char * name;  // Option value, compared case-insensitively
++    int transform_num;  // MMAL_DISPLAY_* value; negative values are sentinels
++} transform_name_to_num[] = {
++    {"auto",    -1},    // -1: no explicit transform requested
++    {"0",       MMAL_DISPLAY_ROT0},
++    {"hflip",   MMAL_DISPLAY_MIRROR_ROT0},
++    {"vflip",   MMAL_DISPLAY_MIRROR_ROT180},
++    {"180",     MMAL_DISPLAY_ROT180},
++    {"transpose", MMAL_DISPLAY_MIRROR_ROT90},
++    {"270",     MMAL_DISPLAY_ROT270},
++    {"90",      MMAL_DISPLAY_ROT90},
++    {"antitranspose", MMAL_DISPLAY_MIRROR_ROT270},
++    {NULL,      -2}     // Terminator; -2 is what a lookup returns for an unrecognised name
++};
++
++static int find_display_num(const char * const name)
++{   // Returns the id for name (case-insensitive); NULL selects the first ("auto") entry, unknown names give -2
++    unsigned int i;
++    for (i = 0; name != NULL && display_name_to_num[i].name != NULL && strcasecmp(display_name_to_num[i].name, name) != 0; ++i)
++        /* Loop */;
++    return display_name_to_num[i].num;
++}
++
++static int find_transform_num(const char * const name)
++{   // Returns the transform for name (case-insensitive); NULL selects the first ("auto") entry, unknown names give -2
++    unsigned int i;
++    for (i = 0; name != NULL && transform_name_to_num[i].name != NULL && strcasecmp(transform_name_to_num[i].name, name) != 0; ++i)
++        /* Loop */;
++    return transform_name_to_num[i].transform_num;
++}
++
++#if HAVE_X11_XLIB_H
++#include <X11/Xlib.h>
++#include <X11/extensions/Xrandr.h>
++static MMAL_DISPLAYTRANSFORM_T get_xrandr_rotation(vout_display_t * const vd)
++{   // Query the current XRandR rotation of screen 0 on the default X display and convert it to an MMAL transform
++    Display * const x = XOpenDisplay(NULL);
++    Rotation cur_rot = 0;
++    MMAL_DISPLAYTRANSFORM_T trans;
++
++    if (x == NULL)
++        return MMAL_DISPLAY_ROT0;  // No X display could be opened - report no rotation
++
++    XRRRotations(x, 0, &cur_rot);  // Only the current rotation (out param) is used; the return value is ignored
++    XCloseDisplay(x);
++
++    // Convert to MMAL
++    // xrandr seems to rotate the other way to mmal
++
++    switch (cur_rot)  // NOTE(review): only single values are matched; a combined rotate+reflect mask would hit default
++    {
++        case 0:
++        case RR_Rotate_0:
++            trans = MMAL_DISPLAY_ROT0;
++            break;
++        case RR_Rotate_90:
++            trans = MMAL_DISPLAY_ROT270;
++            break;
++        case RR_Rotate_180:
++            trans = MMAL_DISPLAY_ROT180;
++            break;
++        case RR_Rotate_270:
++            trans = MMAL_DISPLAY_ROT90;
++            break;
++        case RR_Reflect_X:
++            trans = MMAL_DISPLAY_MIRROR_ROT0;
++            break;
++        case RR_Reflect_Y:
++            trans = MMAL_DISPLAY_MIRROR_ROT180;
++            break;
++        default:
++            msg_Info(vd, "Unexpected X rotation value: %#x", cur_rot);
++            trans = MMAL_DISPLAY_ROT0;  // Fall back to no rotation
++            break;
++    }
++
++    return trans;
++}
++#else
++static MMAL_DISPLAYTRANSFORM_T get_xrandr_rotation(vout_display_t * const vd)
++{   // Variant compiled when HAVE_X11_XLIB_H is not set: always reports no rotation
++    VLC_UNUSED(vd);
++    return MMAL_DISPLAY_ROT0;
++}
++#endif
++
++static MMAL_RECT_T str_to_rect(const char * s)
++{   // Parse "W[xH[+X[+Y]]]" (numbers via strtoul, base auto-detected); NULL or malformed input gives an all-zero rect
++    MMAL_RECT_T rect = {0};
++    if (s == NULL) return rect;  // Guard: strtoul() must not be handed a NULL string
++    rect.width = strtoul(s, (char**)&s, 0);
++    if (*s == '\0')
++        return rect;  // Width only
++    if (*s++ != 'x')
++        goto fail;
++    rect.height = strtoul(s, (char**)&s, 0);
++    if (*s == '\0')
++        return rect;  // Width x height
++    if (*s++ != '+')
++        goto fail;
++    rect.x = strtoul(s, (char**)&s, 0);
++    if (*s == '\0')
++        return rect;  // Width x height + x
++    if (*s++ != '+')
++        goto fail;
++    rect.y = strtoul(s, (char**)&s, 0);
++    if (*s != '\0')
++        goto fail;  // Trailing junk after the y offset
++    return rect;
++fail:
++    return (MMAL_RECT_T){0,0,0,0};
++}
++
++static int OpenMmalVout(vlc_object_t *object)
++{   // Module open callback: allocates sys, reads options, sets up the MMAL renderer + subpicture renderers; on error cleans up via CloseMmalVout
++    vout_display_t *vd = (vout_display_t *)object;
++    vout_display_sys_t *sys;
++    MMAL_STATUS_T status;
++    int ret = VLC_EGENERIC;
++    // At the moment all copy is via I420
++    const bool needs_copy = !hw_mmal_chroma_is_mmal(vd->fmt.i_chroma);
++    const MMAL_FOURCC_T enc_in = needs_copy ? MMAL_ENCODING_I420 :
++        vout_vlc_to_mmal_pic_fourcc(vd->fmt.i_chroma);
++
++#if TRACE_ALL
++    msg_Dbg(vd, "<<< %s: o:%d", __func__, (int)vd->fmt.orientation);
++#endif
++
++    get_xrandr_rotation(vd);  // NOTE(review): result discarded here; the rotation is queried again below when needed
++
++    sys = calloc(1, sizeof(struct vout_display_sys_t));
++    if (!sys)
++        return VLC_ENOMEM;
++    vd->sys = sys;
++
++    vlc_mutex_init(&sys->manage_mutex);
++
++    if ((sys->init_type = cma_vcsm_init()) == VCSM_INIT_NONE)
++    {
++        msg_Err(vd, "VCSM init fail");
++        goto fail;
++    }
++
++    vc_tv_register_callback(tvservice_cb, vd);
++
++    sys->layer = var_InheritInteger(vd, MMAL_LAYER_NAME);
++    sys->transparent = var_InheritBool(vd, MMAL_VOUT_TRANSPARENT_NAME);
++
++    {
++        char * const display_name = var_InheritString(vd, MMAL_DISPLAY_NAME);
++        int qt_num = var_InheritInteger(vd, "qt-fullscreen-screennumber" );
++        int display_id = find_display_num(display_name);
++        sys->display_id = display_id >= 0 ? display_id :
++            qt_num == 1 ? DISPMANX_ID_HDMI1 : DISPMANX_ID_HDMI;
++        if (display_id < -1)
++            msg_Warn(vd, "Unknown display device: '%s'", display_name);
++        else
++            msg_Dbg(vd, "Display device: %s, qt=%d id=%d display=%d", display_name,
++                    qt_num, display_id, sys->display_id);
++        free(display_name);  // var_InheritString() hands back a heap copy owned by the caller
++    }
++
++    {
++        char * const window_str = var_InheritString(vd, MMAL_VOUT_WINDOW_NAME);
++        sys->req_win = str_to_rect(window_str);
++        free(window_str);  // Parsed into req_win; the heap copy is no longer needed
++        if (sys->req_win.width != 0)
++            msg_Dbg(vd, "Window: %dx%d @ %d,%d", sys->req_win.width, sys->req_win.height,
++                    sys->req_win.x, sys->req_win.y);
++    }
++
++    {
++        char * const transform_name = var_InheritString(vd, MMAL_VOUT_TRANSFORM_NAME);
++        int transform_num = find_transform_num(transform_name);
++        sys->display_transform = transform_num < 0 ?
++            get_xrandr_rotation(vd) :
++            (MMAL_DISPLAYTRANSFORM_T)transform_num;
++
++        if (transform_num < -1)
++            msg_Warn(vd, "Unknown vout transform: '%s'", transform_name);
++        else
++            msg_Dbg(vd, "Display transform: %s, mmal_display_transform=%d",
++                    transform_name, (int)sys->display_transform);
++        free(transform_name);  // Heap copy from var_InheritString(); not used past this point
++        sys->video_transform = combine_transform(
++            vlc_to_mmal_transform(vd->fmt.orientation), sys->display_transform);
++        sys->dest_transform = transform_inverse(sys->display_transform);
++    }
++
++    status = mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &sys->component);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(vd, "Failed to create MMAL component %s (status=%"PRIx32" %s)",
++                        MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, status, mmal_status_to_string(status));
++        goto fail;
++    }
++
++    sys->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)vd;
++    status = mmal_port_enable(sys->component->control, vd_control_port_cb);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(vd, "Failed to enable control port %s (status=%"PRIx32" %s)",
++                        sys->component->control->name, status, mmal_status_to_string(status));
++        goto fail;
++    }
++
++    sys->input = sys->component->input[0];
++    sys->input->userdata = (struct MMAL_PORT_USERDATA_T *)vd;
++
++    sys->input->format->encoding = enc_in;
++    sys->input->format->encoding_variant = 0;
++    sys->i_planes = 1;
++
++    display_set_format(vd, sys->input->format, want_isp(vd));
++
++    status = port_parameter_set_bool(sys->input, MMAL_PARAMETER_ZERO_COPY, true);
++    if (status != MMAL_SUCCESS) {
++       msg_Err(vd, "Failed to set zero copy on port %s (status=%"PRIx32" %s)",
++                sys->input->name, status, mmal_status_to_string(status));
++       goto fail;
++    }
++
++    status = mmal_port_format_commit(sys->input);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(vd, "Failed to commit format for input port %s (status=%"PRIx32" %s)",
++                        sys->input->name, status, mmal_status_to_string(status));
++        goto fail;
++    }
++
++    sys->input->buffer_size = sys->input->buffer_size_recommended;
++
++    if (!needs_copy) {
++        sys->input->buffer_num = 30;
++    }
++    else {
++        sys->input->buffer_num = 2;
++        if ((sys->copy_pool = mmal_port_pool_create(sys->input, 2, sys->input->buffer_size)) == NULL)
++        {
++            msg_Err(vd, "Cannot create copy pool");
++            goto fail;
++        }
++    }
++
++    set_display_windows(vd, sys);
++
++    configure_display(vd, vd->cfg, &vd->source);
++
++    status = mmal_port_enable(sys->input, vd_input_port_cb);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(vd, "Failed to enable input port %s (status=%"PRIx32" %s)",
++                sys->input->name, status, mmal_status_to_string(status));
++        goto fail;
++    }
++
++    status = mmal_component_enable(sys->component);
++    if (status != MMAL_SUCCESS) {
++        msg_Err(vd, "Failed to enable component %s (status=%"PRIx32" %s)",
++                sys->component->name, status, mmal_status_to_string(status));
++        goto fail;
++    }
++
++    if ((sys->pool = mmal_pool_create(sys->input->buffer_num, 0)) == NULL)
++    {
++        msg_Err(vd, "Failed to create input pool");
++        goto fail;
++    }
++
++    for (unsigned int i = 0; i != SUBS_MAX; ++i) {
++        vout_subpic_t * const sub = sys->subs + i;
++        if ((status = mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &sub->component)) != MMAL_SUCCESS)
++        {
++            msg_Dbg(vd, "Failed to create subpic component %d", i);
++            goto fail;
++        }
++        sub->component->control->userdata = (struct MMAL_PORT_USERDATA_T *)vd;
++        if ((status = mmal_port_enable(sub->component->control, vd_control_port_cb)) != MMAL_SUCCESS) {
++            msg_Err(vd, "Failed to enable control port %s on sub %d (status=%"PRIx32" %s)",
++                            sub->component->control->name, i, status, mmal_status_to_string(status));
++            goto fail;
++        }
++        if ((status = hw_mmal_subpic_open(VLC_OBJECT(vd), &sub->sub, sub->component->input[0],
++                                          sys->display_id, sys->layer + i + 1)) != MMAL_SUCCESS) {
++            msg_Dbg(vd, "Failed to open subpic %d", i);
++            goto fail;
++        }
++        if ((status = mmal_component_enable(sub->component)) != MMAL_SUCCESS)
++        {
++            msg_Dbg(vd, "Failed to enable subpic component %d", i);
++            goto fail;
++        }
++    }
++
++    // If we can't deal with it directly ask for I420
++    vd->fmt.i_chroma = req_chroma(vd);
++
++    vd->info = (vout_display_info_t){
++        .is_slow = false,
++        .has_double_click = false,
++        .needs_hide_mouse = false,
++        .has_pictures_invalid = true,
++        .subpicture_chromas = hw_mmal_vzc_subpicture_chromas
++    };
++
++    vd->pool = vd_pool;
++    vd->prepare = vd_prepare;
++    vd->display = vd_display;
++    vd->control = vd_control;
++
++
++    msg_Dbg(vd, ">>> %s: ok", __func__);
++    return VLC_SUCCESS;
++
++fail:
++    CloseMmalVout(object);  // Releases whatever was set up before the failure
++
++    msg_Dbg(vd, ">>> %s: rv=%d", __func__, ret);
++
++    return ret == VLC_SUCCESS ? VLC_EGENERIC : ret;
+ }
++
++vlc_module_begin()
++    // Plugin descriptor entry for the MMAL video output
++    add_submodule()
++
++    set_shortname(N_("MMAL vout"))
++    set_description(N_("MMAL-based vout plugin for Raspberry Pi"))
++    set_capability("vout display", 16)  // 1 point better than ASCII art
++    add_shortcut("mmal_vout")
++    set_category( CAT_VIDEO )
++    set_subcategory( SUBCAT_VIDEO_VOUT )
++
++    add_integer(MMAL_LAYER_NAME, 1, MMAL_LAYER_TEXT, MMAL_LAYER_LONGTEXT, false)
++    add_bool(MMAL_ADJUST_REFRESHRATE_NAME, false, MMAL_ADJUST_REFRESHRATE_TEXT,
++                    MMAL_ADJUST_REFRESHRATE_LONGTEXT, false)
++    add_bool(MMAL_NATIVE_INTERLACED, false, MMAL_NATIVE_INTERLACE_TEXT,
++                    MMAL_NATIVE_INTERLACE_LONGTEXT, false)
++    add_string(MMAL_DISPLAY_NAME, "auto", MMAL_DISPLAY_TEXT,
++                    MMAL_DISPLAY_LONGTEXT, false)
++    add_string(MMAL_VOUT_TRANSFORM_NAME, "auto", MMAL_VOUT_TRANSFORM_TEXT,
++                    MMAL_VOUT_TRANSFORM_LONGTEXT, false)
++    add_string(MMAL_VOUT_WINDOW_NAME, "fullscreen", MMAL_VOUT_WINDOW_TEXT,
++                    MMAL_VOUT_WINDOW_LONGTEXT, false)
++    add_bool(MMAL_VOUT_TRANSPARENT_NAME, false, MMAL_VOUT_TRANSPARENT_TEXT,
++                    MMAL_VOUT_TRANSPARENT_LONGTEXT, false)
++    set_callbacks(OpenMmalVout, CloseMmalVout)  // Open/close entry points defined in this file
++
++vlc_module_end()
++
++
+--- /dev/null
++++ b/modules/hw/mmal/xsplitter.c
+@@ -0,0 +1,584 @@
++#ifdef HAVE_CONFIG_H
++#include "config.h"
++#endif
++
++#include <stdatomic.h>
++
++#include <vlc_common.h>
++#include <vlc_plugin.h>
++#include <vlc_threads.h>
++#include <vlc_vout_display.h>
++#include <vlc_modules.h>
++
++#include <bcm_host.h>
++#include <interface/mmal/mmal.h>
++#include <interface/mmal/util/mmal_util.h>
++#include <interface/mmal/util/mmal_default_components.h>
++
++#include "mmal_picture.h"
++
++#define TRACE_ALL 0
++
++typedef struct display_desc_s   // One wrapped output module
++{
++    vout_display_t * vout;      // Loaded display object; NULL if the module could not be loaded
++    unsigned int max_pels;      // Largest visible area (width * height) passed on without downscaling
++} display_desc_t;
++
++typedef struct mmal_x11_sys_s   // Private state of the MMAL/X11 splitter
++{
++    bool use_mmal;              // True while mmal_desc is the active output
++    display_desc_t * cur_desc;  // Active output: &mmal_desc or &x_desc
++    display_desc_t mmal_desc;   // "mmal_vout" module
++    display_desc_t x_desc;      // "opengles2" module or, failing that, "xcb_x11"
++    uint32_t changed;           // Bitmask (1 << ctl) of controls seen since the last replay
++    vlc_fourcc_t subpicture_chromas[16];  // Zero-terminated intersection of both outputs' subpicture chromas
++} mmal_x11_sys_t;
++
++#define MAX_GL_PELS (1920*1080)
++#define MAX_MMAL_PELS (4096*4096)  // Should never be hit
++
++#if 0
++// Gen prog for the following table
++// Not done inline in case we end up pulling in FP libs we don't want
++#include <math.h>
++#include <stdio.h>
++
++int main(int argc, char *argv[])
++{
++    unsigned int i;
++    for (i = 0; i != 64; ++i)
++    {
++        printf(" [%2u]=%5u,", i, (unsigned int)(0.5 + (1/sqrt((i + 5)/4.0) * 65536.0)));
++        if (i % 4 == 3)
++            printf("\n");
++    }
++}
++#endif
++
++static const uint16_t sqrt_tab[64] = {  // [i] = round(65536 / sqrt((i + 5) / 4.0)), from the generator above
++    [ 0]=58617, [ 1]=53510, [ 2]=49541, [ 3]=46341,
++    [ 4]=43691, [ 5]=41449, [ 6]=39520, [ 7]=37837,
++    [ 8]=36353, [ 9]=35030, [10]=33843, [11]=32768,
++    [12]=31790, [13]=30894, [14]=30070, [15]=29309,
++    [16]=28602, [17]=27945, [18]=27330, [19]=26755,
++    [20]=26214, [21]=25705, [22]=25225, [23]=24770,
++    [24]=24339, [25]=23930, [26]=23541, [27]=23170,
++    [28]=22817, [29]=22479, [30]=22155, [31]=21845,
++    [32]=21548, [33]=21263, [34]=20988, [35]=20724,
++    [36]=20470, [37]=20225, [38]=19988, [39]=19760,
++    [40]=19539, [41]=19326, [42]=19119, [43]=18919,
++    [44]=18725, [45]=18536, [46]=18354, [47]=18176,
++    [48]=18004, [49]=17837, [50]=17674, [51]=17515,
++    [52]=17361, [53]=17211, [54]=17064, [55]=16921,
++    [56]=16782, [57]=16646, [58]=16514, [59]=16384,
++    [60]=16257, [61]=16134, [62]=16013, [63]=15895
++};
++#define SQRT_MAX (sizeof(sqrt_tab)/sizeof(sqrt_tab[0]) - 1)  // Index of the last table entry
++
++static bool cpy_fmt_limit_size(const display_desc_t * const dd,
++                           video_format_t * const dst,
++                           const video_format_t * const src)
++{   // Copy *src to *dst, shrinking the size when the visible area exceeds dd->max_pels; returns true if it was shrunk
++    const unsigned int src_pel = src->i_visible_width * src->i_visible_height;
++
++    *dst = *src;
++
++    if (src_pel <= dd->max_pels)
++        return false;  // Within the limit: dst stays an exact copy of src
++
++    // scaling factor sqrt(max_pel/cur_pel)
++    // sqrt done by lookup & 16 bit fixed-point maths - not exactly accurate but
++    // easily good enough & avoids floating point (which may be slow)
++    // src_pel > max_pel so n >= 0
++    // Rounding should be such that exact sqrts work and everything else rounds
++    // down
++    unsigned int n = ((src_pel * 4 - 1) / dd->max_pels) - 4;      // Table index derived from 4 * src_pel / max_pels
++    unsigned int scale = sqrt_tab[n >= SQRT_MAX ? SQRT_MAX : n];  // Fixed-point factor (65536 == 1.0); index clamped to the last entry
++
++    // Rescale width - rounding up to 16
++    unsigned int width = ((src->i_visible_width * scale + (16 << 16) - 1) >> 16) & ~15;
++    // Rescale height based on new width
++    unsigned int height = (src->i_visible_height * width + src->i_visible_width/2) / src->i_visible_width;
++
++//    fprintf(stderr, "%dx%d -> %dx%d\n", src->i_visible_width, src->i_visible_height, width, height);
++
++    dst->i_width          = width;   // Full and visible sizes are both set to the scaled size
++    dst->i_visible_width  = width;
++    dst->i_height         = height;
++    dst->i_visible_height = height;
++    return true;
++}
++
++static void unload_display_module(vout_display_t * const x_vout)
++{   // Unload the module attached to x_vout (if any) and release the object; NULL is accepted and ignored
++    if (x_vout != NULL) {
++       if (x_vout->module != NULL) {
++            module_unneed(x_vout, x_vout->module);
++        }
++        vlc_object_release(x_vout);
++    }
++}
++
++static void CloseMmalX11(vlc_object_t *object)
++{   // Module close callback (also used by OpenMmalX11 on failure): unloads both wrapped outputs and frees sys
++    vout_display_t * const vd = (vout_display_t *)object;
++    mmal_x11_sys_t * const sys = (mmal_x11_sys_t *)vd->sys;
++
++    msg_Dbg(vd, "<<< %s", __func__);
++
++    if (sys == NULL)
++        return;  // Nothing was allocated
++
++    unload_display_module(sys->x_desc.vout);
++
++    unload_display_module(sys->mmal_desc.vout);
++
++    free(sys);
++
++    msg_Dbg(vd, ">>> %s", __func__);
++}
++
++static void mmal_x11_event(vout_display_t * x_vd, int cmd, va_list args)
++{   // owner.event hook installed on wrapped outputs: forwards the event to the splitter's own owner
++    vout_display_t * const vd = x_vd->owner.sys;  // owner.sys holds the splitter vd (set in load_display_module)
++#if TRACE_ALL
++    msg_Dbg(vd, "<<< %s (cmd=%d)", __func__, cmd);
++#endif
++
++    // Do not fall into the display assert if Invalid not supported
++    if (cmd == VOUT_DISPLAY_EVENT_PICTURES_INVALID &&
++            !vd->info.has_pictures_invalid)
++        return;
++
++    vd->owner.event(vd, cmd, args);
++}
++
++static vout_window_t * mmal_x11_window_new(vout_display_t * x_vd, unsigned type)
++{   // owner.window_new hook installed on wrapped outputs: delegates to the splitter's owner
++    vout_display_t * const vd = x_vd->owner.sys;  // owner.sys holds the splitter vd (set in load_display_module)
++#if TRACE_ALL
++    msg_Dbg(vd, "<<< %s (type=%d)", __func__, type);
++#endif
++    return vd->owner.window_new(vd, type);
++}
++
++static void mmal_x11_window_del(vout_display_t * x_vd, vout_window_t * win)
++{   // owner.window_del hook installed on wrapped outputs: delegates to the splitter's owner
++    vout_display_t * const vd = x_vd->owner.sys;  // owner.sys holds the splitter vd (set in load_display_module)
++#if TRACE_ALL
++    msg_Dbg(vd, "<<< %s", __func__);
++#endif
++    vd->owner.window_del(vd, win);
++}
++
++
++static int load_display_module(vout_display_t * const vd,
++                                display_desc_t * const dd,
++                                const char * const cap,
++                                const char * const module_name)
++{   // Create a child display object of vd and load module_name into it; returns 0 with dd->vout set, or -1 with dd->vout NULL
++    vout_display_t * const x_vout = vlc_object_create(vd, sizeof(*x_vout));
++
++    dd->vout = NULL;
++    if (!x_vout)
++        return -1;
++
++    x_vout->owner.sys = vd;  // Lets the owner hooks below find the splitter again
++    x_vout->owner.event = mmal_x11_event;
++    x_vout->owner.window_new = mmal_x11_window_new;
++    x_vout->owner.window_del = mmal_x11_window_del;
++
++    x_vout->cfg    = vd->cfg;
++    x_vout->info   = vd->info;
++    cpy_fmt_limit_size(dd, &x_vout->source, &vd->source);  // Formats are copied, downscaled if over dd->max_pels
++    cpy_fmt_limit_size(dd, &x_vout->fmt,    &vd->fmt);
++
++    if ((x_vout->module = module_need(x_vout, cap, module_name, true)) == NULL)
++    {
++        msg_Err(vd, "Failed to open Xsplitter:%s module", module_name);
++        goto fail;
++    }
++
++    msg_Dbg(vd, "R/G/B: %08x/%08x/%08x", x_vout->fmt.i_rmask, x_vout->fmt.i_gmask, x_vout->fmt.i_bmask);
++
++    dd->vout = x_vout;
++    return 0;
++
++fail:
++    vlc_object_release(x_vout);  // No module attached, so only the object needs releasing
++    return -1;
++}
++
++
++/* Return a pointer over the current picture_pool_t* (mandatory).
++ *
++ * For performance reasons, it is best to provide at least count
++ * pictures but it is not mandatory.
++ * You can return NULL when you cannot/do not want to allocate
++ * pictures.
++ * The vout display module keeps the ownership of the pool and can
++ * destroy it only when closing or on invalid pictures control.
++ */
++static picture_pool_t * mmal_x11_pool(vout_display_t * vd, unsigned count)
++{   // Returns the picture pool of the currently active wrapped output
++    mmal_x11_sys_t * const sys = (mmal_x11_sys_t *)vd->sys;
++    vout_display_t * const x_vd = sys->cur_desc->vout;
++#if TRACE_ALL
++    char buf0[5];
++    msg_Dbg(vd, "<<< %s (count=%d) %s:%dx%d->%s:%dx%d", __func__, count,
++            str_fourcc(buf0, vd->fmt.i_chroma),
++            vd->fmt.i_width, vd->fmt.i_height,
++            str_fourcc(buf0, x_vd->fmt.i_chroma),
++            x_vd->fmt.i_width, x_vd->fmt.i_height);
++#endif
++    picture_pool_t * pool = x_vd->pool(x_vd, count);
++#if TRACE_ALL
++    msg_Dbg(vd, ">>> %s: %p", __func__, pool);
++#endif
++    return pool;
++}
++
++/* Prepare a picture and an optional subpicture for display (optional).
++ *
++ * It is called before the next pf_display call to provide as much
++ * time as possible to prepare the given picture and the subpicture
++ * for display.
++ * You are guaranteed that pf_display will always be called and using
++ * the exact same picture_t and subpicture_t.
++ * You cannot change the pixel content of the picture_t or of the
++ * subpicture_t.
++ */
++static void mmal_x11_prepare(vout_display_t * vd, picture_t * pic, subpicture_t * sub)
++{   // Forwards prepare to the currently active wrapped output
++    mmal_x11_sys_t * const sys = (mmal_x11_sys_t *)vd->sys;
++    vout_display_t * const x_vd = sys->cur_desc->vout;
++#if TRACE_ALL
++    msg_Dbg(vd, "<<< %s", __func__);
++#endif
++    if (x_vd->prepare)  // Skipped when the wrapped output has no prepare callback
++        x_vd->prepare(x_vd, pic, sub);
++}
++
++/* Display a picture and an optional subpicture (mandatory).
++ *
++ * The picture and the optional subpicture must be displayed as soon as
++ * possible.
++ * You cannot change the pixel content of the picture_t or of the
++ * subpicture_t.
++ *
++ * This function gives away the ownership of the picture and of the
++ * subpicture, so you must release them as soon as possible.
++ */
++static void mmal_x11_display(vout_display_t * vd, picture_t * pic, subpicture_t * sub)
++{   // Hands pic/sub to the active wrapped output, or drops them if pic does not match that output's format
++    mmal_x11_sys_t * const sys = (mmal_x11_sys_t *)vd->sys;
++    vout_display_t * const x_vd = sys->cur_desc->vout;
++
++#if TRACE_ALL
++    const bool is_mmal_pic = hw_mmal_pic_is_mmal(pic);
++    msg_Dbg(vd, "<<< %s: fmt: %dx%d/%dx%d, pic:%dx%d, pts=%lld, mmal=%d/%d", __func__, vd->fmt.i_width, vd->fmt.i_height, x_vd->fmt.i_width, x_vd->fmt.i_height, pic->format.i_width, pic->format.i_height, (long long)pic->date,
++            is_mmal_pic, sys->use_mmal);
++#endif
++
++    if (x_vd->fmt.i_chroma != pic->format.i_chroma ||
++        x_vd->fmt.i_width  != pic->format.i_width ||
++        x_vd->fmt.i_height != pic->format.i_height)
++    {
++        msg_Dbg(vd, "%s: Picture dropped", __func__);
++        picture_Release(pic);  // Released here because the picture is not passed on
++        if (sub != NULL)
++            subpicture_Delete(sub);
++        return;
++    }
++
++    x_vd->display(x_vd, pic, sub);
++}
++
++
++static int vout_display_Control(const display_desc_t * const dd, int query, ...)
++{   // Variadic helper: packs the arguments into a va_list and calls dd->vout's control callback; dd->vout must be non-NULL
++    va_list args;
++    int result;
++
++    va_start(args, query);
++    result = dd->vout->control(dd->vout, query, args);
++    va_end(args);
++
++    return result;
++}
++
++static bool want_mmal_vout(vout_display_t * const vd, const mmal_x11_sys_t * const sys)
++{   // True if the MMAL output is loaded and either no X output is loaded or the "fullscreen" variable is set
++    return sys->mmal_desc.vout != NULL &&
++        (sys->x_desc.vout == NULL || var_InheritBool(vd, "fullscreen"));
++}
++
++static inline int
++up_rv(const int a, const int b)
++{   // Combine two return codes: a if it is non-zero, otherwise b
++    return a != 0 ? a : b;
++}
++
++static int
++reset_pictures(vout_display_t * const vd, const display_desc_t * const desc)
++{   // Send RESET_PICTURES to desc's output if it is loaded and supports invalid pictures; always returns 0
++    int rv = 0;  // NOTE(review): never updated - the control call's result below is discarded; confirm this is intended
++    VLC_UNUSED(vd);
++    if (desc->vout)
++    {
++        // If the display doesn't have has_pictures_invalid then it doesn't
++        // expect RESET_PICTURES
++        if (desc->vout->info.has_pictures_invalid)
++            vout_display_Control(desc, VOUT_DISPLAY_RESET_PICTURES);
++    }
++    return rv;
++}
++
++static int
++replay_controls(vout_display_t * const vd, const display_desc_t * const desc, const int32_t changed)
++{   // Re-issue to desc's output each control whose bit (1 << ctl) is set in changed, using vd's current cfg/source; always returns 0
++    if ((changed & (1 << VOUT_DISPLAY_CHANGE_DISPLAY_FILLED)) != 0)
++        vout_display_Control(desc, VOUT_DISPLAY_CHANGE_DISPLAY_FILLED, vd->cfg);
++    if ((changed & (1 << VOUT_DISPLAY_CHANGE_ZOOM)) != 0)
++        vout_display_Control(desc, VOUT_DISPLAY_CHANGE_ZOOM, vd->cfg);
++    if ((changed & ((1 << VOUT_DISPLAY_CHANGE_SOURCE_CROP) |
++                    (1 << VOUT_DISPLAY_CHANGE_SOURCE_ASPECT))) != 0)
++        cpy_fmt_limit_size(desc, &desc->vout->source, &vd->source);  // Refresh the output's source before the controls that read it
++    if ((changed & (1 << VOUT_DISPLAY_CHANGE_SOURCE_ASPECT)) != 0)
++        vout_display_Control(desc, VOUT_DISPLAY_CHANGE_SOURCE_ASPECT);
++    if ((changed & (1 << VOUT_DISPLAY_CHANGE_SOURCE_CROP)) != 0)
++        vout_display_Control(desc, VOUT_DISPLAY_CHANGE_SOURCE_CROP);
++    if ((changed & (1 << VOUT_DISPLAY_CHANGE_VIEWPOINT)) != 0)
++        vout_display_Control(desc, VOUT_DISPLAY_CHANGE_VIEWPOINT, vd->cfg);
++    return 0;  // Results of the individual control calls are not reported
++}
++
++/* Control on the module (mandatory) */
++static int mmal_x11_control(vout_display_t * vd, int ctl, va_list va)
++{   // Dispatch a control request to the wrapped output(s); a display-size change may also switch the active output
++    mmal_x11_sys_t * const sys = (mmal_x11_sys_t *)vd->sys;
++    display_desc_t *x_desc = sys->cur_desc;  // Output that was active when this call started
++    int rv;
++#if TRACE_ALL
++    msg_Dbg(vd, "<<< %s[%d] (ctl=%d)", __func__, sys->use_mmal, ctl);
++#endif
++    // Remember what we've told this vd - unwanted ctls ignored on replay
++    if (ctl >= 0 && ctl <= 31)
++        sys->changed |= ((uint32_t)1 << ctl);  // Unsigned shift: (1 << 31) on a signed int is undefined behaviour
++
++    switch (ctl) {
++        case VOUT_DISPLAY_CHANGE_DISPLAY_SIZE:
++        {
++            const vout_display_cfg_t * const cfg = va_arg(va, const vout_display_cfg_t *);
++            const bool want_mmal = want_mmal_vout(vd, sys);
++            const bool swap_vout = (sys->use_mmal != want_mmal);
++            display_desc_t * const new_desc = want_mmal ? &sys->mmal_desc : &sys->x_desc;
++
++            msg_Dbg(vd, "Change size: %d, %d: mmal_vout=%p, want_mmal=%d, fs=%d",
++                    cfg->display.width, cfg->display.height, sys->mmal_desc.vout, want_mmal,
++                    var_InheritBool(vd, "fullscreen"));
++
++            // Repeat any control calls that we sent to the previous vd
++            if (swap_vout && sys->changed != 0) {
++                const uint32_t changed = sys->changed;
++                sys->changed = 0;
++                replay_controls(vd, new_desc, changed);
++            }
++
++            if (swap_vout) {
++                if (sys->use_mmal) {
++                    vout_display_Control(x_desc, VOUT_DISPLAY_CHANGE_MMAL_HIDE);  // x_desc is the MMAL output here
++                }
++                vout_display_SendEventPicturesInvalid(vd);
++            }
++
++            rv = vout_display_Control(new_desc, ctl, cfg);
++            if (rv == VLC_SUCCESS) {  // Only commit the switch if the new output accepted the size
++                vd->fmt       = new_desc->vout->fmt;
++                sys->cur_desc = new_desc;
++                sys->use_mmal = want_mmal;
++            }
++
++
++            break;
++        }
++
++        case VOUT_DISPLAY_RESET_PICTURES:
++            {
++                char dbuf0[5], dbuf1[5], dbuf2[5];
++                msg_Dbg(vd, "<<< %s: Pic reset: fmt: %s,%dx%d<-%s,%dx%d, source: %s,%dx%d/%dx%d", __func__,
++                        str_fourcc(dbuf0, vd->fmt.i_chroma), vd->fmt.i_width, vd->fmt.i_height,
++                        str_fourcc(dbuf1, x_desc->vout->fmt.i_chroma), x_desc->vout->fmt.i_width, x_desc->vout->fmt.i_height,
++                        str_fourcc(dbuf2, vd->source.i_chroma), vd->source.i_width, vd->source.i_height, x_desc->vout->source.i_width,
++                        x_desc->vout->source.i_height);
++            }
++            rv = reset_pictures(vd, &sys->x_desc);  // Both outputs are reset, not just the active one
++            rv = up_rv(rv, reset_pictures(vd, &sys->mmal_desc));
++
++            vd->fmt = x_desc->vout->fmt;
++            break;
++
++        case VOUT_DISPLAY_CHANGE_SOURCE_ASPECT:
++        case VOUT_DISPLAY_CHANGE_SOURCE_CROP:
++            cpy_fmt_limit_size(x_desc, &x_desc->vout->source, &vd->source);
++
++            /* FALLTHRU */
++        default:
++            rv = x_desc->vout->control(x_desc->vout, ctl, va);
++//            vd->fmt  = x_vd->fmt;
++            break;
++    }
++#if TRACE_ALL
++    msg_Dbg(vd, ">>> %s (rv=%d)", __func__, rv);
++#endif
++    return rv;
++}
++
++#define DO_MANAGE 0
++
++#if DO_MANAGE
++/* Manage pending event (optional) */
++static void mmal_x11_manage(vout_display_t * vd)
++{   // Forwards manage to the active wrapped output (only compiled when DO_MANAGE is non-zero)
++    mmal_x11_sys_t * const sys = (mmal_x11_sys_t *)vd->sys;
++    vout_display_t * const x_vd = sys->cur_desc->vout;
++#if TRACE_ALL
++    msg_Dbg(vd, "<<< %s", __func__);
++#endif
++    x_vd->manage(x_vd);
++}
++#endif
++
++static int OpenMmalX11(vlc_object_t *object)
++{   // Module open callback: loads an X output (opengles2, else xcb_x11) and the MMAL output, then picks the active one
++    vout_display_t * const vd = (vout_display_t *)object;
++    mmal_x11_sys_t * const sys = calloc(1, sizeof(*sys));
++    int ret = VLC_SUCCESS;
++
++    if (sys == NULL) {
++        return VLC_ENOMEM;
++    }
++    vd->sys = (vout_display_sys_t *)sys;
++
++    vd->info = (vout_display_info_t){
++        .is_slow = false,
++        .has_double_click = false,
++        .needs_hide_mouse = false,
++        .has_pictures_invalid = true,
++        .subpicture_chromas = NULL
++    };
++
++    {
++        char dbuf0[5];
++        msg_Dbg(vd, "<<< %s: %s,%dx%d [(%d,%d) %d/%d] sar:%d/%d", __func__,
++                str_fourcc(dbuf0, vd->fmt.i_chroma),
++                vd->fmt.i_width,         vd->fmt.i_height,
++                vd->fmt.i_x_offset,      vd->fmt.i_y_offset,
++                vd->fmt.i_visible_width, vd->fmt.i_visible_height,
++                vd->fmt.i_sar_num,       vd->fmt.i_sar_den);
++    }
++
++    sys->x_desc.max_pels = MAX_GL_PELS;
++    sys->mmal_desc.max_pels = MAX_MMAL_PELS;
++
++    if (load_display_module(vd, &sys->x_desc, "vout display", "opengles2") == 0)
++    {
++        msg_Dbg(vd, "Opengles2 output found");
++    }
++    else
++    {
++        sys->x_desc.max_pels = MAX_MMAL_PELS;  // The GL pixel limit does not apply to the XCB fallback
++        if (load_display_module(vd, &sys->x_desc, "vout display", "xcb_x11") == 0)
++            msg_Dbg(vd, "X11 XCB output found");
++    }
++
++    if ((load_display_module(vd, &sys->mmal_desc, "vout display", "mmal_vout")) == 0)
++        msg_Dbg(vd, "MMAL output found");
++
++    if (sys->mmal_desc.vout == NULL && sys->x_desc.vout == NULL) {
++        char dbuf0[5], dbuf1[5];
++        msg_Info(vd, "No valid output found for vout (%s/%s)", str_fourcc(dbuf0, vd->fmt.i_chroma), str_fourcc(dbuf1, vd->source.i_chroma));
++        goto fail;
++    }
++
++    vd->pool = mmal_x11_pool;
++    vd->prepare = mmal_x11_prepare;
++    vd->display = mmal_x11_display;
++    vd->control = mmal_x11_control;
++#if DO_MANAGE
++    vd->manage = mmal_x11_manage;
++#endif
++
++    if (want_mmal_vout(vd, sys)) {
++        sys->cur_desc = &sys->mmal_desc;
++        sys->use_mmal = true;
++    }
++    else {
++        sys->cur_desc = &sys->x_desc;
++        sys->use_mmal = false;
++    }
++
++    if (sys->mmal_desc.vout == NULL || sys->x_desc.vout == NULL) {
++        vd->info = sys->cur_desc->vout->info;  // Only one output loaded: adopt its info
++        vd->info.has_pictures_invalid = true;  // Should make this unwanted
++    }
++    else {
++        // We have both - construct a combination
++        vd->info = (vout_display_info_t){
++            .is_slow              = false,
++            .has_double_click     = sys->mmal_desc.vout->info.has_double_click || sys->x_desc.vout->info.has_double_click,
++            .needs_hide_mouse     = sys->mmal_desc.vout->info.needs_hide_mouse || sys->x_desc.vout->info.needs_hide_mouse,
++            .has_pictures_invalid = true,
++        };
++        // Construct intersection of subpicture chromas
++        // sys calloced so no need to add the terminating zero
++        if (sys->mmal_desc.vout->info.subpicture_chromas != NULL && sys->x_desc.vout->info.subpicture_chromas != NULL) {
++            unsigned int n = 0;
++            // N^2 - fix if we ever care
++            for (const vlc_fourcc_t * p1 = sys->mmal_desc.vout->info.subpicture_chromas; *p1 != 0 && n != 15; ++p1) {
++                for (const vlc_fourcc_t * p2 = sys->x_desc.vout->info.subpicture_chromas; *p2 != 0; ++p2) {
++                    if (*p1 == *p2) {
++                        sys->subpicture_chromas[n++] = *p1;
++                        break;
++                    }
++                }
++            }
++            if (n != 0)
++                vd->info.subpicture_chromas = sys->subpicture_chromas;
++        }
++    }
++    vd->fmt  = sys->cur_desc->vout->fmt;
++
++#if TRACE_ALL
++    {
++        char dbuf0[5];
++        msg_Dbg(vd, ">>> %s: (%s) %s,%dx%d [(%d,%d) %d/%d] sar:%d/%d", __func__,
++                module_get_name(sys->cur_desc->vout->module, false),
++                str_fourcc(dbuf0, vd->fmt.i_chroma),
++                vd->fmt.i_width,         vd->fmt.i_height,
++                vd->fmt.i_x_offset,      vd->fmt.i_y_offset,
++                vd->fmt.i_visible_width, vd->fmt.i_visible_height,
++                vd->fmt.i_sar_num,       vd->fmt.i_sar_den);
++    }
++#endif
++    return VLC_SUCCESS;
++
++fail:
++    CloseMmalX11(VLC_OBJECT(vd));  // Unloads whatever was loaded and frees sys
++    return ret == VLC_SUCCESS ? VLC_EGENERIC : ret;
++}
++
++
++
++
++vlc_module_begin()
++    set_shortname(N_("MMAL x11 splitter"))
++    set_description(N_("MMAL x11 splitter for Raspberry Pi"))
++    set_capability("vout display", 300)  // Between GLES & GL
++    add_shortcut("mmal_x11")
++    set_category( CAT_VIDEO )
++    set_subcategory( SUBCAT_VIDEO_VOUT )
++    set_callbacks(OpenMmalX11, CloseMmalX11)
++vlc_module_end()
++
+--- a/modules/video_output/opengl/egl.c
++++ b/modules/video_output/opengl/egl.c
+@@ -43,6 +43,8 @@
+ # include "../android/utils.h"
+ #endif
+
++#define REQUIRE_DMA_BUF_IMPORT 1
++
+ typedef struct vlc_gl_sys_t
+ {
+     EGLDisplay display;
+@@ -355,6 +357,14 @@ static int Open (vlc_object_t *obj, cons
+         goto error;
+     }
+
++#if REQUIRE_DMA_BUF_IMPORT
++    if (!CheckToken(ext, "EGL_EXT_image_dma_buf_import"))
++    {
++        msg_Dbg(obj, "No dma_buf_import - fall back to X");
++        goto error;
++    }
++#endif
++
+     const EGLint conf_attr[] = {
+         EGL_RED_SIZE, 5,
+         EGL_GREEN_SIZE, 5,
+--- a/src/input/decoder.c
++++ b/src/input/decoder.c
+@@ -1995,6 +1995,7 @@ void input_DecoderDelete( decoder_t *p_d
+     vlc_mutex_lock( &p_owner->lock );
+     p_owner->b_waiting = false;
+     vlc_cond_signal( &p_owner->wait_request );
++    vlc_mutex_unlock( &p_owner->lock );
+
+     /* If the video output is paused or slow, or if the picture pool size was
+      * under-estimated (e.g. greedy video filter, buggy decoder...), the
+@@ -2005,7 +2006,6 @@ void input_DecoderDelete( decoder_t *p_d
+      * worker threads (if any) and the decoder thread to terminate. */
+     if( p_owner->p_vout != NULL )
+         vout_Cancel( p_owner->p_vout, true );
+-    vlc_mutex_unlock( &p_owner->lock );
+
+     vlc_join( p_owner->thread, NULL );
+
+--- a/src/misc/fourcc.c
++++ b/src/misc/fourcc.c
+@@ -755,8 +755,13 @@ static const struct
+     { { VLC_CODEC_VDPAU_VIDEO_420, VLC_CODEC_VDPAU_VIDEO_422,
+         VLC_CODEC_VDPAU_VIDEO_444, VLC_CODEC_VDPAU_OUTPUT },
+                                                FAKE_FMT() },
+-    { { VLC_CODEC_ANDROID_OPAQUE, VLC_CODEC_MMAL_OPAQUE,
+-        VLC_CODEC_D3D9_OPAQUE,    VLC_CODEC_D3D11_OPAQUE },
++    { { VLC_CODEC_ANDROID_OPAQUE },            FAKE_FMT() },
++    { { VLC_CODEC_MMAL_OPAQUE, VLC_CODEC_MMAL_ZC_SAND30   },
++                                               FAKE_FMT() },
++    { { VLC_CODEC_MMAL_ZC_I420,   VLC_CODEC_MMAL_ZC_SAND8,
++        VLC_CODEC_MMAL_ZC_SAND10, VLC_CODEC_MMAL_ZC_RGB32 },
++                                               FAKE_FMT() },
++    { { VLC_CODEC_D3D9_OPAQUE,    VLC_CODEC_D3D11_OPAQUE },
+                                                FAKE_FMT() },
+     { { VLC_CODEC_D3D11_OPAQUE_10B, VLC_CODEC_D3D9_OPAQUE_10B },
+                                                FAKE_FMT() },
+--- a/src/misc/picture.c
++++ b/src/misc/picture.c
+@@ -365,10 +365,30 @@ void picture_CopyProperties( picture_t *
+     p_dst->b_top_field_first = p_src->b_top_field_first;
+ }
+
++static inline bool is_zc_chroma(const vlc_fourcc_t i_chroma)
++{
++    return i_chroma == VLC_CODEC_MMAL_OPAQUE ||
++        i_chroma == VLC_CODEC_MMAL_ZC_I420 ||
++        i_chroma == VLC_CODEC_MMAL_ZC_RGB32 ||
++        i_chroma == VLC_CODEC_MMAL_ZC_SAND10 ||
++        i_chroma == VLC_CODEC_MMAL_ZC_SAND30 ||
++        i_chroma == VLC_CODEC_MMAL_ZC_SAND8;
++}
++
+ void picture_CopyPixels( picture_t *p_dst, const picture_t *p_src )
+ {
+-    for( int i = 0; i < p_src->i_planes ; i++ )
+-        plane_CopyPixels( p_dst->p+i, p_src->p+i );
++    if( is_zc_chroma(p_src->format.i_chroma) )
++    {
++        assert(p_dst->i_planes == 0);
++        p_dst->i_planes = p_src->i_planes;
++        for( int i = 0; i < p_src->i_planes; i++ )
++            p_dst->p[i] = p_src->p[i];
++    }
++    else
++    {
++        for( int i = 0; i < p_src->i_planes; i++ )
++            plane_CopyPixels( p_dst->p+i, p_src->p+i );
++    }
+
+     assert( p_dst->context == NULL );
+
+--- a/src/video_output/video_output.c
++++ b/src/video_output/video_output.c
+@@ -964,6 +964,17 @@ static picture_t *ConvertRGB32AndBlend(v
+     return NULL;
+ }
+
++
++static inline bool is_zc_chroma(const vlc_fourcc_t i_chroma)
++{
++    return i_chroma == VLC_CODEC_MMAL_OPAQUE ||
++        i_chroma == VLC_CODEC_MMAL_ZC_I420 ||
++        i_chroma == VLC_CODEC_MMAL_ZC_RGB32 ||
++        i_chroma == VLC_CODEC_MMAL_ZC_SAND10 ||
++        i_chroma == VLC_CODEC_MMAL_ZC_SAND30 ||
++        i_chroma == VLC_CODEC_MMAL_ZC_SAND8;
++}
++
+ static int ThreadDisplayRenderPicture(vout_thread_t *vout, bool is_forced)
+ {
+     vout_thread_sys_t *sys = vout->p;
+@@ -1098,7 +1109,7 @@ static int ThreadDisplayRenderPicture(vo
+     }
+
+     assert(vout_IsDisplayFiltered(vd) == !sys->display.use_dr);
+-    if (sys->display.use_dr && !is_direct) {
++    if (sys->display.use_dr && !is_direct && !is_zc_chroma(todisplay->format.i_chroma)) {
+         picture_t *direct = NULL;
+         if (likely(vout->p->display_pool != NULL))
+             direct = picture_pool_Get(vout->p->display_pool);
diff --git a/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0004-mmal_exit_fix.patch b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0004-mmal_exit_fix.patch
new file mode 100644
index 0000000..b8ac0a0
--- /dev/null
+++ b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0004-mmal_exit_fix.patch
@@ -0,0 +1,16 @@
+Upstream-Status: Pending
+
+--- a/bin/vlc.c
++++ b/bin/vlc.c
+@@ -106,7 +106,10 @@ static void vlc_kill (void *data)
+ static void exit_timeout (int signum)
+ {
+     (void) signum;
+-    signal (SIGINT, SIG_DFL);
++// This doesn't seem to be strong enough to reliably kill us if we fail to exit
++// in a timely fashion - so upgrade to _exit().
++//    signal (SIGINT, SIG_DFL);
++    _exit(0);
+ }
+
+ /*****************************************************************************
diff --git a/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0005-mmal_chain.patch b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0005-mmal_chain.patch
new file mode 100644
index 0000000..abd31df
--- /dev/null
+++ b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0005-mmal_chain.patch
@@ -0,0 +1,16 @@
+Upstream-Status: Pending
+
+--- a/modules/video_chroma/chain.c
++++ b/modules/video_chroma/chain.c
+@@ -280,8 +280,9 @@ static int BuildTransformChain( filter_t
+         return VLC_SUCCESS;
+
+     /* Lets try resize+chroma first, then transform */
+-    msg_Dbg( p_filter, "Trying to build chroma+resize" );
+-    EsFormatMergeSize( &fmt_mid, &p_filter->fmt_out, &p_filter->fmt_in );
++    msg_Dbg( p_filter, "Trying to build chroma+resize, then transform" );
++    es_format_Copy( &fmt_mid, &p_filter->fmt_out );
++    video_format_TransformTo(&fmt_mid.video, p_filter->fmt_in.video.orientation);
+     i_ret = CreateChain( p_filter, &fmt_mid );
+     es_format_Clean( &fmt_mid );
+     if( i_ret == VLC_SUCCESS )
diff --git a/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0006-Use-packageconfig-to-detect-mmal-support.patch b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0006-Use-packageconfig-to-detect-mmal-support.patch
new file mode 100644
index 0000000..3a2ac23
--- /dev/null
+++ b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0006-Use-packageconfig-to-detect-mmal-support.patch
@@ -0,0 +1,56 @@
+From: Vincent Davis Jr <vince@underview.tech>
+Date: Fri, 07 Jan 2022 07:10:47 PM CST
+Subject: [PATCH] Use packageconfig to acquire mmal flags
+
+Need to use userland graphics libraries package files as it's best to not assume /opt/vc is where
+all libs and headers are installed per distro. Also, needed to include $BCMHOST_MMAL_LIBS variable as
+AC_CHECK_LIB(bcm_host) fails to find `vc_tv_unregister_callback_full`. Adding $BCMHOST_MMAL_LIBS uses all
+libs inside bcm_host,mmal,vcsm,openmaxil .pc files when checking for `vc_tv_unregister_callback_full`
+function.
+
+Upstream-Status: Pending
+
+Signed-off-by: Vincent Davis Jr <vince@underview.tech>
+diff --git a/configure.ac b/configure.ac
+index bff220510..4d487409d 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -3483,23 +3483,25 @@ AC_ARG_ENABLE(mmal_avcodec,
+     [Use MMAL enabled avcodec libs (default disable)]))
+ if test "${enable_mmal}" != "no"; then
+   VLC_SAVE_FLAGS
+-  LDFLAGS="${LDFLAGS} -L/opt/vc/lib -lvchostif"
+-  CPPFLAGS="${CPPFLAGS} -isystem /opt/vc/include -isystem /opt/vc/include/interface/vcos/pthreads -isystem /opt/vc/include/interface/vmcs_host/linux"
+-  AC_CHECK_HEADERS(interface/mmal/mmal.h,
+-    [ AC_CHECK_LIB(bcm_host, vc_tv_unregister_callback_full, [
++  PKG_CHECK_MODULES(BCMHOST_MMAL, [bcm_host mmal vcsm openmaxil egl], [
++    HAVE_MMAL=yes
++    AC_CHECK_HEADERS(interface/mmal/mmal.h,
++      [ AC_CHECK_LIB(bcm_host $BCMHOST_MMAL_LIBS, vc_tv_unregister_callback_full, [
+         have_mmal="yes"
+-        VLC_ADD_PLUGIN([mmal])
+-        VLC_ADD_LDFLAGS([mmal],[ -L/opt/vc/lib ])
+-        VLC_ADD_CFLAGS([mmal],[ -isystem /opt/vc/include -isystem /opt/vc/include/interface/vcos/pthreads -isystem /opt/vc/include/interface/vmcs_host/linux ])
+-        VLC_ADD_LIBS([mmal],[ -lbcm_host -lmmal -lmmal_core -lmmal_components -lmmal_util -lvchostif -lvchiq_arm -lvcsm ]) ], [
++        VLC_ADD_PLUGIN([bcm_host mmal vcsm openmaxil egl])
++        VLC_ADD_CFLAGS([bcm_host mmal vcsm openmaxil egl],[$BCMHOST_MMAL_CFLAGS])
++        VLC_ADD_LIBS([bcm_host mmal vcsm openmaxil egl],[$BCMHOST_MMAL_LIBS]) ], [
+           AS_IF([test "${enable_mmal}" = "yes"],
+             [ AC_MSG_ERROR([Cannot find bcm library...]) ],
+             [ AC_MSG_WARN([Cannot find bcm library...]) ])
+-          ],
+-        [])
+-    ] , [ AS_IF([test "${enable_mmal}" = "yes"],
+-      [ AC_MSG_ERROR([Cannot find development headers for mmal...]) ],
+-      [ AC_MSG_WARN([Cannot find development headers for mmal...]) ]) ])
++        ],[])
++      ],[ AS_IF([test "${enable_mmal}" = "yes"],
++        [ AC_MSG_ERROR([Cannot find development headers for mmal...]) ],
++        [ AC_MSG_WARN([Cannot find development headers for mmal...]) ]) ])
++  ],:[
++    AC_MSG_WARN([${BCMHOST_PKG_ERRORS}: userland graphics not available.])
++    HAVE_MMAL=no
++  ])
+   VLC_RESTORE_FLAGS
+ fi
+ AM_CONDITIONAL([HAVE_MMAL], [test "${have_mmal}" = "yes"])
diff --git a/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0007-use-vorbisidec.patch b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0007-use-vorbisidec.patch
new file mode 100644
index 0000000..18bdc4d
--- /dev/null
+++ b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0007-use-vorbisidec.patch
@@ -0,0 +1,18 @@
+* tremor provides libvorbisidec, use it instead of libvorbisdec
+
+Upstream-Status: Pending
+
+Signed-off-by: Tim Orling <TicoTimo@gmail.com>
+
+Index: vlc-2.2.1/modules/codec/Makefile.am
+===================================================================
+--- vlc-2.2.1.orig/modules/codec/Makefile.am
++++ vlc-2.2.1/modules/codec/Makefile.am
+@@ -234,7 +234,7 @@ codec_LTLIBRARIES += $(LTLIBtheora)
+ libtremor_plugin_la_SOURCES = codec/vorbis.c
+ libtremor_plugin_la_CPPFLAGS = $(AM_CPPFLAGS) -DMODULE_NAME_IS_tremor
+ libtremor_plugin_la_LDFLAGS = $(AM_LDFLAGS) -rpath '$(codecdir)'
+-libtremor_plugin_la_LIBADD = -lvorbisdec -logg
++libtremor_plugin_la_LIBADD = -lvorbisidec -logg
+ EXTRA_LTLIBRARIES += libtremor_plugin.la
+ codec_LTLIBRARIES += $(LTLIBtremor)
diff --git a/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0008-fix-luaL-checkint.patch b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0008-fix-luaL-checkint.patch
new file mode 100644
index 0000000..96b5d2d
--- /dev/null
+++ b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0008-fix-luaL-checkint.patch
@@ -0,0 +1,233 @@
+* luaL_checkint and luaL_optint were deprecated in lua 5.3
+* replacement functions are luaL_checkinteger and luaL_optinteger
+
+Upstream-Status: Pending
+
+Signed-off-by: Tim Orling <TicoTimo@gmail.com>
+
+--- a/modules/lua/demux.c
++++ b/modules/lua/demux.c
+@@ -52,7 +52,7 @@ struct vlclua_playlist
+ static int vlclua_demux_peek( lua_State *L )
+ {
+     stream_t *s = (stream_t *)vlclua_get_this(L);
+-    int n = luaL_checkint( L, 1 );
++    int n = luaL_checkinteger( L, 1 );
+     const uint8_t *p_peek;
+ 
+     ssize_t val = vlc_stream_Peek(s->p_source, &p_peek, n);
+@@ -66,7 +66,7 @@ static int vlclua_demux_peek( lua_State
+ static int vlclua_demux_read( lua_State *L )
+ {
+     stream_t *s = (stream_t *)vlclua_get_this(L);
+-    int n = luaL_checkint( L, 1 );
++    int n = luaL_checkinteger( L, 1 );
+     char *buf = malloc(n);
+ 
+     if (buf != NULL)
+--- a/modules/lua/libs/net.c
++++ b/modules/lua/libs/net.c
+@@ -179,7 +179,7 @@ static int vlclua_net_listen_tcp( lua_St
+ {
+     vlc_object_t *p_this = vlclua_get_this( L );
+     const char *psz_host = luaL_checkstring( L, 1 );
+-    int i_port = luaL_checkint( L, 2 );
++    int i_port = luaL_checkinteger( L, 2 );
+     int *pi_fd = net_ListenTCP( p_this, psz_host, i_port );
+     if( pi_fd == NULL )
+         return luaL_error( L, "Cannot listen on %s:%d", psz_host, i_port );
+@@ -251,7 +251,7 @@ static int vlclua_net_connect_tcp( lua_S
+ {
+     vlc_object_t *p_this = vlclua_get_this( L );
+     const char *psz_host = luaL_checkstring( L, 1 );
+-    int i_port = luaL_checkint( L, 2 );
++    int i_port = luaL_checkinteger( L, 2 );
+     int i_fd = net_ConnectTCP( p_this, psz_host, i_port );
+     lua_pushinteger( L, vlclua_fd_map_safe( L, i_fd ) );
+     return 1;
+@@ -259,14 +259,14 @@ static int vlclua_net_connect_tcp( lua_S
+ 
+ static int vlclua_net_close( lua_State *L )
+ {
+-    int i_fd = luaL_checkint( L, 1 );
++    int i_fd = luaL_checkinteger( L, 1 );
+     vlclua_fd_unmap_safe( L, i_fd );
+     return 0;
+ }
+ 
+ static int vlclua_net_send( lua_State *L )
+ {
+-    int fd = vlclua_fd_get( L, luaL_checkint( L, 1 ) );
++    int fd = vlclua_fd_get( L, luaL_checkinteger( L, 1 ) );
+     size_t i_len;
+     const char *psz_buffer = luaL_checklstring( L, 2, &i_len );
+ 
+@@ -278,7 +278,7 @@ static int vlclua_net_send( lua_State *L
+ 
+ static int vlclua_net_recv( lua_State *L )
+ {
+-    int fd = vlclua_fd_get( L, luaL_checkint( L, 1 ) );
++    int fd = vlclua_fd_get( L, luaL_checkinteger( L, 1 ) );
+     size_t i_len = (size_t)luaL_optinteger( L, 2, 1 );
+     char psz_buffer[i_len];
+ 
+@@ -312,7 +312,7 @@ static int vlclua_net_poll( lua_State *L
+     lua_pushnil( L );
+     for( int i = 0; lua_next( L, 1 ); i++ )
+     {
+-        luafds[i] = luaL_checkint( L, -2 );
++        luafds[i] = luaL_checkinteger( L, -2 );
+         p_fds[i].fd = vlclua_fd_get( L, luafds[i] );
+         p_fds[i].events = luaL_checkinteger( L, -1 );
+         p_fds[i].events &= POLLIN | POLLOUT | POLLPRI;
+@@ -360,7 +360,7 @@ static int vlclua_fd_open( lua_State *L
+ #ifndef _WIN32
+ static int vlclua_fd_write( lua_State *L )
+ {
+-    int fd = vlclua_fd_get( L, luaL_checkint( L, 1 ) );
++    int fd = vlclua_fd_get( L, luaL_checkinteger( L, 1 ) );
+     size_t i_len;
+     const char *psz_buffer = luaL_checklstring( L, 2, &i_len );
+ 
+@@ -371,7 +371,7 @@ static int vlclua_fd_write( lua_State *L
+ 
+ static int vlclua_fd_read( lua_State *L )
+ {
+-    int fd = vlclua_fd_get( L, luaL_checkint( L, 1 ) );
++    int fd = vlclua_fd_get( L, luaL_checkinteger( L, 1 ) );
+     size_t i_len = (size_t)luaL_optinteger( L, 2, 1 );
+     char psz_buffer[i_len];
+ 
+--- a/modules/lua/libs/osd.c
++++ b/modules/lua/libs/osd.c
+@@ -154,7 +154,7 @@ static int vlc_osd_slider_type_from_stri
+ 
+ static int vlclua_osd_slider( lua_State *L )
+ {
+-    int i_position = luaL_checkint( L, 1 );
++    int i_position = luaL_checkinteger( L, 1 );
+     const char *psz_type = luaL_checkstring( L, 2 );
+     int i_type = vlc_osd_slider_type_from_string( psz_type );
+     int i_chan = (int)luaL_optinteger( L, 3, VOUT_SPU_CHANNEL_OSD );
+@@ -198,7 +198,7 @@ static int vlclua_spu_channel_register(
+ 
+ static int vlclua_spu_channel_clear( lua_State *L )
+ {
+-    int i_chan = luaL_checkint( L, 1 );
++    int i_chan = luaL_checkinteger( L, 1 );
+     input_thread_t *p_input = vlclua_get_input_internal( L );
+     if( !p_input )
+         return luaL_error( L, "Unable to find input." );
+--- a/modules/lua/libs/playlist.c
++++ b/modules/lua/libs/playlist.c
+@@ -69,7 +69,7 @@ static int vlclua_playlist_next( lua_Sta
+ 
+ static int vlclua_playlist_skip( lua_State * L )
+ {
+-    int i_skip = luaL_checkint( L, 1 );
++    int i_skip = luaL_checkinteger( L, 1 );
+     playlist_t *p_playlist = vlclua_get_playlist_internal( L );
+     playlist_Skip( p_playlist, i_skip );
+     return 0;
+@@ -127,7 +127,7 @@ static int vlclua_playlist_random( lua_S
+ 
+ static int vlclua_playlist_gotoitem( lua_State * L )
+ {
+-    int i_id = luaL_checkint( L, 1 );
++    int i_id = luaL_checkinteger( L, 1 );
+     playlist_t *p_playlist = vlclua_get_playlist_internal( L );
+     PL_LOCK;
+     playlist_ViewPlay( p_playlist, NULL,
+@@ -138,7 +138,7 @@ static int vlclua_playlist_gotoitem( lua
+ 
+ static int vlclua_playlist_delete( lua_State * L )
+ {
+-    int i_id = luaL_checkint( L, 1 );
++    int i_id = luaL_checkinteger( L, 1 );
+     playlist_t *p_playlist = vlclua_get_playlist_internal( L );
+ 
+     PL_LOCK;
+@@ -152,8 +152,8 @@ static int vlclua_playlist_delete( lua_S
+ 
+ static int vlclua_playlist_move( lua_State * L )
+ {
+-    int i_item = luaL_checkint( L, 1 );
+-    int i_target = luaL_checkint( L, 2 );
++    int i_item = luaL_checkinteger( L, 1 );
++    int i_target = luaL_checkinteger( L, 2 );
+     playlist_t *p_playlist = vlclua_get_playlist_internal( L );
+     PL_LOCK;
+     playlist_item_t *p_item = playlist_ItemGetById( p_playlist, i_item );
+--- a/modules/lua/libs/stream.c
++++ b/modules/lua/libs/stream.c
+@@ -123,7 +123,7 @@ static int vlclua_stream_read( lua_State
+ {
+     int i_read;
+     stream_t **pp_stream = (stream_t **)luaL_checkudata( L, 1, "stream" );
+-    int n = luaL_checkint( L, 2 );
++    int n = luaL_checkinteger( L, 2 );
+     uint8_t *p_read = malloc( n );
+     if( !p_read ) return vlclua_error( L );
+ 
+--- a/modules/lua/libs/volume.c
++++ b/modules/lua/libs/volume.c
+@@ -48,7 +48,7 @@
+ static int vlclua_volume_set( lua_State *L )
+ {
+     playlist_t *p_this = vlclua_get_playlist_internal( L );
+-    int i_volume = luaL_checkint( L, 1 );
++    int i_volume = luaL_checkinteger( L, 1 );
+     if( i_volume < 0 )
+         i_volume = 0;
+     int i_ret = playlist_VolumeSet( p_this, i_volume/(float)AOUT_VOLUME_DEFAULT );
+--- a/modules/lua/libs/dialog.c
++++ b/modules/lua/libs/dialog.c
+@@ -382,7 +382,7 @@ static int lua_GetDialogUpdate( lua_Stat
+     /* Read entry in the Lua registry */
+     lua_pushlightuserdata( L, (void*) &key_update );
+     lua_gettable( L, LUA_REGISTRYINDEX );
+-    return luaL_checkint( L, -1 );
++    return luaL_checkinteger( L, -1 );
+ }
+ 
+ /** Manually update a dialog
+@@ -573,22 +573,22 @@ static int vlclua_create_widget_inner( l
+ 
+     /* Set common arguments: col, row, hspan, vspan, width, height */
+     if( lua_isnumber( L, arg ) )
+-        p_widget->i_column = luaL_checkint( L, arg );
++        p_widget->i_column = luaL_checkinteger( L, arg );
+     else goto end_of_args;
+     if( lua_isnumber( L, ++arg ) )
+-        p_widget->i_row = luaL_checkint( L, arg );
++        p_widget->i_row = luaL_checkinteger( L, arg );
+     else goto end_of_args;
+     if( lua_isnumber( L, ++arg ) )
+-        p_widget->i_horiz_span = luaL_checkint( L, arg );
++        p_widget->i_horiz_span = luaL_checkinteger( L, arg );
+     else goto end_of_args;
+     if( lua_isnumber( L, ++arg ) )
+-        p_widget->i_vert_span = luaL_checkint( L, arg );
++        p_widget->i_vert_span = luaL_checkinteger( L, arg );
+     else goto end_of_args;
+     if( lua_isnumber( L, ++arg ) )
+-        p_widget->i_width = luaL_checkint( L, arg );
++        p_widget->i_width = luaL_checkinteger( L, arg );
+     else goto end_of_args;
+     if( lua_isnumber( L, ++arg ) )
+-        p_widget->i_height = luaL_checkint( L, arg );
++        p_widget->i_height = luaL_checkinteger( L, arg );
+     else goto end_of_args;
+ 
+ end_of_args:
+--- a/modules/lua/libs/io.c
++++ b/modules/lua/libs/io.c
+@@ -139,7 +139,7 @@ static int vlclua_io_file_seek( lua_Stat
+     const char* psz_mode = luaL_optstring( L, 2, NULL );
+     if ( psz_mode != NULL )
+     {
+-        long i_offset = luaL_optlong( L, 3, 0 );
++        long i_offset = (long)luaL_optinteger( L, 3, 0 );
+         int i_mode;
+         if ( !strcmp( psz_mode, "set" ) )
+             i_mode = SEEK_SET;
diff --git a/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0009-fix-EGL-macro-undeclared-and-EGLImageKHR.patch b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0009-fix-EGL-macro-undeclared-and-EGLImageKHR.patch
new file mode 100644
index 0000000..9bd6b41
--- /dev/null
+++ b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0009-fix-EGL-macro-undeclared-and-EGLImageKHR.patch
@@ -0,0 +1,58 @@
+From: Vincent Davis Jr <vince@underview.tech>
+Date: Fri, 07 Jan 2022 07:01:47 PM CST
+Subject: [PATCH] Fix EGL macro undeclared and EGLImageKHR
+
+* Fixes compiler issues related to EGL macro constant/enum value type not being defined
+* Updates EGLImage to EGLImageKHR
+
+Upstream-Status: Pending
+
+Signed-off-by: Vincent Davis Jr <vince@underview.tech>
+diff --git a/modules/hw/mmal/converter_mmal.c b/modules/hw/mmal/converter_mmal.c
+index f31cb81d8..426af668b 100644
+--- a/modules/hw/mmal/converter_mmal.c
++++ b/modules/hw/mmal/converter_mmal.c
+@@ -28,6 +28,34 @@
+
+ #define TRACE_ALL 0
+
++// Pass Yocto related build errors
++#define EGL_LINUX_DMA_BUF_EXT           0x3270
++#define EGL_LINUX_DRM_FOURCC_EXT        0x3271
++#define EGL_DMA_BUF_PLANE0_FD_EXT       0x3272
++#define EGL_DMA_BUF_PLANE0_OFFSET_EXT   0x3273
++#define EGL_DMA_BUF_PLANE0_PITCH_EXT    0x3274
++#define EGL_DMA_BUF_PLANE1_FD_EXT       0x3275
++#define EGL_DMA_BUF_PLANE1_OFFSET_EXT   0x3276
++#define EGL_DMA_BUF_PLANE1_PITCH_EXT    0x3277
++#define EGL_DMA_BUF_PLANE2_FD_EXT       0x3278
++#define EGL_DMA_BUF_PLANE2_OFFSET_EXT   0x3279
++#define EGL_DMA_BUF_PLANE2_PITCH_EXT    0x327A
++#define EGL_YUV_COLOR_SPACE_HINT_EXT    0x327B
++#define EGL_SAMPLE_RANGE_HINT_EXT       0x327C
++#define EGL_YUV_CHROMA_HORIZONTAL_SITING_HINT_EXT  0x327D
++#define EGL_YUV_CHROMA_VERTICAL_SITING_HINT_EXT    0x327E
++#define EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT 0x3443
++#define EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT 0x3444
++#define EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT 0x3445
++#define EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT 0x3446
++#define EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT 0x3447
++#define EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT 0x3448
++#define EGL_DMA_BUF_PLANE3_FD_EXT          0x3440
++#define EGL_DMA_BUF_PLANE3_OFFSET_EXT      0x3441
++#define EGL_DMA_BUF_PLANE3_PITCH_EXT       0x3442
++#define EGL_DMA_BUF_PLANE3_MODIFIER_LO_EXT 0x3449
++#define EGL_DMA_BUF_PLANE3_MODIFIER_HI_EXT 0x344A
++
+ typedef struct mmal_gl_converter_s
+ {
+     EGLint drm_fourcc;
+@@ -199,7 +227,7 @@ static tex_context_t * get_tex_context(const opengl_tex_converter_t * const tc,
+
+         *a = EGL_NONE;
+
+-        const EGLImage image = tc->gl->egl.createImageKHR(tc->gl, EGL_LINUX_DMA_BUF_EXT, NULL, attribs);
++        const EGLImageKHR image = tc->gl->egl.createImageKHR(tc->gl, EGL_LINUX_DMA_BUF_EXT, NULL, attribs);
+         if (!image) {
+            msg_Err(tc, "Failed to import fd %d: Err=%#x", fd, tc->vt->GetError());
+            goto fail;
diff --git a/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0010-fix-numeric_limits-is-not-a-member-of-std.patch b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0010-fix-numeric_limits-is-not-a-member-of-std.patch
new file mode 100644
index 0000000..a13b337
--- /dev/null
+++ b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/files/0010-fix-numeric_limits-is-not-a-member-of-std.patch
@@ -0,0 +1,35 @@
+From: Vincent Davis Jr <vince@underview.tech>
+Date: Fri, 07 Jan 2022 07:10:47 PM CST
+Subject: [PATCH] Fix numeric limits not a member
+
+* Fixes the compiler error below:
+  ../../git/modules/demux/adaptive/playlist/SegmentInformation.cpp:397:23: error: 'numeric_limits' is not a member of 'std'
+  397 |     if(number == std::numeric_limits<uint64_t>::max())
+
+Upstream-Status: Pending
+
+Signed-off-by: Vincent Davis Jr <vince@underview.tech>
+diff --git a/modules/demux/adaptive/playlist/SegmentInformation.cpp b/modules/demux/adaptive/playlist/SegmentInformation.cpp
+index 344e155c7..8eeb05439 100644
+--- a/modules/demux/adaptive/playlist/SegmentInformation.cpp
++++ b/modules/demux/adaptive/playlist/SegmentInformation.cpp
+@@ -34,6 +34,7 @@
+
+ #include <algorithm>
+ #include <cassert>
++#include <limits>
+
+ using namespace adaptive::playlist;
+
+diff --git a/modules/demux/hls/playlist/Parser.cpp b/modules/demux/hls/playlist/Parser.cpp
+index 67110e4cd..743969922 100644
+--- a/modules/demux/hls/playlist/Parser.cpp
++++ b/modules/demux/hls/playlist/Parser.cpp
+@@ -42,6 +42,7 @@
+ #include <map>
+ #include <cctype>
+ #include <algorithm>
++#include <limits>
+
+ using namespace adaptive;
+ using namespace adaptive::playlist;
diff --git a/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/rpidistro-vlc_3.0.12.bb b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/rpidistro-vlc_3.0.12.bb
new file mode 100644
index 0000000..b244dde
--- /dev/null
+++ b/meta-raspberrypi/dynamic-layers/multimedia-layer/recipes-multimedia/rpidistro-vlc/rpidistro-vlc_3.0.12.bb
@@ -0,0 +1,159 @@
+DESCRIPTION = "Video player and streamer - davinci edition"
+HOMEPAGE = "http://www.videolan.org"
+SECTION = "multimedia"
+
+LICENSE = "GPL-2.0-only"
+LIC_FILES_CHKSUM = "file://COPYING;md5=b234ee4d69f5fce4486a80fdaf4a4263"
+
+SRC_URI = "\
+    git://git@github.com/RPi-Distro/vlc;protocol=https;branch=buster-rpt \
+    file://0001-configure-fix-linking-on-RISC-V-ISA.patch \
+    file://0002-Revert-configure-Require-libmodplug-0.8.9.patch \
+    file://0003-mmal_20.patch \
+    file://0004-mmal_exit_fix.patch \
+    file://0005-mmal_chain.patch \
+    file://0006-Use-packageconfig-to-detect-mmal-support.patch \
+    file://0007-use-vorbisidec.patch \
+    file://0008-fix-luaL-checkint.patch \
+    file://0009-fix-EGL-macro-undeclared-and-EGLImageKHR.patch \
+    file://0010-fix-numeric_limits-is-not-a-member-of-std.patch \
+"
+
+SRCREV = "f7fd69f12a3b89d03768fa3bd468e8f33cd1dc7c"
+
+S = "${WORKDIR}/git"
+
+PROVIDES = "vlc"
+RPROVIDES:${PN} = "${PROVIDES}"
+DEPENDS = "coreutils-native fribidi libtool libgcrypt libgcrypt-native \
+           dbus libxml2 gnutls tremor faad2 ffmpeg flac alsa-lib libidn \
+           jpeg xz libmodplug mpeg2dec libmtp libopus orc libsamplerate0 \
+           avahi libusb1 schroedinger taglib tiff"
+
+inherit autotools gettext pkgconfig mime-xdg
+
+export BUILDCC = "${BUILD_CC} -std=c11"
+EXTRA_OECONF = "\
+    --enable-run-as-root \
+    --enable-xvideo \
+    --disable-lua \
+    --disable-screen --disable-caca \
+    --enable-vlm \
+    --enable-tremor \
+    --disable-aa --disable-faad \
+    --enable-dbus \
+    --without-contrib \
+    --without-kde-solid \
+    --enable-realrtsp \
+    --disable-libtar \
+    --enable-avcodec \
+"
+
+PACKAGECONFIG ?= "\
+    ${@bb.utils.contains('DISTRO_FEATURES', 'x11', 'x11', '', d)} \
+    ${@bb.utils.contains('MACHINE_FEATURES', 'vc4graphics', '', 'mmal', d)} \
+    live555 dv1394 notify fontconfig fluidsynth freetype dvdread png udev \
+    x264 alsa harfbuzz jack neon fribidi dvbpsi a52 v4l2 gles2 \
+"
+
+PACKAGECONFIG[mmal] = "--enable-omxil --enable-omxil-vout --enable-rpi-omxil --enable-mmal --enable-mmal-avcodec,,userland"
+PACKAGECONFIG[x264] = "--enable-x264,--disable-x264,x264"
+PACKAGECONFIG[mad] = "--enable-mad,--disable-mad,libmad"
+PACKAGECONFIG[a52] = "--enable-a52,--disable-a52,liba52"
+PACKAGECONFIG[jack] = "--enable-jack,--disable-jack,jack"
+PACKAGECONFIG[live555] = "--enable-live555 LIVE555_PREFIX=${STAGING_DIR_HOST}${prefix},--disable-live555,live555"
+PACKAGECONFIG[libass] = "--enable-libass,--disable-libass,libass"
+PACKAGECONFIG[postproc] = "--enable-postproc,--disable-postproc,libpostproc"
+PACKAGECONFIG[libva] = "--enable-libva,--disable-libva,libva"
+PACKAGECONFIG[opencv] = "--enable-opencv,--disable-opencv,opencv"
+PACKAGECONFIG[speex] = "--enable-speex,--disable-speex,speex"
+PACKAGECONFIG[gstreamer] = "--enable-gst-decode,--disable-gst-decode,gstreamer1.0 gstreamer1.0-plugins-base gstreamer1.0-plugins-bad"
+PACKAGECONFIG[vpx] = "--enable-vpx,--disable-vpx, libvpx"
+PACKAGECONFIG[freerdp] = "--enable-freerdp,--disable-freerdp, freerdp"
+PACKAGECONFIG[dvbpsi] = "--enable-dvbpsi,--disable-dvbpsi, libdvbpsi"
+PACKAGECONFIG[samba] = "--enable-smbclient,--disable-smbclient, samba"
+PACKAGECONFIG[upnp] = "--enable-upnp,--disable-upnp,libupnp"
+PACKAGECONFIG[dvdnav] = "--enable-dvdnav,--disable-dvdnav,libdvdnav libdvdcss"
+PACKAGECONFIG[sftp] = "--enable-sftp,--disable-sftp,libssh2"
+PACKAGECONFIG[vorbis] = "--enable-vorbis,--disable-vorbis,libvorbis libogg"
+PACKAGECONFIG[ogg] = "--enable-ogg,--disable-ogg,libvorbis libogg"
+PACKAGECONFIG[dc1394] = "--enable-dc1394,--disable-dc1394,libdc1394"
+PACKAGECONFIG[dv1394] = "--enable-dv1394,--disable-dv1394,libraw1394 libavc1394"
+PACKAGECONFIG[svg] = "--enable-svg,--disable-svg,librsvg"
+PACKAGECONFIG[svgdec] = "--enable-svgdec,--disable-svgdec,librsvg cairo"
+PACKAGECONFIG[notify] = "--enable-notify,--disable-notify, libnotify gtk+3"
+PACKAGECONFIG[fontconfig] = "--enable-fontconfig,--disable-fontconfig, fontconfig"
+PACKAGECONFIG[freetype] = "--enable-freetype,--disable-freetype, freetype"
+PACKAGECONFIG[dvdread] = "--enable-dvdread,--disable-dvdread, libdvdread libdvdcss"
+PACKAGECONFIG[vnc] = "--enable-vnc,--disable-vnc, libvncserver"
+PACKAGECONFIG[x11] = "--with-x --enable-xcb,--without-x --disable-xcb,  xcb-util-keysyms libxpm libxinerama"
+PACKAGECONFIG[png] = "--enable-png,--disable-png,libpng"
+PACKAGECONFIG[vdpau] = "--enable-vdpau,--disable-vdpau,libvdpau"
+PACKAGECONFIG[wayland] = "--enable-wayland,--disable-wayland,wayland wayland-native"
+PACKAGECONFIG[gles2] = "--enable-gles2,--disable-gles2,virtual/libgles2"
+PACKAGECONFIG[dca] = "--enable-dca,,"
+PACKAGECONFIG[fribidi] = "--enable-fribidi,,fribidi"
+PACKAGECONFIG[gnutls] = "--enable-gnutls,,gnutls"
+PACKAGECONFIG[fluidsynth] = "--enable-fluidsynth,,fluidsynth"
+PACKAGECONFIG[harfbuzz] = "--enable-harfbuzz,--disable-harfbuzz,harfbuzz"
+PACKAGECONFIG[udev] = "--enable-udev,--disable-udev,udev"
+PACKAGECONFIG[neon] = "--enable-neon,--disable-neon,"
+PACKAGECONFIG[opus] = "--enable-opus,--disable-opus,libopus libogg"
+PACKAGECONFIG[ncurses] = "--enable-ncurses,--disable-ncurses,ncurses"
+PACKAGECONFIG[alsa] = "--enable-alsa,--disable-alsa,alsa-lib"
+PACKAGECONFIG[pulseaudio] = "--enable-pulse,--disable-pulse,pulseaudio"
+PACKAGECONFIG[sdl-image] = "--enable-sdl-image,,libsdl-image"
+PACKAGECONFIG[v4l2] = "--enable-v4l2,,v4l-utils"
+
+# Workaround for modules/codec/omxil/omxil_core.h
+#   multiple definition of `pf_enable_graphic_buffers'
+#   multiple definition of `pf_get_graphic_buffer_usage'
+#   multiple definition of `pf_get_hal_format'
+TARGET_CFLAGS:append = " -fcommon"
+TARGET_CXXFLAGS:append = " -fcommon"
+
+# Ensures the --enable-mmal-avcodec flag is available for usage
+do_configure:prepend() {
+    olddir=`pwd`
+    cd ${S}
+    ./bootstrap
+    cd $olddir
+}
+
+do_configure:append() {
+    # https://forums.raspberrypi.com/viewtopic.php?p=1601535
+    sed -i "/GLAPI void APIENTRY glShaderSource (/d" ${STAGING_INCDIR}/GL/glext.h
+    #sed -i -e s:'${top_builddir_slash}libtool':'${top_builddir_slash}'${TARGET_SYS}-libtool:g ${B}/doltlibtool
+}
+
+# This recipe packages vlc as a library as well, so qt4 dependencies
+# can be avoided when only the library is installed.
+PACKAGES =+ "libvlc"
+
+LEAD_SONAME_libvlc = "libvlc.so.5"
+FILES:libvlc = "${libdir}/lib*.so.*"
+
+FILES:${PN} += "\
+    ${bindir}/vlc \
+    ${libdir}/vlc \
+    ${datadir}/applications \
+    ${datadir}/vlc \
+    ${datadir}/icons \
+    ${datadir}/metainfo/vlc.appdata.xml \
+"
+
+FILES:${PN}-dbg += "\
+    ${libdir}/vlc/*/.debug \
+    ${libdir}/vlc/plugins/*/.debug \
+"
+
+FILES:${PN}-staticdev += "\
+    ${libdir}/vlc/plugins/*/*.a \
+    ${libdir}/vlc/libcompat.a \
+"
+
+# Only enable it for rpi class of machines (COMPATIBLE_HOST is a regex matched against HOST_SYS, so no literal quotes)
+COMPATIBLE_HOST = "null"
+COMPATIBLE_HOST:rpi = "(.*)"
+
+INSANE_SKIP:${PN} = "dev-so"
diff --git a/meta-raspberrypi/recipes-bsp/common/raspberrypi-firmware.inc b/meta-raspberrypi/recipes-bsp/common/raspberrypi-firmware.inc
index e5974e4..a740440 100644
--- a/meta-raspberrypi/recipes-bsp/common/raspberrypi-firmware.inc
+++ b/meta-raspberrypi/recipes-bsp/common/raspberrypi-firmware.inc
@@ -1,9 +1,9 @@
-RPIFW_DATE ?= "20220331"
+RPIFW_DATE ?= "20220830"
 
 RPIFW_SRC_URI ?= "https://archive.raspberrypi.com/debian/pool/main/r/raspberrypi-firmware/raspberrypi-firmware_1.${RPIFW_DATE}.orig.tar.xz"
 RPIFW_S ?= "${WORKDIR}/raspberrypi-firmware-1.${RPIFW_DATE}"
 
 SRC_URI = "${RPIFW_SRC_URI}"
-SRC_URI[sha256sum] = "8758f10797bd52a7373cc5b39bd46d0d9f882d501ccb9535a72a3fe8a8d329c3"
+SRC_URI[sha256sum] = "2b27e4b3c4d2664a0a1d0dd8602bd80ea41dd006eb0ad9c67d7b659c9c8bb4e5"
 
 PV = "${RPIFW_DATE}"
diff --git a/meta-raspberrypi/recipes-kernel/linux-firmware-rpidistro/linux-firmware-rpidistro/0001-Default-43455-firmware-to-standard-variant.patch b/meta-raspberrypi/recipes-kernel/linux-firmware-rpidistro/linux-firmware-rpidistro/0001-Default-43455-firmware-to-standard-variant.patch
new file mode 100644
index 0000000..f67d95b
--- /dev/null
+++ b/meta-raspberrypi/recipes-kernel/linux-firmware-rpidistro/linux-firmware-rpidistro/0001-Default-43455-firmware-to-standard-variant.patch
@@ -0,0 +1,28 @@
+From b9db43e36ad0942d33cb4db5b394abd722862568 Mon Sep 17 00:00:00 2001
+From: Andrei Gherzan <andrei.gherzan@huawei.com>
+Date: Fri, 9 Sep 2022 20:28:06 +0200
+Subject: [PATCH] Default 43455 firmware to standard variant
+
+The firmware for 43455 is loaded as a symlink: brcmfmac43455-sdio.bin.
+This symlink is now broken as the debian package handles the right
+target of this symlink through a postinstall. We don't have that logic
+here so we default to the standard variant.
+
+Upstream-Status: Inappropriate [issue reported at https://github.com/RPi-Distro/firmware-nonfree/issues/26]
+Signed-off-by: Andrei Gherzan <andrei.gherzan@huawei.com>
+---
+ debian/config/brcm80211/brcm/brcmfmac43455-sdio.bin | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/debian/config/brcm80211/brcm/brcmfmac43455-sdio.bin b/debian/config/brcm80211/brcm/brcmfmac43455-sdio.bin
+index 9c39208..b914838 120000
+--- a/debian/config/brcm80211/brcm/brcmfmac43455-sdio.bin
++++ b/debian/config/brcm80211/brcm/brcmfmac43455-sdio.bin
+@@ -1 +1 @@
+-../cypress/cyfmac43455-sdio.bin
+\ No newline at end of file
++../cypress/cyfmac43455-sdio-standard.bin
+\ No newline at end of file
+-- 
+2.25.1
+
diff --git a/meta-raspberrypi/recipes-kernel/linux-firmware-rpidistro/linux-firmware-rpidistro_git.bb b/meta-raspberrypi/recipes-kernel/linux-firmware-rpidistro/linux-firmware-rpidistro_git.bb
index d39340a..f491241 100644
--- a/meta-raspberrypi/recipes-kernel/linux-firmware-rpidistro/linux-firmware-rpidistro_git.bb
+++ b/meta-raspberrypi/recipes-kernel/linux-firmware-rpidistro/linux-firmware-rpidistro_git.bb
@@ -15,7 +15,9 @@
 NO_GENERIC_LICENSE[Synaptics-rpidistro] = "debian/config/brcm80211/copyright"
 LICENSE_FLAGS = "synaptics-killswitch"
 
-SRC_URI = "git://github.com/RPi-Distro/firmware-nonfree;branch=bullseye;protocol=https"
+SRC_URI = "git://github.com/RPi-Distro/firmware-nonfree;branch=bullseye;protocol=https \
+    file://0001-Default-43455-firmware-to-standard-variant.patch \
+"
 SRCREV = "541e5a05d152e7e6f0d9be45622e4a3741e51c02"
 PV = "20210315-3+rpt7"
 S = "${WORKDIR}/git"
diff --git a/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi-v7.inc b/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi-v7.inc
new file mode 100644
index 0000000..77debc4
--- /dev/null
+++ b/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi-v7.inc
@@ -0,0 +1,13 @@
+# SPDX-FileCopyrightText: Andrei Gherzan <andrei.gherzan@huawei.com>
+#
+# SPDX-License-Identifier: MIT
+
+KBUILD_DEFCONFIG:raspberrypi-armv7 = "bcm2709_defconfig"
+KERNEL_PACKAGE_NAME = "${RASPBERRYPI_v7_KERNEL_PACKAGE_NAME}"
+PROVIDES:remove = "virtual/kernel"
+
+KERNEL_IMAGETYPE_DIRECT ?= "zImage"
+
+COMPATIBLE_MACHINE = "^raspberrypi-armv7$"
+
+KERNEL_DEVICETREE = ""
diff --git a/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi-v7_5.10.bb b/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi-v7_5.10.bb
new file mode 100644
index 0000000..d594b61
--- /dev/null
+++ b/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi-v7_5.10.bb
@@ -0,0 +1,6 @@
+# SPDX-FileCopyrightText: Andrei Gherzan <andrei.gherzan@huawei.com>
+#
+# SPDX-License-Identifier: MIT
+
+require linux-raspberrypi-v7.inc
+require linux-raspberrypi_5.10.bb
diff --git a/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi-v7_5.15.bb b/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi-v7_5.15.bb
new file mode 100644
index 0000000..7883985
--- /dev/null
+++ b/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi-v7_5.15.bb
@@ -0,0 +1,6 @@
+# SPDX-FileCopyrightText: Andrei Gherzan <andrei.gherzan@huawei.com>
+#
+# SPDX-License-Identifier: MIT
+
+require linux-raspberrypi-v7.inc
+require linux-raspberrypi_5.15.bb
diff --git a/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi.inc b/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi.inc
index 6362a73..be1883c 100644
--- a/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi.inc
+++ b/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi.inc
@@ -26,6 +26,8 @@
 KBUILD_DEFCONFIG:raspberrypi3-64 ?= "bcmrpi3_defconfig"
 KBUILD_DEFCONFIG:raspberrypi4 ?= "bcm2711_defconfig"
 KBUILD_DEFCONFIG:raspberrypi4-64 ?= "bcm2711_defconfig"
+KBUILD_DEFCONFIG:raspberrypi-armv7 ?= "bcm2711_defconfig"
+KBUILD_DEFCONFIG:raspberrypi-armv8 ?= "bcm2711_defconfig"
 
 LINUX_VERSION_EXTENSION ?= ""
 
diff --git a/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi_5.15.bb b/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi_5.15.bb
index db93537..468737c 100644
--- a/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi_5.15.bb
+++ b/meta-raspberrypi/recipes-kernel/linux/linux-raspberrypi_5.15.bb
@@ -17,3 +17,15 @@
 require linux-raspberrypi.inc
 
 KERNEL_DTC_FLAGS += "-@ -H epapr"
+
+RDEPENDS:${KERNEL_PACKAGE_NAME}:raspberrypi-armv7:append = " ${RASPBERRYPI_v7_KERNEL_PACKAGE_NAME}"
+RDEPENDS:${KERNEL_PACKAGE_NAME}-base:raspberrypi-armv7:append = " ${RASPBERRYPI_v7_KERNEL_PACKAGE_NAME}-base"
+RDEPENDS:${KERNEL_PACKAGE_NAME}-image:raspberrypi-armv7:append = " ${RASPBERRYPI_v7_KERNEL_PACKAGE_NAME}-image"
+RDEPENDS:${KERNEL_PACKAGE_NAME}-dev:raspberrypi-armv7:append = " ${RASPBERRYPI_v7_KERNEL_PACKAGE_NAME}-dev"
+RDEPENDS:${KERNEL_PACKAGE_NAME}-vmlinux:raspberrypi-armv7:append = " ${RASPBERRYPI_v7_KERNEL_PACKAGE_NAME}-vmlinux"
+RDEPENDS:${KERNEL_PACKAGE_NAME}-modules:raspberrypi-armv7:append = " ${RASPBERRYPI_v7_KERNEL_PACKAGE_NAME}-modules"
+RDEPENDS:${KERNEL_PACKAGE_NAME}-dbg:raspberrypi-armv7:append = " ${RASPBERRYPI_v7_KERNEL_PACKAGE_NAME}-dbg"
+
+DEPLOYDEP = ""
+DEPLOYDEP:raspberrypi-armv7 = "${RASPBERRYPI_v7_KERNEL}:do_deploy"
+do_deploy[depends] += "${DEPLOYDEP}"
diff --git a/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0001-avcodec-arm-sbcenc-avoid-callee-preserved-vfp-regist.patch b/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0001-avcodec-arm-sbcenc-avoid-callee-preserved-vfp-regist.patch
new file mode 100644
index 0000000..e9c9eb7
--- /dev/null
+++ b/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0001-avcodec-arm-sbcenc-avoid-callee-preserved-vfp-regist.patch
@@ -0,0 +1,288 @@
+From: James Cowgill <jcowgill@debian.org>
+Date: Sun, 11 Aug 2019 16:50:56 +0100
+Subject: avcodec/arm/sbcenc: avoid callee preserved vfp registers
+
+When compiling FFmpeg with GCC-9, some very random segfaults were
+observed in code which had previously called down into the SBC encoder
+NEON assembly routines. This was caused by these functions clobbering
+some of the vfp callee saved registers (d8 - d15 aka q4 - q7). GCC was
+using these registers to save local variables, but after these
+functions returned, they would contain garbage.
+
+Fix by reallocating the registers in the two affected functions in
+the following way:
+ ff_sbc_analyze_4_neon: q2-q5 => q8-q11, then q1-q4 => q8-q11
+ ff_sbc_analyze_8_neon: q2-q9 => q8-q15
+
+The reason for using these replacements is to keep closely related
+sets of registers consecutively numbered which hopefully makes the
+code more easy to follow. Since this commit only reallocates
+registers, it should have no performance impact.
+
+Upstream-Status: Pending
+
+Signed-off-by: James Cowgill <jcowgill@debian.org>
+---
+ libavcodec/arm/sbcdsp_neon.S | 220 +++++++++++++++++++++----------------------
+ 1 file changed, 110 insertions(+), 110 deletions(-)
+
+diff --git a/libavcodec/arm/sbcdsp_neon.S b/libavcodec/arm/sbcdsp_neon.S
+index d83d21d..914abfb 100644
+--- a/libavcodec/arm/sbcdsp_neon.S
++++ b/libavcodec/arm/sbcdsp_neon.S
+@@ -38,49 +38,49 @@ function ff_sbc_analyze_4_neon, export=1
+         /* TODO: merge even and odd cases (or even merge all four calls to this
+          * function) in order to have only aligned reads from 'in' array
+          * and reduce number of load instructions */
+-        vld1.16         {d4, d5}, [r0, :64]!
+-        vld1.16         {d8, d9}, [r2, :128]!
++        vld1.16         {d16, d17}, [r0, :64]!
++        vld1.16         {d20, d21}, [r2, :128]!
+
+-        vmull.s16       q0, d4, d8
+-        vld1.16         {d6,  d7}, [r0, :64]!
+-        vmull.s16       q1, d5, d9
+-        vld1.16         {d10, d11}, [r2, :128]!
++        vmull.s16       q0, d16, d20
++        vld1.16         {d18, d19}, [r0, :64]!
++        vmull.s16       q1, d17, d21
++        vld1.16         {d22, d23}, [r2, :128]!
+
+-        vmlal.s16       q0, d6, d10
+-        vld1.16         {d4, d5}, [r0, :64]!
+-        vmlal.s16       q1, d7, d11
+-        vld1.16         {d8, d9}, [r2, :128]!
++        vmlal.s16       q0, d18, d22
++        vld1.16         {d16, d17}, [r0, :64]!
++        vmlal.s16       q1, d19, d23
++        vld1.16         {d20, d21}, [r2, :128]!
+
+-        vmlal.s16       q0, d4, d8
+-        vld1.16         {d6,  d7}, [r0, :64]!
+-        vmlal.s16       q1, d5, d9
+-        vld1.16         {d10, d11}, [r2, :128]!
++        vmlal.s16       q0, d16, d20
++        vld1.16         {d18, d19}, [r0, :64]!
++        vmlal.s16       q1, d17, d21
++        vld1.16         {d22, d23}, [r2, :128]!
+
+-        vmlal.s16       q0, d6, d10
+-        vld1.16         {d4, d5}, [r0, :64]!
+-        vmlal.s16       q1, d7, d11
+-        vld1.16         {d8, d9}, [r2, :128]!
++        vmlal.s16       q0, d18, d22
++        vld1.16         {d16, d17}, [r0, :64]!
++        vmlal.s16       q1, d19, d23
++        vld1.16         {d20, d21}, [r2, :128]!
+
+-        vmlal.s16       q0, d4, d8
+-        vmlal.s16       q1, d5, d9
++        vmlal.s16       q0, d16, d20
++        vmlal.s16       q1, d17, d21
+
+         vpadd.s32       d0, d0, d1
+         vpadd.s32       d1, d2, d3
+
+         vrshrn.s32      d0, q0, SBC_PROTO_FIXED_SCALE
+
+-        vld1.16         {d2, d3, d4, d5}, [r2, :128]!
++        vld1.16         {d16, d17, d18, d19}, [r2, :128]!
+
+         vdup.i32        d1, d0[1]  /* TODO: can be eliminated */
+         vdup.i32        d0, d0[0]  /* TODO: can be eliminated */
+
+-        vmull.s16       q3, d2, d0
+-        vmull.s16       q4, d3, d0
+-        vmlal.s16       q3, d4, d1
+-        vmlal.s16       q4, d5, d1
++        vmull.s16       q10, d16, d0
++        vmull.s16       q11, d17, d0
++        vmlal.s16       q10, d18, d1
++        vmlal.s16       q11, d19, d1
+
+-        vpadd.s32       d0, d6, d7 /* TODO: can be eliminated */
+-        vpadd.s32       d1, d8, d9 /* TODO: can be eliminated */
++        vpadd.s32       d0, d20, d21 /* TODO: can be eliminated */
++        vpadd.s32       d1, d22, d23 /* TODO: can be eliminated */
+
+         vst1.32         {d0, d1}, [r1, :128]
+
+@@ -91,57 +91,57 @@ function ff_sbc_analyze_8_neon, export=1
+         /* TODO: merge even and odd cases (or even merge all four calls to this
+          * function) in order to have only aligned reads from 'in' array
+          * and reduce number of load instructions */
+-        vld1.16         {d4, d5}, [r0, :64]!
+-        vld1.16         {d8, d9}, [r2, :128]!
+-
+-        vmull.s16       q6, d4, d8
+-        vld1.16         {d6,  d7}, [r0, :64]!
+-        vmull.s16       q7, d5, d9
+-        vld1.16         {d10, d11}, [r2, :128]!
+-        vmull.s16       q8, d6, d10
+-        vld1.16         {d4, d5}, [r0, :64]!
+-        vmull.s16       q9, d7, d11
+-        vld1.16         {d8, d9}, [r2, :128]!
+-
+-        vmlal.s16       q6, d4, d8
+-        vld1.16         {d6,  d7}, [r0, :64]!
+-        vmlal.s16       q7, d5, d9
+-        vld1.16         {d10, d11}, [r2, :128]!
+-        vmlal.s16       q8, d6, d10
+-        vld1.16         {d4, d5}, [r0, :64]!
+-        vmlal.s16       q9, d7, d11
+-        vld1.16         {d8, d9}, [r2, :128]!
+-
+-        vmlal.s16       q6, d4, d8
+-        vld1.16         {d6,  d7}, [r0, :64]!
+-        vmlal.s16       q7, d5, d9
+-        vld1.16         {d10, d11}, [r2, :128]!
+-        vmlal.s16       q8, d6, d10
+-        vld1.16         {d4, d5}, [r0, :64]!
+-        vmlal.s16       q9, d7, d11
+-        vld1.16         {d8, d9}, [r2, :128]!
+-
+-        vmlal.s16       q6, d4, d8
+-        vld1.16         {d6,  d7}, [r0, :64]!
+-        vmlal.s16       q7, d5, d9
+-        vld1.16         {d10, d11}, [r2, :128]!
+-        vmlal.s16       q8, d6, d10
+-        vld1.16         {d4, d5}, [r0, :64]!
+-        vmlal.s16       q9, d7, d11
+-        vld1.16         {d8, d9}, [r2, :128]!
+-
+-        vmlal.s16       q6, d4, d8
+-        vld1.16         {d6,  d7}, [r0, :64]!
+-        vmlal.s16       q7, d5, d9
+-        vld1.16         {d10, d11}, [r2, :128]!
+-
+-        vmlal.s16       q8, d6, d10
+-        vmlal.s16       q9, d7, d11
+-
+-        vpadd.s32       d0, d12, d13
+-        vpadd.s32       d1, d14, d15
+-        vpadd.s32       d2, d16, d17
+-        vpadd.s32       d3, d18, d19
++        vld1.16         {d16, d17}, [r0, :64]!
++        vld1.16         {d20, d21}, [r2, :128]!
++
++        vmull.s16       q12, d16, d20
++        vld1.16         {d18, d19}, [r0, :64]!
++        vmull.s16       q13, d17, d21
++        vld1.16         {d22, d23}, [r2, :128]!
++        vmull.s16       q14, d18, d22
++        vld1.16         {d16, d17}, [r0, :64]!
++        vmull.s16       q15, d19, d23
++        vld1.16         {d20, d21}, [r2, :128]!
++
++        vmlal.s16       q12, d16, d20
++        vld1.16         {d18, d19}, [r0, :64]!
++        vmlal.s16       q13, d17, d21
++        vld1.16         {d22, d23}, [r2, :128]!
++        vmlal.s16       q14, d18, d22
++        vld1.16         {d16, d17}, [r0, :64]!
++        vmlal.s16       q15, d19, d23
++        vld1.16         {d20, d21}, [r2, :128]!
++
++        vmlal.s16       q12, d16, d20
++        vld1.16         {d18, d19}, [r0, :64]!
++        vmlal.s16       q13, d17, d21
++        vld1.16         {d22, d23}, [r2, :128]!
++        vmlal.s16       q14, d18, d22
++        vld1.16         {d16, d17}, [r0, :64]!
++        vmlal.s16       q15, d19, d23
++        vld1.16         {d20, d21}, [r2, :128]!
++
++        vmlal.s16       q12, d16, d20
++        vld1.16         {d18, d19}, [r0, :64]!
++        vmlal.s16       q13, d17, d21
++        vld1.16         {d22, d23}, [r2, :128]!
++        vmlal.s16       q14, d18, d22
++        vld1.16         {d16, d17}, [r0, :64]!
++        vmlal.s16       q15, d19, d23
++        vld1.16         {d20, d21}, [r2, :128]!
++
++        vmlal.s16       q12, d16, d20
++        vld1.16         {d18, d19}, [r0, :64]!
++        vmlal.s16       q13, d17, d21
++        vld1.16         {d22, d23}, [r2, :128]!
++
++        vmlal.s16       q14, d18, d22
++        vmlal.s16       q15, d19, d23
++
++        vpadd.s32       d0, d24, d25
++        vpadd.s32       d1, d26, d27
++        vpadd.s32       d2, d28, d29
++        vpadd.s32       d3, d30, d31
+
+         vrshr.s32       q0, q0, SBC_PROTO_FIXED_SCALE
+         vrshr.s32       q1, q1, SBC_PROTO_FIXED_SCALE
+@@ -153,38 +153,38 @@ function ff_sbc_analyze_8_neon, export=1
+         vdup.i32        d1, d0[1]  /* TODO: can be eliminated */
+         vdup.i32        d0, d0[0]  /* TODO: can be eliminated */
+
+-        vld1.16         {d4, d5}, [r2, :128]!
+-        vmull.s16       q6, d4, d0
+-        vld1.16         {d6, d7}, [r2, :128]!
+-        vmull.s16       q7, d5, d0
+-        vmull.s16       q8, d6, d0
+-        vmull.s16       q9, d7, d0
+-
+-        vld1.16         {d4, d5}, [r2, :128]!
+-        vmlal.s16       q6, d4, d1
+-        vld1.16         {d6, d7}, [r2, :128]!
+-        vmlal.s16       q7, d5, d1
+-        vmlal.s16       q8, d6, d1
+-        vmlal.s16       q9, d7, d1
+-
+-        vld1.16         {d4, d5}, [r2, :128]!
+-        vmlal.s16       q6, d4, d2
+-        vld1.16         {d6, d7}, [r2, :128]!
+-        vmlal.s16       q7, d5, d2
+-        vmlal.s16       q8, d6, d2
+-        vmlal.s16       q9, d7, d2
+-
+-        vld1.16         {d4, d5}, [r2, :128]!
+-        vmlal.s16       q6, d4, d3
+-        vld1.16         {d6, d7}, [r2, :128]!
+-        vmlal.s16       q7, d5, d3
+-        vmlal.s16       q8, d6, d3
+-        vmlal.s16       q9, d7, d3
+-
+-        vpadd.s32       d0, d12, d13 /* TODO: can be eliminated */
+-        vpadd.s32       d1, d14, d15 /* TODO: can be eliminated */
+-        vpadd.s32       d2, d16, d17 /* TODO: can be eliminated */
+-        vpadd.s32       d3, d18, d19 /* TODO: can be eliminated */
++        vld1.16         {d16, d17}, [r2, :128]!
++        vmull.s16       q12, d16, d0
++        vld1.16         {d18, d19}, [r2, :128]!
++        vmull.s16       q13, d17, d0
++        vmull.s16       q14, d18, d0
++        vmull.s16       q15, d19, d0
++
++        vld1.16         {d16, d17}, [r2, :128]!
++        vmlal.s16       q12, d16, d1
++        vld1.16         {d18, d19}, [r2, :128]!
++        vmlal.s16       q13, d17, d1
++        vmlal.s16       q14, d18, d1
++        vmlal.s16       q15, d19, d1
++
++        vld1.16         {d16, d17}, [r2, :128]!
++        vmlal.s16       q12, d16, d2
++        vld1.16         {d18, d19}, [r2, :128]!
++        vmlal.s16       q13, d17, d2
++        vmlal.s16       q14, d18, d2
++        vmlal.s16       q15, d19, d2
++
++        vld1.16         {d16, d17}, [r2, :128]!
++        vmlal.s16       q12, d16, d3
++        vld1.16         {d18, d19}, [r2, :128]!
++        vmlal.s16       q13, d17, d3
++        vmlal.s16       q14, d18, d3
++        vmlal.s16       q15, d19, d3
++
++        vpadd.s32       d0, d24, d25 /* TODO: can be eliminated */
++        vpadd.s32       d1, d26, d27 /* TODO: can be eliminated */
++        vpadd.s32       d2, d28, d29 /* TODO: can be eliminated */
++        vpadd.s32       d3, d30, d31 /* TODO: can be eliminated */
+
+         vst1.32         {d0, d1, d2, d3}, [r1, :128]
diff --git a/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0002-Fix-build-on-powerpc-and-ppc64.patch b/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0002-Fix-build-on-powerpc-and-ppc64.patch
new file mode 100644
index 0000000..4d9c1b9
--- /dev/null
+++ b/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0002-Fix-build-on-powerpc-and-ppc64.patch
@@ -0,0 +1,31 @@
+From: John Paul Adrian Glaubitz <glaubitz@physik.fu-berlin.de>
+Date: Tue, 19 Jan 2021 20:35:29 +0100
+Subject: Fix build on powerpc and ppc64
+
+Upstream-Status: Pending
+
+---
+ libswscale/ppc/yuv2rgb_altivec.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/libswscale/ppc/yuv2rgb_altivec.c b/libswscale/ppc/yuv2rgb_altivec.c
+index 5365452..930ef6b 100644
+--- a/libswscale/ppc/yuv2rgb_altivec.c
++++ b/libswscale/ppc/yuv2rgb_altivec.c
+@@ -283,6 +283,16 @@ static inline void cvtyuvtoRGB(SwsContext *c, vector signed short Y,
+  * ------------------------------------------------------------------------------
+  */
+
++#if !HAVE_VSX
++static inline vector unsigned char vec_xl(signed long long offset, const ubyte *addr)
++{
++    const vector unsigned char *v_addr = (const vector unsigned char *) (addr + offset);
++    vector unsigned char align_perm = vec_lvsl(offset, addr);
++
++    return (vector unsigned char) vec_perm(v_addr[0], v_addr[1], align_perm);
++}
++#endif /* !HAVE_VSX */
++
+ #define DEFCSP420_CVT(name, out_pixels)                                       \
+ static int altivec_ ## name(SwsContext *c, const unsigned char **in,          \
+                             int *instrides, int srcSliceY, int srcSliceH,     \
diff --git a/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0003-avcodec-pngenc-remove-monowhite-from-apng-formats.patch b/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0003-avcodec-pngenc-remove-monowhite-from-apng-formats.patch
new file mode 100644
index 0000000..38f3fd4
--- /dev/null
+++ b/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0003-avcodec-pngenc-remove-monowhite-from-apng-formats.patch
@@ -0,0 +1,28 @@
+From: Paul B Mahol <onemda@gmail.com>
+Date: Sun, 14 Feb 2021 17:20:03 +0100
+Subject: avcodec/pngenc: remove monowhite from apng formats
+
+Monowhite pixel format is not supported, and it does not make sense
+to add support for it.
+
+Fixes #7989
+
+Upstream-Status: Pending
+
+---
+ libavcodec/pngenc.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/libavcodec/pngenc.c b/libavcodec/pngenc.c
+index efcae8c..eebb164 100644
+--- a/libavcodec/pngenc.c
++++ b/libavcodec/pngenc.c
+@@ -1174,7 +1174,7 @@ AVCodec ff_apng_encoder = {
+         AV_PIX_FMT_PAL8,
+         AV_PIX_FMT_GRAY8, AV_PIX_FMT_GRAY8A,
+         AV_PIX_FMT_GRAY16BE, AV_PIX_FMT_YA16BE,
+-        AV_PIX_FMT_MONOBLACK, AV_PIX_FMT_NONE
++        AV_PIX_FMT_NONE
+     },
+     .priv_class     = &apngenc_class,
+ };
diff --git a/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0004-ffmpeg-4.3.2-rpi_10.patch b/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0004-ffmpeg-4.3.2-rpi_10.patch
new file mode 100644
index 0000000..6bab0d0
--- /dev/null
+++ b/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0004-ffmpeg-4.3.2-rpi_10.patch
@@ -0,0 +1,60569 @@
+Upstream-Status: Pending
+
+--- a/configure
++++ b/configure
+@@ -274,6 +274,7 @@ External library support:
+   --enable-libtls          enable LibreSSL (via libtls), needed for https support
+                            if openssl, gnutls or mbedtls is not used [no]
+   --enable-libtwolame      enable MP2 encoding via libtwolame [no]
++  --enable-libudev         enable libudev [no]
+   --enable-libv4l2         enable libv4l2/v4l-utils [no]
+   --enable-libvidstab      enable video stabilization using vid.stab [no]
+   --enable-libvmaf         enable vmaf filter via libvmaf [no]
+@@ -336,12 +337,17 @@ External library support:
+   --enable-libmfx          enable Intel MediaSDK (AKA Quick Sync Video) code via libmfx [no]
+   --enable-libnpp          enable Nvidia Performance Primitives-based code [no]
+   --enable-mmal            enable Broadcom Multi-Media Abstraction Layer (Raspberry Pi) via MMAL [no]
++  --enable-rpi             enable other rpi specific stuff [no]
++  --enable-sand            enable sand video formats [rpi]
++  --enable-vout-drm        enable the vout_drm module - for internal testing only [no]
++  --enable-vout-egl        enable the vout_egl module - for internal testing only [no]
+   --disable-nvdec          disable Nvidia video decoding acceleration (via hwaccel) [autodetect]
+   --disable-nvenc          disable Nvidia video encoding code [autodetect]
+   --enable-omx             enable OpenMAX IL code [no]
+   --enable-omx-rpi         enable OpenMAX IL code for Raspberry Pi [no]
+   --enable-rkmpp           enable Rockchip Media Process Platform code [no]
+   --disable-v4l2-m2m       disable V4L2 mem2mem code [autodetect]
++  --enable-v4l2-request    enable V4L2 request API code [no]
+   --disable-vaapi          disable Video Acceleration API (mainly Unix/Intel) code [autodetect]
+   --disable-vdpau          disable Nvidia Video Decode and Presentation API for Unix code [autodetect]
+   --disable-videotoolbox   disable VideoToolbox code [autodetect]
+@@ -1771,6 +1777,7 @@ EXTERNAL_LIBRARY_LIST="
+     libdav1d
+     libdc1394
+     libdrm
++    epoxy
+     libflite
+     libfontconfig
+     libfreetype
+@@ -1807,6 +1814,7 @@ EXTERNAL_LIBRARY_LIST="
+     libtesseract
+     libtheora
+     libtwolame
++    libudev
+     libv4l2
+     libvorbis
+     libvpx
+@@ -1861,7 +1869,10 @@ HWACCEL_LIBRARY_LIST="
+     mmal
+     omx
+     opencl
++    v4l2_request
+     vulkan
++    rpi4_8
++    rpi4_10
+ "
+
+ DOCUMENT_LIST="
+@@ -1877,12 +1888,16 @@ FEATURE_LIST="
+     gray
+     hardcoded_tables
+     omx_rpi
++    rpi
+     runtime_cpudetect
+     safe_bitstream_reader
++    sand
+     shared
+     small
+     static
+     swscale_alpha
++    vout_drm
++    vout_egl
+ "
+
+ # this list should be kept in linking order
+@@ -1923,6 +1938,7 @@ SUBSYSTEM_LIST="
+     pixelutils
+     network
+     rdft
++    rpi
+ "
+
+ # COMPONENT_LIST needs to come last to ensure correct dependency checking
+@@ -2405,9 +2421,11 @@ CONFIG_EXTRA="
+     rangecoder
+     riffdec
+     riffenc
++    rpi
+     rtpdec
+     rtpenc_chain
+     rv34dsp
++    sand
+     scene_sad
+     sinewin
+     snappy
+@@ -2737,6 +2755,8 @@ hap_decoder_select="snappy texturedsp"
+ hap_encoder_deps="libsnappy"
+ hap_encoder_select="texturedspenc"
+ hevc_decoder_select="bswapdsp cabac golomb hevcparse videodsp"
++hevc_rpi_decoder_deps="rpi"
++hevc_rpi_decoder_select="hevc_decoder sand"
+ huffyuv_decoder_select="bswapdsp huffyuvdsp llviddsp"
+ huffyuv_encoder_select="bswapdsp huffman huffyuvencdsp llvidencdsp"
+ hymt_decoder_select="huffyuv_decoder"
+@@ -2903,6 +2923,7 @@ d3d11va_deps="dxva_h ID3D11VideoDecoder
+ dxva2_deps="dxva2api_h DXVA2_ConfigPictureDecode ole32 user32"
+ ffnvcodec_deps_any="libdl LoadLibrary"
+ nvdec_deps="ffnvcodec"
++v4l2_request_deps="linux_videodev2_h linux_media_h v4l2_timeval_to_ns libdrm libudev"
+ vaapi_x11_deps="xlib"
+ videotoolbox_hwaccel_deps="videotoolbox pthreads"
+ videotoolbox_hwaccel_extralibs="-framework QuartzCore"
+@@ -2934,6 +2955,12 @@ hevc_dxva2_hwaccel_deps="dxva2 DXVA_PicP
+ hevc_dxva2_hwaccel_select="hevc_decoder"
+ hevc_nvdec_hwaccel_deps="nvdec"
+ hevc_nvdec_hwaccel_select="hevc_decoder"
++hevc_v4l2request_hwaccel_deps="v4l2_request"
++hevc_v4l2request_hwaccel_select="hevc_decoder"
++hevc_rpi4_10_hwaccel_deps="rpi"
++hevc_rpi4_10_hwaccel_select="hevc_decoder"
++hevc_rpi4_8_hwaccel_deps="rpi"
++hevc_rpi4_8_hwaccel_select="hevc_decoder"
+ hevc_vaapi_hwaccel_deps="vaapi VAPictureParameterBufferHEVC"
+ hevc_vaapi_hwaccel_select="hevc_decoder"
+ hevc_vdpau_hwaccel_deps="vdpau VdpPictureInfoHEVC"
+@@ -3401,8 +3428,14 @@ sndio_indev_deps="sndio"
+ sndio_outdev_deps="sndio"
+ v4l2_indev_deps_any="linux_videodev2_h sys_videoio_h"
+ v4l2_indev_suggest="libv4l2"
++v4l2_outdev_deps="libdrm"
+ v4l2_outdev_deps_any="linux_videodev2_h sys_videoio_h"
+ v4l2_outdev_suggest="libv4l2"
++vout_drm_outdev_deps="libdrm vout_drm"
++vout_egl_outdev_deps="xlib"
++vout_egl_outdev_select="epoxy"
++vout_rpi_outdev_deps="rpi"
++vout_rpi_outdev_select="sand"
+ vfwcap_indev_deps="vfw32 vfwcap_defines"
+ xcbgrab_indev_deps="libxcb"
+ xcbgrab_indev_suggest="libxcb_shm libxcb_shape libxcb_xfixes"
+@@ -3618,6 +3651,7 @@ tonemap_vaapi_filter_deps="vaapi VAProcF
+ tonemap_opencl_filter_deps="opencl const_nan"
+ transpose_opencl_filter_deps="opencl"
+ transpose_vaapi_filter_deps="vaapi VAProcPipelineCaps_rotation_flags"
++unsand_filter_select="sand"
+ unsharp_opencl_filter_deps="opencl"
+ uspp_filter_deps="gpl avcodec"
+ vaguedenoiser_filter_deps="gpl"
+@@ -6299,6 +6333,7 @@ enabled libdav1d          && require_pkg
+ enabled libdavs2          && require_pkg_config libdavs2 "davs2 >= 1.6.0" davs2.h davs2_decoder_open
+ enabled libdc1394         && require_pkg_config libdc1394 libdc1394-2 dc1394/dc1394.h dc1394_new
+ enabled libdrm            && require_pkg_config libdrm libdrm xf86drm.h drmGetVersion
++enabled epoxy             && require_pkg_config epoxy epoxy epoxy/egl.h epoxy_egl_version
+ enabled libfdk_aac        && { check_pkg_config libfdk_aac fdk-aac "fdk-aac/aacenc_lib.h" aacEncOpen ||
+                                { require libfdk_aac fdk-aac/aacenc_lib.h aacEncOpen -lfdk-aac &&
+                                  warn "using libfdk without pkg-config"; } }
+@@ -6376,6 +6411,7 @@ enabled libtls            && require_pkg
+ enabled libtwolame        && require libtwolame twolame.h twolame_init -ltwolame &&
+                              { check_lib libtwolame twolame.h twolame_encode_buffer_float32_interleaved -ltwolame ||
+                                die "ERROR: libtwolame must be installed and version must be >= 0.3.10"; }
++enabled libudev           && require_pkg_config libudev libudev libudev.h udev_new
+ enabled libv4l2           && require_pkg_config libv4l2 libv4l2 libv4l2.h v4l2_ioctl
+ enabled libvidstab        && require_pkg_config libvidstab "vidstab >= 0.98" vid.stab/libvidstab.h vsMotionDetectInit
+ enabled libvmaf           && require_pkg_config libvmaf "libvmaf >= 1.3.9" libvmaf.h compute_vmaf
+@@ -6430,11 +6466,12 @@ enabled mbedtls           && { check_pkg
+                                check_lib mbedtls mbedtls/ssl.h mbedtls_ssl_init -lmbedtls -lmbedx509 -lmbedcrypto ||
+                                die "ERROR: mbedTLS not found"; }
+ enabled mediacodec        && { enabled jni || die "ERROR: mediacodec requires --enable-jni"; }
+-enabled mmal              && { check_lib mmal interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host ||
++( enabled rpi ||
++  enabled mmal )          && { check_lib mmal interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host ||
+                                { ! enabled cross_compile &&
+                                  add_cflags -isystem/opt/vc/include/ -isystem/opt/vc/include/interface/vmcs_host/linux -isystem/opt/vc/include/interface/vcos/pthreads -fgnu89-inline &&
+                                  add_ldflags -L/opt/vc/lib/ &&
+-                                 check_lib mmal interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host; } ||
++                                 check_lib mmal interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host -lvcos -lvcsm -lvchostif -lvchiq_arm; } ||
+                                die "ERROR: mmal not found" &&
+                                check_func_headers interface/mmal/mmal.h "MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS"; }
+ enabled openal            && { { for al_extralibs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32"; do
+@@ -6475,6 +6512,10 @@ enabled rkmpp             && { require_p
+                                { enabled libdrm ||
+                                  die "ERROR: rkmpp requires --enable-libdrm"; }
+                              }
++enabled v4l2_request      && { enabled libdrm ||
++                               die "ERROR: v4l2-request requires --enable-libdrm"; } &&
++                             { enabled libudev ||
++                               die "ERROR: v4l2-request requires --enable-libudev"; }
+ enabled vapoursynth       && require_pkg_config vapoursynth "vapoursynth-script >= 42" VSScript.h vsscript_init
+
+
+@@ -6556,6 +6597,8 @@ if enabled v4l2_m2m; then
+     check_cc vp9_v4l2_m2m linux/videodev2.h "int i = V4L2_PIX_FMT_VP9;"
+ fi
+
++check_func_headers "linux/media.h linux/videodev2.h" v4l2_timeval_to_ns
++check_cc hevc_v4l2_request linux/videodev2.h "int i = V4L2_PIX_FMT_HEVC_SLICE;"
+ check_headers sys/videoio.h
+ test_code cc sys/videoio.h "struct v4l2_frmsizeenum vfse; vfse.discrete.width = 0;" && enable_sanitized struct_v4l2_frmivalenum_discrete
+
+--- a/fftools/ffmpeg.c
++++ b/fftools/ffmpeg.c
+@@ -2119,8 +2119,8 @@ static int ifilter_send_frame(InputFilte
+                        ifilter->channel_layout != frame->channel_layout;
+         break;
+     case AVMEDIA_TYPE_VIDEO:
+-        need_reinit |= ifilter->width  != frame->width ||
+-                       ifilter->height != frame->height;
++        need_reinit |= ifilter->width  != av_frame_cropped_width(frame) ||
++                       ifilter->height != av_frame_cropped_height(frame);
+         break;
+     }
+
+@@ -2131,6 +2131,9 @@ static int ifilter_send_frame(InputFilte
+         (ifilter->hw_frames_ctx && ifilter->hw_frames_ctx->data != frame->hw_frames_ctx->data))
+         need_reinit = 1;
+
++    if (no_cvt_hw && fg->graph)
++        need_reinit = 0;
++
+     if (need_reinit) {
+         ret = ifilter_parameters_from_frame(ifilter, frame);
+         if (ret < 0)
+@@ -2401,8 +2404,7 @@ static int decode_video(InputStream *ist
+         decoded_frame->top_field_first = ist->top_field_first;
+
+     ist->frames_decoded++;
+-
+-    if (ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) {
++    if (!no_cvt_hw && ist->hwaccel_retrieve_data && decoded_frame->format == ist->hwaccel_pix_fmt) {
+         err = ist->hwaccel_retrieve_data(ist->dec_ctx, decoded_frame);
+         if (err < 0)
+             goto fail;
+@@ -2820,6 +2822,16 @@ static enum AVPixelFormat get_format(AVC
+         } else {
+             const HWAccel *hwaccel = NULL;
+             int i;
++
++            if (no_cvt_hw) {
++                config = avcodec_get_hw_config(s->codec, 0); /* NULL when the codec lists no hw configs */
++                if (config && config->methods == AV_CODEC_HW_CONFIG_METHOD_INTERNAL) {
++                    av_log(s, AV_LOG_DEBUG, "no_cvt_hw so accepting pix_fmt %d with codec internal hwaccel\n", *p);
++                    ist->hwaccel_pix_fmt = *p;
++                    break;
++                }
++            }
++
+             for (i = 0; hwaccels[i].name; i++) {
+                 if (hwaccels[i].pix_fmt == *p) {
+                     hwaccel = &hwaccels[i];
+@@ -2914,6 +2926,15 @@ static int init_input_stream(int ist_ind
+             return ret;
+         }
+
++#if CONFIG_HEVC_RPI_DECODER
++        ret = -1;
++        if (strcmp(codec->name, "hevc_rpi") == 0 &&
++            (ret = avcodec_open2(ist->dec_ctx, codec, &ist->decoder_opts)) < 0) {
++            ist->dec = codec = avcodec_find_decoder_by_name("hevc");
++            av_log(NULL, AV_LOG_INFO, "Failed to open hevc_rpi - trying hevc\n");
++        }
++        if (ret < 0)
++#endif
+         if ((ret = avcodec_open2(ist->dec_ctx, codec, &ist->decoder_opts)) < 0) {
+             if (ret == AVERROR_EXPERIMENTAL)
+                 abort_codec_experimental(codec, 0);
+--- a/fftools/ffmpeg.h
++++ b/fftools/ffmpeg.h
+@@ -61,6 +61,7 @@ enum HWAccelID {
+     HWACCEL_GENERIC,
+     HWACCEL_VIDEOTOOLBOX,
+     HWACCEL_QSV,
++    HWACCEL_RPI,
+ };
+
+ typedef struct HWAccel {
+@@ -590,6 +591,7 @@ extern int video_sync_method;
+ extern float frame_drop_threshold;
+ extern int do_benchmark;
+ extern int do_benchmark_all;
++extern int no_cvt_hw;
+ extern int do_deinterlace;
+ extern int do_hex_dump;
+ extern int do_pkt_dump;
+--- a/fftools/ffmpeg_filter.c
++++ b/fftools/ffmpeg_filter.c
+@@ -1186,8 +1186,8 @@ int ifilter_parameters_from_frame(InputF
+
+     ifilter->format = frame->format;
+
+-    ifilter->width               = frame->width;
+-    ifilter->height              = frame->height;
++    ifilter->width               = av_frame_cropped_width(frame);
++    ifilter->height              = av_frame_cropped_height(frame);
+     ifilter->sample_aspect_ratio = frame->sample_aspect_ratio;
+
+     ifilter->sample_rate         = frame->sample_rate;
+--- a/fftools/ffmpeg_hw.c
++++ b/fftools/ffmpeg_hw.c
+@@ -75,6 +75,8 @@ static char *hw_device_default_name(enum
+     char *name;
+     size_t index_pos;
+     int index, index_limit = 1000;
++    if (!type_name)
++        return NULL;
+     index_pos = strlen(type_name);
+     name = av_malloc(index_pos + 4);
+     if (!name)
+--- a/fftools/ffmpeg_opt.c
++++ b/fftools/ffmpeg_opt.c
+@@ -130,6 +130,12 @@ static const char *opt_name_enc_time_bas
+     }\
+ }
+
++#if CONFIG_RPI
++static int rpi_init(AVCodecContext *avctx) {
++    return 0; /* no setup performed; serves as the init callback of the "rpi" hwaccels[] entries */
++}
++#endif
++
+ const HWAccel hwaccels[] = {
+ #if CONFIG_VIDEOTOOLBOX
+     { "videotoolbox", videotoolbox_init, HWACCEL_VIDEOTOOLBOX, AV_PIX_FMT_VIDEOTOOLBOX },
+@@ -137,6 +143,10 @@ const HWAccel hwaccels[] = {
+ #if CONFIG_LIBMFX
+     { "qsv",   qsv_init,   HWACCEL_QSV,   AV_PIX_FMT_QSV },
+ #endif
++#if CONFIG_RPI
++    {  "rpi", rpi_init, HWACCEL_RPI, AV_PIX_FMT_RPI4_8 },
++    {  "rpi", rpi_init, HWACCEL_RPI, AV_PIX_FMT_RPI4_10 },
++#endif
+     { 0 },
+ };
+ HWDevice *filter_hw_device;
+@@ -155,6 +165,7 @@ float frame_drop_threshold = 0;
+ int do_deinterlace    = 0;
+ int do_benchmark      = 0;
+ int do_benchmark_all  = 0;
++int no_cvt_hw         = 0;
+ int do_hex_dump       = 0;
+ int do_pkt_dump       = 0;
+ int copy_ts           = 0;
+@@ -3460,6 +3471,8 @@ const OptionDef options[] = {
+         "add timings for benchmarking" },
+     { "benchmark_all",  OPT_BOOL | OPT_EXPERT,                       { &do_benchmark_all },
+       "add timings for each task" },
++    { "no_cvt_hw",      OPT_BOOL | OPT_EXPERT,                       { &no_cvt_hw },
++      "do not auto-convert hw frames to sw" },
+     { "progress",       HAS_ARG | OPT_EXPERT,                        { .func_arg = opt_progress },
+       "write program-readable progress information", "url" },
+     { "stdin",          OPT_BOOL | OPT_EXPERT,                       { &stdin_interaction },
+--- a/libavcodec/Makefile
++++ b/libavcodec/Makefile
+@@ -19,6 +19,7 @@ HEADERS = ac3_parser.h
+           mediacodec.h                                                  \
+           packet.h                                                      \
+           qsv.h                                                         \
++          rpi_zc.h                                                      \
+           vaapi.h                                                       \
+           vdpau.h                                                       \
+           version.h                                                     \
+@@ -138,6 +139,7 @@ OBJS-$(CONFIG_QSVDEC)                  +
+ OBJS-$(CONFIG_QSVENC)                  += qsvenc.o
+ OBJS-$(CONFIG_RANGECODER)              += rangecoder.o
+ OBJS-$(CONFIG_RDFT)                    += rdft.o
++OBJS-$(CONFIG_RPI)                     += rpi_qpu.o rpi_mailbox.o rpi_zc.o
+ OBJS-$(CONFIG_RV34DSP)                 += rv34dsp.o
+ OBJS-$(CONFIG_SHARED)                  += log2_tab.o reverse.o
+ OBJS-$(CONFIG_SINEWIN)                 += sinewin.o sinewin_fixed.o
+@@ -152,7 +154,10 @@ OBJS-$(CONFIG_VIDEODSP)                +
+ OBJS-$(CONFIG_VP3DSP)                  += vp3dsp.o
+ OBJS-$(CONFIG_VP56DSP)                 += vp56dsp.o
+ OBJS-$(CONFIG_VP8DSP)                  += vp8dsp.o
+-OBJS-$(CONFIG_V4L2_M2M)                += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o
++OBJS-$(CONFIG_V4L2_M2M)                += v4l2_m2m.o v4l2_context.o v4l2_buffers.o v4l2_fmt.o\
++                                          weak_link.o
++OBJS-$(CONFIG_V4L2_REQUEST)            += v4l2_req_media.o v4l2_req_pollqueue.o v4l2_req_dmabufs.o\
++					  v4l2_req_devscan.o weak_link.o
+ OBJS-$(CONFIG_WMA_FREQS)               += wma_freqs.o
+ OBJS-$(CONFIG_WMV2DSP)                 += wmv2dsp.o
+
+@@ -391,6 +396,14 @@ OBJS-$(CONFIG_HEVC_QSV_DECODER)        +
+ OBJS-$(CONFIG_HEVC_QSV_ENCODER)        += qsvenc_hevc.o hevc_ps_enc.o       \
+                                           hevc_data.o
+ OBJS-$(CONFIG_HEVC_RKMPP_DECODER)      += rkmppdec.o
++OBJS-$(CONFIG_RPI)                     += rpi_mem.o \
++                                          rpi_mailbox.o rpi_zc.o
++OBJS-$(CONFIG_HEVC_RPI_DECODER)        += rpi_hevcdec.o rpi_hevc_mvs.o \
++                                          rpi_hevc_cabac.o rpi_hevc_refs.o rpi_hevcpred.o    \
++                                          rpi_hevcdsp.o rpi_hevc_filter.o rpi_hevc_data.o    \
++                                          rpi_hevc_shader.o rpi_hevc_shader_template.o       \
++                                          rpi_hevc_parse.o h2645_parse.o rpi_hevc_ps.o \
++                                          rpi_hevc_sei.o rpi_hevc_data.o rpi_qpu.o rpi_mem.o
+ OBJS-$(CONFIG_HEVC_VAAPI_ENCODER)      += vaapi_encode_h265.o h265_profile_level.o
+ OBJS-$(CONFIG_HEVC_V4L2M2M_DECODER)    += v4l2_m2m_dec.o
+ OBJS-$(CONFIG_HEVC_V4L2M2M_ENCODER)    += v4l2_m2m_enc.o
+@@ -909,6 +922,10 @@ OBJS-$(CONFIG_HEVC_D3D11VA_HWACCEL)
+ OBJS-$(CONFIG_HEVC_DXVA2_HWACCEL)         += dxva2_hevc.o
+ OBJS-$(CONFIG_HEVC_NVDEC_HWACCEL)         += nvdec_hevc.o
+ OBJS-$(CONFIG_HEVC_QSV_HWACCEL)           += qsvdec_h2645.o
++OBJS-$(CONFIG_HEVC_RPI4_8_HWACCEL)        += rpivid_hevc.o
++OBJS-$(CONFIG_HEVC_RPI4_10_HWACCEL)       += rpivid_hevc.o
++OBJS-$(CONFIG_HEVC_V4L2REQUEST_HWACCEL)   += v4l2_request_hevc.o v4l2_req_decode_q.o\
++                                             v4l2_req_hevc_v1.o v4l2_req_hevc_v2.o
+ OBJS-$(CONFIG_HEVC_VAAPI_HWACCEL)         += vaapi_hevc.o h265_profile_level.o
+ OBJS-$(CONFIG_HEVC_VDPAU_HWACCEL)         += vdpau_hevc.o
+ OBJS-$(CONFIG_MJPEG_NVDEC_HWACCEL)        += nvdec_mjpeg.o
+@@ -1261,3 +1278,31 @@ $(SUBDIR)qdm2.o: $(SUBDIR)qdm2_tables.h
+ $(SUBDIR)sinewin.o: $(SUBDIR)sinewin_tables.h
+ $(SUBDIR)sinewin_fixed.o: $(SUBDIR)sinewin_fixed_tables.h
+ endif
++
++ifdef CONFIG_HEVC_RPI_DECODER
++QASM_PY := ../local/bin/qasm.py
++VASMVIDCORE := ../local/bin/vasmvidcore_std
++
++ifneq ("$(wildcard $(QASM_PY))","")
++$(SUBDIR)rpi_hevc_shader.c: $(SUBDIR)rpi_hevc_shader.qasm
++	$(QASM_PY) -mc_c:rpi_hevc_shader,rpi_hevc_shader,ff_hevc_rpi_shader $< > $@
++
++$(SUBDIR)rpi_hevc_shader.h: $(SUBDIR)rpi_hevc_shader.qasm
++	$(QASM_PY) -mc_h:rpi_hevc_shader,rpi_hevc_shader,ff_hevc_rpi_shader $< > $@
++endif
++
++ifneq ("$(wildcard $(VASMVIDCORE))","")
++$(SUBDIR)rpi_hevc_transform8.bin: $(SUBDIR)rpi_hevc_transform.s
++	$(VASMVIDCORE) -Fbin -DBIT_DEPTH=8 $< -o $@
++$(SUBDIR)rpi_hevc_transform10.bin: $(SUBDIR)rpi_hevc_transform.s
++	$(VASMVIDCORE) -Fbin -DBIT_DEPTH=10 $< -o $@
++
++$(SUBDIR)rpi_hevc_transform8.h: $(SUBDIR)rpi_hevc_transform8.bin
++	python pi-util/make_array.py $<
++$(SUBDIR)rpi_hevc_transform10.h: $(SUBDIR)rpi_hevc_transform10.bin
++	python pi-util/make_array.py $<
++endif
++
++$(SUBDIR)rpi_qpu.o: $(SUBDIR)rpi_hevc_transform8.h $(SUBDIR)rpi_hevc_transform10.h
++$(SUBDIR)rpi_hevcdec.o $(SUBDIR)rpi_shader_template.o $(SUBDIR)rpi_qpu.o: $(SUBDIR)rpi_hevc_shader.h
++endif
+--- a/libavcodec/allcodecs.c
++++ b/libavcodec/allcodecs.c
+@@ -149,6 +149,7 @@ extern AVCodec ff_hap_decoder;
+ extern AVCodec ff_hevc_decoder;
+ extern AVCodec ff_hevc_qsv_decoder;
+ extern AVCodec ff_hevc_rkmpp_decoder;
++extern AVCodec ff_hevc_rpi_decoder;
+ extern AVCodec ff_hevc_v4l2m2m_decoder;
+ extern AVCodec ff_hnm4_video_decoder;
+ extern AVCodec ff_hq_hqa_decoder;
+@@ -890,6 +891,41 @@ static enum AVCodecID remap_deprecated_c
+     }
+ }
+
++/* Return 1 if codec p lists fmt in pix_fmts (or lists nothing at all), else 0. */
++static int codec_supports_format(const AVCodec * const p, const enum AVPixelFormat fmt)
++{
++    const enum AVPixelFormat *cur;
++
++    // A codec that publishes no format list is given the benefit of the doubt
++    if (!p->pix_fmts)
++        return 1;
++    if (fmt == AV_PIX_FMT_NONE)
++        return 0;
++
++    // Walk the list until either fmt or the AV_PIX_FMT_NONE terminator is hit
++    for (cur = p->pix_fmts; *cur != AV_PIX_FMT_NONE && *cur != fmt; ++cur)
++        ;
++    return *cur == fmt;
++}
++
++// Look up a decoder for id whose pix_fmts admit fmt; the first experimental hit is kept only as a fallback.
++AVCodec *avcodec_find_decoder_by_id_and_fmt(enum AVCodecID id, enum AVPixelFormat fmt)
++{
++    const AVCodec *p, *experimental = NULL;
++    void *i = 0;
++
++    id= remap_deprecated_codec_id(id);
++    while ((p = av_codec_iterate(&i))) { // iteration state lives in i; p needs no manual stepping
++        if (av_codec_is_decoder(p) && p->id == id && codec_supports_format(p, fmt)) {
++            if (p->capabilities & AV_CODEC_CAP_EXPERIMENTAL && !experimental) {
++                experimental = p;
++            } else
++                return (AVCodec *)p;
++        }
++    }
++    return (AVCodec *)experimental;
++}
++
+ static AVCodec *find_codec(enum AVCodecID id, int (*x)(const AVCodec *))
+ {
+     const AVCodec *p, *experimental = NULL;
+--- a/libavcodec/arm/Makefile
++++ b/libavcodec/arm/Makefile
+@@ -40,6 +40,8 @@ OBJS-$(CONFIG_AAC_DECODER)             +
+                                           arm/sbrdsp_init_arm.o
+ OBJS-$(CONFIG_DCA_DECODER)             += arm/synth_filter_init_arm.o
+ OBJS-$(CONFIG_HEVC_DECODER)            += arm/hevcdsp_init_arm.o
++OBJS-$(CONFIG_HEVC_RPI_DECODER)        += arm/rpi_hevcdsp_init_arm.o    \
++                                          arm/rpi_hevcpred_init_arm.o
+ OBJS-$(CONFIG_MLP_DECODER)             += arm/mlpdsp_init_arm.o
+ OBJS-$(CONFIG_RV40_DECODER)            += arm/rv40dsp_init_arm.o
+ OBJS-$(CONFIG_SBC_ENCODER)             += arm/sbcdsp_init_arm.o
+@@ -140,10 +142,24 @@ NEON-OBJS-$(CONFIG_AAC_DECODER)        +
+ NEON-OBJS-$(CONFIG_LLAUDDSP)           += arm/lossless_audiodsp_neon.o
+ NEON-OBJS-$(CONFIG_DCA_DECODER)        += arm/synth_filter_neon.o
+ NEON-OBJS-$(CONFIG_HEVC_DECODER)       += arm/hevcdsp_init_neon.o       \
++                                          arm/hevcdsp_idct_neon.o    \
+                                           arm/hevcdsp_deblock_neon.o    \
+                                           arm/hevcdsp_idct_neon.o       \
+                                           arm/hevcdsp_qpel_neon.o       \
+                                           arm/hevcdsp_sao_neon.o
++NEON-OBJS-$(CONFIG_HEVC_RPI_DECODER)   += arm/rpi_hevcdsp_init_neon.o    \
++                                          arm/rpi_hevc_misc_neon.o       \
++                                          arm/rpi_hevcdsp_deblock_neon.o \
++                                          arm/rpi_hevcdsp_idct_neon.o    \
++                                          arm/rpi_hevcdsp_res8_neon.o    \
++                                          arm/rpi_hevcdsp_res16_neon.o   \
++                                          arm/rpi_hevcdsp_sao_neon.o     \
++                                          arm/rpi_hevcpred_init_neon.o   \
++                                          arm/rpi_hevcpred_intra_angular_neon.o \
++                                          arm/rpi_hevcpred_intra_dc_neon.o \
++                                          arm/rpi_hevcpred_intra_filter_neon.o \
++                                          arm/rpi_hevcpred_intra_hv_neon.o \
++                                          arm/rpi_hevcpred_intra_planar_neon.o
+ NEON-OBJS-$(CONFIG_RV30_DECODER)       += arm/rv34dsp_neon.o
+ NEON-OBJS-$(CONFIG_RV40_DECODER)       += arm/rv34dsp_neon.o            \
+                                           arm/rv40dsp_neon.o
+--- a/libavcodec/arm/cabac.h
++++ b/libavcodec/arm/cabac.h
+@@ -26,83 +26,209 @@
+ #include "libavutil/internal.h"
+ #include "libavcodec/cabac.h"
+
++
+ #define get_cabac_inline get_cabac_inline_arm
+ static av_always_inline int get_cabac_inline_arm(CABACContext *c,
+-                                                 uint8_t *const state)
++                                                 uint8_t *state)
+ {
+-    int bit;
+-    void *reg_b, *reg_c, *tmp;
++    const uint8_t *mlps_tables = ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET + 128;
++    int bit, ptr, low, tmp1, tmp2;
++    __asm__ volatile (
++        "ldr     %[bit], [%[c], %[range_off]]             \n\t"
++        "ldrb    %[ptr], [%[state]]                       \n\t"
++        "sub     %[tmp1], %[mlps_tables], %[lps_off]      \n\t"
++        "and     %[tmp2], %[bit], #0xc0                   \n\t"
++        "add     %[tmp1], %[tmp1], %[ptr]                 \n\t"
++        "ldr     %[low], [%[c], %[low_off]]               \n\t"
++        "ldrb    %[tmp2], [%[tmp1], %[tmp2], lsl #1]      \n\t"
++        "sub     %[bit], %[bit], %[tmp2]                  \n\t"
++        "mov     %[tmp1], %[bit]                          \n\t"
++        "cmp     %[low], %[bit], lsl #17                  \n\t"
++        "itt     ge                                       \n\t"
++        "movge   %[tmp1], %[tmp2]                         \n\t"
++        "mvnge   %[ptr], %[ptr]                           \n\t"
++        "clz     %[tmp2], %[tmp1]                         \n\t"
++        "it      ge                                       \n\t"
++        "subge   %[low], %[low], %[bit], lsl #17          \n\t"
++        "sub     %[tmp2], %[tmp2], #23                    \n\t"
++        "and     %[bit], %[ptr], #1                       \n\t"
++        "ldrb    %[mlps_tables], [%[mlps_tables], %[ptr]] \n\t"
++        "lsl     %[low], %[low], %[tmp2]                  \n\t"
++        "lsls    %[ptr], %[low], #16                      \n\t"
++        "bne     1f                                       \n\t"
++        "ldr     %[ptr], [%[c], %[ptr_off]]               \n\t"
++        "lsl     %[tmp2], %[tmp1], %[tmp2]                \n\t"
++#if UNCHECKED_BITSTREAM_READER
++        "strb    %[mlps_tables], [%[state]]               \n\t"
++        "rbit    %[state], %[low]                         \n\t"
++        "ldrh    %[tmp1], [%[ptr]], #2                    \n\t"
++#else
++        "ldr     %[tmp1], [%[c], %[end_off]]              \n\t"
++        "strb    %[mlps_tables], [%[state]]               \n\t"
++        "rbit    %[state], %[low]                         \n\t"
++        "cmp     %[tmp1], %[ptr]                          \n\t"
++#if CONFIG_THUMB
++        "it      cs                                       \n\t"
++        "ldrhcs  %[tmp1], [%[ptr]], #2                    \n\t"
++#else
++        "ldrcsh  %[tmp1], [%[ptr]], #2                    \n\t"
++#endif
++#endif
++        "clz     %[state], %[state]                       \n\t"
++        "movw    %[mlps_tables], #0xffff                  \n\t"
++        "sub     %[state], %[state], #16                  \n\t"
++        "str     %[tmp2], [%[c], %[range_off]]            \n\t"
++        "rev     %[tmp1], %[tmp1]                         \n\t"
++        "str     %[ptr], [%[c], %[ptr_off]]               \n\t"
++        "lsr     %[tmp1], %[tmp1], #15                    \n\t"
++        "sub     %[tmp1], %[tmp1], %[mlps_tables]         \n\t"
++#if CONFIG_THUMB
++        "lsl     %[tmp1], %[tmp1], %[state]               \n\t"
++        "add     %[low], %[low], %[tmp1]                  \n\t"
++#else
++        "add     %[low], %[low], %[tmp1], lsl %[state]    \n\t"
++#endif
++        "str     %[low], [%[c], %[low_off]]               \n\t"
++        "b       2f                                       \n\t"
++        "1:                                               \n\t"
++        "strb    %[mlps_tables], [%[state]]               \n\t"
++        "lsl     %[tmp1], %[tmp1], %[tmp2]                \n\t"
++        "str     %[low], [%[c], %[low_off]]               \n\t"
++        "str     %[tmp1], [%[c], %[range_off]]            \n\t"
++        "2:                                               \n\t"
++    :  // Outputs
++             [state]"+r"(state),
++       [mlps_tables]"+r"(mlps_tables),
++               [bit]"=&r"(bit),
++               [ptr]"=&r"(ptr),
++               [low]"=&r"(low),
++              [tmp1]"=&r"(tmp1),
++              [tmp2]"=&r"(tmp2)
++    :  // Inputs
++               [c]"r"(c),
++         [low_off]"J"(offsetof(CABACContext, low)),
++       [range_off]"J"(offsetof(CABACContext, range)),
++         [ptr_off]"J"(offsetof(CABACContext, bytestream)),
++         [end_off]"J"(offsetof(CABACContext, bytestream_end)),
++         [lps_off]"I"((H264_MLPS_STATE_OFFSET + 128) - H264_LPS_RANGE_OFFSET)
++    :  // Clobbers
++       "cc", "memory"
++    );
++    return bit;
++}
+
+-    __asm__ volatile(
+-        "ldrb       %[bit]        , [%[state]]                  \n\t"
+-        "add        %[r_b]        , %[tables]   , %[lps_off]    \n\t"
+-        "mov        %[tmp]        , %[range]                    \n\t"
+-        "and        %[range]      , %[range]    , #0xC0         \n\t"
+-        "add        %[r_b]        , %[r_b]      , %[bit]        \n\t"
+-        "ldrb       %[range]      , [%[r_b], %[range], lsl #1]  \n\t"
+-        "add        %[r_b]        , %[tables]   , %[norm_off]   \n\t"
+-        "sub        %[r_c]        , %[tmp]      , %[range]      \n\t"
+-        "lsl        %[tmp]        , %[r_c]      , #17           \n\t"
+-        "cmp        %[tmp]        , %[low]                      \n\t"
+-        "it         gt                                          \n\t"
+-        "movgt      %[range]      , %[r_c]                      \n\t"
+-        "itt        cc                                          \n\t"
+-        "mvncc      %[bit]        , %[bit]                      \n\t"
+-        "subcc      %[low]        , %[low]      , %[tmp]        \n\t"
+-        "add        %[r_c]        , %[tables]   , %[mlps_off]   \n\t"
+-        "ldrb       %[tmp]        , [%[r_b], %[range]]          \n\t"
+-        "ldrb       %[r_b]        , [%[r_c], %[bit]]            \n\t"
+-        "lsl        %[low]        , %[low]      , %[tmp]        \n\t"
+-        "lsl        %[range]      , %[range]    , %[tmp]        \n\t"
+-        "uxth       %[r_c]        , %[low]                      \n\t"
+-        "strb       %[r_b]        , [%[state]]                  \n\t"
+-        "tst        %[r_c]        , %[r_c]                      \n\t"
+-        "bne        2f                                          \n\t"
+-        "ldr        %[r_c]        , [%[c], %[byte]]             \n\t"
++#define get_cabac_bypass get_cabac_bypass_arm
++static inline int get_cabac_bypass_arm(CABACContext * const c)
++{
++    uint32_t low = c->low, range, ptr, tmp;
++    int rv;
++    __asm volatile (
++        "ldr        %[range] , [%[c], %[range_off]] \n\t"
++        "mov        %[rv]    , #0                   \n\t"
++        "ldr        %[ptr]   , [%[c], %[ptr_off]]   \n\t"
++        "lsl        %[low]   , #1                   \n\t"
++#if !UNCHECKED_BITSTREAM_READER
++        "ldr        %[tmp]   , [%[c], %[end_off]]   \n\t"
++#endif
++        "cmp        %[low]   , %[range], lsl #17    \n\t"
++        "itt         cs                              \n\t"
++        "subcs      %[low]   , %[low], %[range], lsl #17 \n\t"
++        "movcs      %[rv]    , #1                   \n\t"
+ #if UNCHECKED_BITSTREAM_READER
+-        "ldrh       %[tmp]        , [%[r_c]]                    \n\t"
+-        "add        %[r_c]        , %[r_c]      , #2            \n\t"
+-        "str        %[r_c]        , [%[c], %[byte]]             \n\t"
+-#else
+-        "ldr        %[r_b]        , [%[c], %[end]]              \n\t"
+-        "ldrh       %[tmp]        , [%[r_c]]                    \n\t"
+-        "cmp        %[r_c]        , %[r_b]                      \n\t"
+-        "itt        lt                                          \n\t"
+-        "addlt      %[r_c]        , %[r_c]      , #2            \n\t"
+-        "strlt      %[r_c]        , [%[c], %[byte]]             \n\t"
+-#endif
+-        "sub        %[r_c]        , %[low]      , #1            \n\t"
+-        "add        %[r_b]        , %[tables]   , %[norm_off]   \n\t"
+-        "eor        %[r_c]        , %[low]      , %[r_c]        \n\t"
+-        "rev        %[tmp]        , %[tmp]                      \n\t"
+-        "lsr        %[r_c]        , %[r_c]      , #15           \n\t"
+-        "lsr        %[tmp]        , %[tmp]      , #15           \n\t"
+-        "ldrb       %[r_c]        , [%[r_b], %[r_c]]            \n\t"
+-        "movw       %[r_b]        , #0xFFFF                     \n\t"
+-        "sub        %[tmp]        , %[tmp]      , %[r_b]        \n\t"
+-        "rsb        %[r_c]        , %[r_c]      , #7            \n\t"
+-        "lsl        %[tmp]        , %[tmp]      , %[r_c]        \n\t"
+-        "add        %[low]        , %[low]      , %[tmp]        \n\t"
+-        "2:                                                     \n\t"
+-        :    [bit]"=&r"(bit),
+-             [low]"+&r"(c->low),
+-           [range]"+&r"(c->range),
+-             [r_b]"=&r"(reg_b),
+-             [r_c]"=&r"(reg_c),
+-             [tmp]"=&r"(tmp)
+-        :        [c]"r"(c),
+-             [state]"r"(state),
+-            [tables]"r"(ff_h264_cabac_tables),
+-              [byte]"M"(offsetof(CABACContext, bytestream)),
+-               [end]"M"(offsetof(CABACContext, bytestream_end)),
+-          [norm_off]"I"(H264_NORM_SHIFT_OFFSET),
+-           [lps_off]"I"(H264_LPS_RANGE_OFFSET),
+-          [mlps_off]"I"(H264_MLPS_STATE_OFFSET + 128)
+-        : "memory", "cc"
+-        );
++        "ldrh       %[tmp]   , [%[ptr]], #2         \n\t"
++#else
++        "cmp        %[tmp]   , %[ptr]               \n\t"
++#if CONFIG_THUMB
++        "it         cs                              \n\t"
++        "ldrhcs     %[tmp]   , [%[ptr]], #2         \n\t"
++#else
++        "ldrcsh     %[tmp]   , [%[ptr]], #2         \n\t"
++#endif
++#endif
++        "lsls       %[range] , %[low], #16          \n\t"
++        "bne        1f                              \n\t"
+
+-    return bit & 1;
++        "str        %[ptr]   , [%[c], %[ptr_off]]   \n\t"
++        "rev        %[tmp]   , %[tmp]               \n\t"
++        "add        %[low]   , %[low], %[tmp], lsr #15 \n\t"
++        "movw       %[tmp]   , 0xFFFF               \n\t"
++        "sub        %[low]   , %[tmp]               \n\t"
++        "1:                                         \n\t"
++        "str        %[low]   , [%[c], %[low_off]]   \n\t"
++        : // Outputs
++               [rv]"=&r"(rv),
++              [low]"+r"(low),
++            [range]"=&r"(range),
++              [ptr]"=&r"(ptr),
++              [tmp]"=&r"(tmp)
++        : // Inputs
++                    [c]"r"(c),
++              [low_off]"J"(offsetof(CABACContext, low)),
++            [range_off]"J"(offsetof(CABACContext, range)),
++              [ptr_off]"J"(offsetof(CABACContext, bytestream)),
++              [end_off]"J"(offsetof(CABACContext, bytestream_end))
++        : // Clobbers
++            "memory", "cc"
++    );
++    return rv;
+ }
++
++
++#define get_cabac_bypass_sign get_cabac_bypass_sign_arm
++static inline int get_cabac_bypass_sign_arm(CABACContext * const c, int rv)
++{   // Bypass decode step on c: returns rv, negated when the doubled low is below range << 17
++    uint32_t low = c->low, range, ptr, tmp;
++    __asm volatile (
++        "ldr        %[range] , [%[c], %[range_off]] \n\t"
++        "ldr        %[ptr]   , [%[c], %[ptr_off]]   \n\t"
++        "lsl        %[low]   , #1                   \n\t" // low <<= 1
++#if !UNCHECKED_BITSTREAM_READER
++        "ldr        %[tmp]   , [%[c], %[end_off]]   \n\t" // tmp = c->bytestream_end for the bounds check below
++#endif
++        "cmp        %[low]   , %[range], lsl #17    \n\t" // carry set <=> low >= (range << 17), unsigned
++        "it         cs                              \n\t"
++        "subcs      %[low]   , %[low], %[range], lsl #17 \n\t"
++        "it         cc                              \n\t" // sub above leaves the flags from cmp untouched
++        "rsbcc      %[rv]    , %[rv], #0            \n\t" // rv = -rv
++#if UNCHECKED_BITSTREAM_READER
++        "ldrh       %[tmp]   , [%[ptr]], #2         \n\t" // load 2 bytes, ptr += 2
++#else
++        "cmp        %[tmp]   , %[ptr]               \n\t" // load only while bytestream_end >= ptr
++#if CONFIG_THUMB
++        "it         cs                              \n\t"
++        "ldrhcs     %[tmp]   , [%[ptr]], #2         \n\t"
++#else
++        "ldrcsh     %[tmp]   , [%[ptr]], #2         \n\t"
++#endif
++#endif
++        "lsls       %[range] , %[low], #16          \n\t" // Z set <=> bottom 16 bits of low are zero
++        "bne        1f                              \n\t" // non-zero: skip the refill, just store low
++
++        "str        %[ptr]   , [%[c], %[ptr_off]]   \n\t" // advanced ptr is committed only on the refill path
++        "rev        %[tmp]   , %[tmp]               \n\t" // byte-reverse the loaded halfword into the top half
++        "add        %[low]   , %[low], %[tmp], lsr #15 \n\t"
++        "movw       %[tmp]   , 0xFFFF               \n\t"
++        "sub        %[low]   , %[tmp]               \n\t" // low -= 0xFFFF
++        "1:                                         \n\t"
++        "str        %[low]   , [%[c], %[low_off]]   \n\t"
++        : // Outputs
++               [rv]"+r"(rv),
++              [low]"+r"(low),
++            [range]"=&r"(range),
++              [ptr]"=&r"(ptr),
++              [tmp]"=&r"(tmp)
++        : // Inputs
++                    [c]"r"(c),
++              [low_off]"J"(offsetof(CABACContext, low)),
++            [range_off]"J"(offsetof(CABACContext, range)),
++              [ptr_off]"J"(offsetof(CABACContext, bytestream)),
++              [end_off]"J"(offsetof(CABACContext, bytestream_end))
++        : // Clobbers
++            "memory", "cc"
++    );
++    return rv;
++}
++
+ #endif /* HAVE_ARMV6T2_INLINE */
+
+ #endif /* AVCODEC_ARM_CABAC_H */
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevc_cabac.h
+@@ -0,0 +1,607 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * Copyright (C) 2018 John Cox, Ben Avison for Raspberry Pi (Trading)
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVCODEC_ARM_HEVC_CABAC_H
++#define AVCODEC_ARM_HEVC_CABAC_H
++
++#include "config.h"
++#if HAVE_ARMV6T2_INLINE
++
++#define hevc_mem_bits32 hevc_mem_bits32_arm
++static inline uint32_t hevc_mem_bits32_arm(const void * p, const unsigned int bits)
++{   // Loads the 32-bit word at byte offset bits/8 from p, byte-reverses it, then shifts left by bits%8
++    unsigned int n;
++    __asm__ (
++        "rev        %[n], %[x]                     \n\t" // n = byte-reversed x
++        : [n]"=r"(n)
++        : [x]"r"(*(const uint32_t *)((const uint8_t *)p + (bits >> 3))) // word load at an arbitrary byte offset
++        :
++        );
++    return n << (bits & 7); // drop the bits%8 leading bits; low bits are zero-filled
++}
++
++
++// ---------------------------------------------------------------------------
++//
++// Helper fns - little bits of code where ARM has an instruction that the
++// compiler doesn't know about / use
++
++#define trans_scale_sat trans_scale_sat_arm
++static inline int trans_scale_sat_arm(const int level, const unsigned int scale, const unsigned int scale_m, const unsigned int shift)
++{   // Scales level by scale * scale_m, shifts right by shift, then halves with rounding and clamps
++    int rv;
++    int t = ((level * (int)(scale * scale_m)) >> shift) + 1; // the +1 rounds the >> 1 done by ssat below
++
++    __asm__ (
++    "ssat %[rv], #16, %[t], ASR #1 \n\t" // rv = (t >> 1) saturated to the signed 16-bit range
++    : [rv]"=r"(rv)
++    : [t]"r"(t)
++    :
++    );
++    return rv;
++}
++
++#define update_rice update_rice_arm
++static inline void update_rice_arm(uint8_t * const stat_coeff,
++    const unsigned int last_coeff_abs_level_remaining,
++    const unsigned int c_rice_param)
++{   // Adjusts *stat_coeff by -1, 0 or +1 based on (2 * last_coeff_abs_level_remaining) >> c_rice_param
++    int t = last_coeff_abs_level_remaining << 1;
++    __asm__ (
++    "lsrs  %[t], %[t], %[shift]             \n\t" // t >>= c_rice_param; Z set when the result is 0
++
++    "it    eq                               \n\t"
++    "subeq %[stat], %[stat], #1             \n\t" // result was 0: decrement
++    "cmp   %[t], #6                         \n\t" // carry set <=> t >= 6 (unsigned)
++    "adc   %[stat], %[stat], #0             \n\t" // add the carry: increment when t >= 6
++    "usat  %[stat], #8, %[stat]             \n\t" // clamp to 0..255 before writing back to the uint8_t
++    : [stat]"+r"(*stat_coeff),
++         [t]"+r"(t)
++    :  [shift]"r"(c_rice_param)
++    : "cc"
++    );
++}
++
++// ---------------------------------------------------------------------------
++//
++// CABAC get loops
++//
++// Where the loop is simple enough we can normally do 10-30% better than the
++// compiler
++
++// Get the residual greater than 1 bits
++
++#define get_cabac_greater1_bits get_cabac_greater1_bits_arm
++static inline unsigned int get_cabac_greater1_bits_arm(CABACContext * const c, const unsigned int n,
++    uint8_t * const state0)
++{
++    unsigned int i, reg_b, st, tmp, bit, rv;
++     __asm__ (
++         "mov        %[i]          , #0                          \n\t"  // i = number of bins decoded so far
++         "mov        %[rv]         , #0                          \n\t"  // rv accumulates the decoded bins
++         "1:                                                     \n\t"
++         "add        %[i]          , %[i]        , #1            \n\t"
++         "cmp        %[rv]         , #0                          \n\t"
++         "ite        eq                                          \n\t"
++         "usateq     %[st]         , #2          , %[i]          \n\t"  // st = min(i, 3) while rv is still 0 ...
++         "movne      %[st]         , #0                          \n\t"  // ... otherwise st = 0
++         "sub        %[r_b]        , %[mlps_tables], %[lps_off]  \n\t"
++         "and        %[tmp]        , %[range]    , #0xC0         \n\t"
++         // Load state0[st], look up a table byte indexed by that state and (range & 0xC0), subtract it from range
++         "ldrb       %[bit]        , [%[state0], %[st]]          \n\t"
++         "add        %[r_b]        , %[r_b]      , %[bit]        \n\t"
++         "ldrb       %[tmp]        , [%[r_b], %[tmp], lsl #1]    \n\t"
++         "sub        %[range]      , %[range]    , %[tmp]        \n\t"
++         // If low >= range << 17: take that off low, make the table value the new range and invert bit
++         "cmp        %[low]        , %[range], lsl #17           \n\t"
++         "ittt       ge                                          \n\t"
++         "subge      %[low]        , %[low]      , %[range], lsl #17 \n\t"
++         "movge      %[range]      , %[tmp]                      \n\t"
++         "mvnge      %[bit]        , %[bit]                      \n\t"
++         // Shift low and range left by clz(range) - 23, write back the next state, shift the bin into rv
++         "clz        %[tmp]        , %[range]                    \n\t"
++         "sub        %[tmp]        , #23                         \n\t"
++         "ldrb       %[r_b]        , [%[mlps_tables], %[bit]]    \n\t"
++         "and        %[bit]        , %[bit]      , #1            \n\t"
++         "strb       %[r_b]        , [%[state0], %[st]]          \n\t"
++         "lsl        %[low]        , %[low]      , %[tmp]        \n\t"
++         "orr        %[rv]         , %[bit]      , %[rv], lsl #1 \n\t"
++         "lsl        %[range]      , %[range]    , %[tmp]        \n\t"
++
++// There is a small speed gain from combining both conditions, using a single
++// branch and then working out what that meant later
++         "lsls       %[tmp]        , %[low]      , #16           \n\t"  // tmp == 0 means the low 16 bits of low are empty
++         "it         ne                                          \n\t"
++         "cmpne      %[n]          , %[i]                        \n\t"
++         "bne        1b                                          \n\t"  // loop while no reload is needed and i != n
++
++// If reload is not required then we must have run out of flags to decode
++         "tst        %[tmp]        , %[tmp]                      \n\t"
++         "bne        2f                                          \n\t"
++
++// Do reload
++         "ldrh       %[tmp]        , [%[bptr]]   , #2            \n\t"  // read 2 bytes and advance c->bytestream by 2
++         "rbit       %[bit]        , %[low]                      \n\t"
++         "movw       %[r_b]        , #0xFFFF                     \n\t"
++         "clz        %[bit]        , %[bit]                      \n\t"
++         "rev        %[tmp]        , %[tmp]                      \n\t"
++         "sub        %[bit]        , %[bit]      , #16           \n\t"
++         "cmp        %[n]          , %[i]                        \n\t"  // flags for the final bne: decode more unless i == n
++         "rsb        %[tmp]        , %[r_b]      , %[tmp], lsr #15 \n\t"
++
++#if CONFIG_THUMB
++         "lsl        %[tmp]        , %[tmp]      , %[bit]        \n\t"
++         "add        %[low]        , %[low]      , %[tmp]        \n\t"
++#else
++         "add        %[low]        , %[low]      , %[tmp], lsl %[bit] \n\t"
++#endif
++
++         "bne        1b                                          \n\t"
++         "2:                                                     \n\t"
++         :    [bit]"=&r"(bit),
++              [low]"+r"(c->low),
++            [range]"+r"(c->range),
++              [r_b]"=&r"(reg_b),
++             [bptr]"+r"(c->bytestream),
++                [i]"=&r"(i),
++              [tmp]"=&r"(tmp),
++               [st]"=&r"(st),
++               [rv]"=&r"(rv)
++          :  [state0]"r"(state0),
++                  [n]"r"(n),
++        [mlps_tables]"r"(ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET + 128),
++            [lps_off]"I"((H264_MLPS_STATE_OFFSET + 128) - H264_LPS_RANGE_OFFSET)
++         : "memory", "cc"
++    );
++    return rv;  // decoded bins, the first one in the most significant used bit
++}
++
++
++// n must be > 0 on entry; returns p advanced past the index bytes written
++#define get_cabac_sig_coeff_flag_idxs get_cabac_sig_coeff_flag_idxs_arm
++static inline uint8_t * get_cabac_sig_coeff_flag_idxs_arm(CABACContext * const c, uint8_t * const state0,
++    unsigned int n,
++    const uint8_t * ctx_map,
++    uint8_t * p)
++{
++    unsigned int reg_b, tmp, st, bit;
++     __asm__ (
++// Get bin from map
++#if CONFIG_THUMB
++         "add        %[ctx_map]    , %[n]                        \n\t"
++         "ldrb       %[st]         , [%[ctx_map]]                \n\t"
++#else
++         "ldrb       %[st]         , [%[ctx_map], %[n]]!         \n\t"
++#endif
++         "1:                                                     \n\t"
++
++// Load state & ranges
++         "ldrb       %[bit]        , [%[state0], %[st]]          \n\t"
++         "and        %[tmp]        , %[range]    , #0xC0         \n\t"
++         "sub        %[r_b]        , %[mlps_tables], %[lps_off]  \n\t"
++         "add        %[r_b]        , %[r_b]      , %[tmp], lsl #1 \n\t"
++         "ldrb       %[tmp]        , [%[r_b], %[bit]]            \n\t"
++         "sub        %[range]      , %[range]    , %[tmp]        \n\t"
++         // If low >= range << 17: invert bit, take that off low and make the table value the new range
++         "cmp        %[low]        , %[range], lsl #17           \n\t"
++         "ittt       ge                                          \n\t"
++         "mvnge      %[bit]        , %[bit]                      \n\t"
++         "subge      %[low]        , %[low]      , %[range], lsl #17 \n\t"
++         "movge      %[range]      , %[tmp]                      \n\t"
++
++// Renorm
++         "clz        %[tmp]        , %[range]                    \n\t"
++         "ldrb       %[r_b]        , [%[mlps_tables], %[bit]]    \n\t"
++         "sub        %[tmp]        , #23                         \n\t"
++         "strb       %[r_b]        , [%[state0], %[st]]          \n\t"  // write back the next state
++         "tst        %[bit]        , #1                          \n\t"  // was the decoded flag set?
++         "ldrb       %[st]         , [%[ctx_map], #-1]!          \n\t"  // step ctx_map back one entry for the next pass
++         "lsl        %[low]        , %[low]      , %[tmp]        \n\t"
++// GCC asm seems to need strbne written differently for thumb and arm
++#if CONFIG_THUMB
++         "it         ne                                          \n\t"
++         "strbne     %[n]          , [%[idx]]    , #1            \n\t"
++#else
++         "strneb     %[n]          , [%[idx]]    , #1            \n\t"
++#endif
++         // The store above appends the current n to the output list when the flag was set
++// There is a small speed gain from combining both conditions, using a single
++// branch and then working out what that meant later
++         "subs       %[n]          , %[n]        , #1            \n\t"
++         "lsl        %[range]      , %[range]    , %[tmp]        \n\t"
++#if CONFIG_THUMB
++         "itt        ne                                          \n\t"
++         "lslsne     %[tmp]        , %[low]      , #16           \n\t"
++#else
++         "lslnes     %[tmp]        , %[low]      , #16           \n\t"
++#endif
++         "bne        1b                                          \n\t"
++
++// If we have bits left then n must be 0 so give up now
++         "lsls       %[tmp]        , %[low]      , #16           \n\t"
++         "bne        2f                                          \n\t"
++
++// Do reload
++         "ldrh       %[tmp]        , [%[bptr]]   , #2            \n\t"  // read 2 bytes and advance c->bytestream by 2
++         "rbit       %[bit]        , %[low]                      \n\t"
++         "movw       %[r_b]        , #0xFFFF                     \n\t"
++         "clz        %[bit]        , %[bit]                      \n\t"
++         "cmp        %[n]          , #0                          \n\t"  // flags for the final bne: decode more unless n == 0
++         "rev        %[tmp]        , %[tmp]                      \n\t"
++         "sub        %[bit]        , %[bit]      , #16           \n\t"
++         "rsb        %[tmp]        , %[r_b]      , %[tmp], lsr #15 \n\t"
++
++#if CONFIG_THUMB
++         "lsl        %[tmp]        , %[tmp]      , %[bit]        \n\t"
++         "add        %[low]        , %[low]      , %[tmp]        \n\t"
++#else
++         "add        %[low]        , %[low]      , %[tmp], lsl %[bit] \n\t"
++#endif
++
++// Check to see if we still have more to do
++         "bne        1b                                          \n\t"
++         "2:                                                     \n\t"
++         :    [bit]"=&r"(bit),
++              [low]"+r"(c->low),
++            [range]"+r"(c->range),
++              [r_b]"=&r"(reg_b),
++             [bptr]"+r"(c->bytestream),
++              [idx]"+r"(p),
++                [n]"+r"(n),
++              [tmp]"=&r"(tmp),
++               [st]"=&r"(st),
++          [ctx_map]"+r"(ctx_map)
++          :  [state0]"r"(state0),
++        [mlps_tables]"r"(ff_h264_cabac_tables + H264_MLPS_STATE_OFFSET + 128),
++            [lps_off]"I"((H264_MLPS_STATE_OFFSET + 128) - H264_LPS_RANGE_OFFSET)
++         : "memory", "cc"
++    );
++
++    return p;
++}
++
++// ---------------------------------------------------------------------------
++//
++// CABAC_BY22 functions
++
++
++#define get_cabac_by22_start get_cabac_by22_start_arm
++static inline void get_cabac_by22_start_arm(CABACContext * const c)
++{
++    const uint8_t *ptr = c->bytestream;
++    register uint32_t low __asm__("r1"), range __asm__("r2");  // fixed ascending registers for the ldmia below
++    uint32_t m, range8, bits;
++#if !USE_BY22_DIV
++    uintptr_t inv;
++#endif
++    // Field layout the asm relies on: ldmia reads low+range; one 32-bit store covers by22.bits+by22.range
++    av_assert2(offsetof (CABACContext, low) == 0);
++    av_assert2(offsetof (CABACContext, range) == 4);
++    av_assert2(offsetof (CABACContext, by22.range) == offsetof (CABACContext, by22.bits) + 2);
++    __asm__ volatile (
++        "ldmia   %[c], {%[low], %[range]}                         \n\t"  // low = c->low, range = c->range
++        : // Outputs
++               [low]"=r"(low),
++             [range]"=r"(range)
++        : // Inputs
++                 [c]"r"(c)
++        : // Clobbers
++    );
++#if !USE_BY22_DIV
++    inv = (uintptr_t)cabac_by22_inv_range;  // table base; replaced by the looked-up entry in the asm below
++#endif
++    __asm__ volatile (
++        "ldr     %[m], [%[ptr]], #-("AV_STRINGIFY(CABAC_BITS)"/8) \n\t"  // m = word at ptr; ptr steps back CABAC_BITS/8 bytes
++#if !USE_BY22_DIV
++        "uxtb    %[range8], %[range]                              \n\t"
++#endif
++        "rbit    %[bits], %[low]                                  \n\t"
++        "lsl     %[low], %[low], #22 - "AV_STRINGIFY(CABAC_BITS)" \n\t"
++        "clz     %[bits], %[bits]                                 \n\t"  // bits = number of trailing zero bits in the old low
++        "str     %[ptr], [%[c], %[ptr_off]]                       \n\t"  // write the adjusted pointer to c->bytestream
++        "rev     %[m], %[m]                                       \n\t"
++        "rsb     %[ptr], %[bits], #9 + "AV_STRINGIFY(CABAC_BITS)" \n\t"  // ptr register is reused as a shift count from here
++        "eor     %[m], %[m], #0x80000000                          \n\t"
++#if !USE_BY22_DIV
++        "ldr     %[inv], [%[inv], %[range8], lsl #2]              \n\t"  // inv = cabac_by22_inv_range[range & 0xff]
++        "pkhbt   %[range], %[bits], %[range], lsl #16             \n\t"  // range = bits (low half) | range << 16 (high half)
++        "str     %[range], [%[c], %[bits_off]]                    \n\t"  // stores by22.bits and by22.range together
++#else
++        "strh    %[bits], [%[c], %[bits_off]]                     \n\t"
++#endif
++#if CONFIG_THUMB
++        "lsr     %[m], %[ptr]                                     \n\t"
++        "eor     %[range], %[low], %[m]                           \n\t"
++#else
++        "eor     %[range], %[low], %[m], lsr %[ptr]               \n\t"
++#endif
++        : // Outputs
++               [ptr]"+&r"(ptr),
++               [low]"+&r"(low),
++             [range]"+&r"(range),
++#if !USE_BY22_DIV
++               [inv]"+&r"(inv),
++#endif
++                 [m]"=&r"(m),
++            [range8]"=&r"(range8),
++              [bits]"=&r"(bits)
++        : // Inputs
++                   [c]"r"(c),
++            [bits_off]"J"(offsetof (CABACContext, by22.bits)),
++             [ptr_off]"J"(offsetof (CABACContext, bytestream))
++        : // Clobbers
++            "memory"
++    );
++    c->low = range;  // the asm leaves the new low value in the "range" register
++#if !USE_BY22_DIV
++    c->range = inv;  // c->range now holds the table entry loaded above
++#endif
++}
++
++#define get_cabac_by22_peek get_cabac_by22_peek_arm
++static inline uint32_t get_cabac_by22_peek_arm(const CABACContext *const c)
++{
++    uint32_t rv = c->low &~ 1, tmp;  // start from c->low with bit 0 cleared
++    __asm__ (
++        "cmp      %[inv] , #0                    \n\t"
++        "it       ne                             \n\t"
++        "umullne  %[tmp] , %[rv] , %[inv], %[rv] \n\t"  // if c->range != 0: rv = high 32 bits of rv * c->range
++        :  // Outputs
++             [rv]"+r"(rv),
++             [tmp]"=r"(tmp)
++        :  // Inputs
++             [inv]"r"(c->range)
++        :  // Clobbers
++                "cc"
++    );
++    return rv << 1;
++}
++
++#define get_cabac_by22_flush get_cabac_by22_flush_arm
++static inline void get_cabac_by22_flush_arm(CABACContext *const c, const unsigned int n, uint32_t val)
++{
++    uint32_t bits, ptr, tmp1, tmp2;
++    __asm__ volatile (
++        "ldrh    %[bits], [%[cc], %[bits_off]]     \n\t"  // bits = c->by22.bits
++        "ldr     %[ptr], [%[cc], %[ptr_off]]       \n\t"  // ptr = c->bytestream
++        "rsb     %[tmp1], %[n], #32                \n\t"
++        "add     %[bits], %[bits], %[n]            \n\t"  // bits += n
++        "ldrh    %[tmp2], [%[cc], %[range_off]]    \n\t"  // tmp2 = c->by22.range
++        "lsr     %[tmp1], %[val], %[tmp1]          \n\t"  // tmp1 = val >> (32 - n)
++        "ldr     %[val], [%[cc], %[low_off]]       \n\t"  // val register now holds c->low
++#if CONFIG_THUMB
++        "add     %[ptr], %[ptr], %[bits], lsr #3   \n\t"
++        "ldr     %[ptr], [%[ptr]]                  \n\t"
++#else
++        "ldr     %[ptr], [%[ptr], %[bits], lsr #3] \n\t"
++#endif
++        "mul     %[tmp1], %[tmp2], %[tmp1]         \n\t"  // tmp1 = by22.range * (val >> (32 - n))
++        "and     %[tmp2], %[bits], #7              \n\t"
++        "strh    %[bits], [%[cc], %[bits_off]]     \n\t"  // write the updated bit count to c->by22.bits
++        "rev     %[ptr], %[ptr]                    \n\t"  // byte-swap the word loaded from bytestream + (bits >> 3)
++        "lsl     %[tmp1], %[tmp1], #23             \n\t"
++#if CONFIG_THUMB
++        "lsl     %[val], %[n]                      \n\t"
++        "sub     %[val], %[tmp1]                   \n\t"
++#else
++        "rsb     %[val], %[tmp1], %[val], lsl %[n] \n\t"
++#endif
++        "lsl     %[ptr], %[ptr], %[tmp2]           \n\t"
++        "orr     %[val], %[val], %[ptr], lsr #9    \n\t"
++        "str     %[val], [%[cc], %[low_off]]       \n\t"  // c->low = ((low << n) - tmp1) | refill bits
++        :  // Outputs
++            [val]"+r"(val),
++           [bits]"=&r"(bits),
++            [ptr]"=&r"(ptr),
++           [tmp1]"=&r"(tmp1),
++           [tmp2]"=&r"(tmp2)
++        :  // Inputs
++                  [cc]"r"(c),
++                   [n]"r"(n),
++            [bits_off]"J"(offsetof(CABACContext, by22.bits)),
++             [ptr_off]"J"(offsetof(CABACContext, bytestream)),
++           [range_off]"J"(offsetof(CABACContext, by22.range)),
++             [low_off]"J"(offsetof(CABACContext, low))
++        :  // Clobbers
++           "memory"
++    );
++}
++
++#define coeff_abs_level_remaining_decode_bypass coeff_abs_level_remaining_decode_bypass_arm
++static inline int coeff_abs_level_remaining_decode_bypass_arm(CABACContext *const c, unsigned int rice_param)
++{
++    uint32_t last_coeff_abs_level_remaining;  // bound to %[remain]; this is the return value
++    uint32_t prefix, n1, range, n2, ptr, tmp1, tmp2;
++    __asm__ volatile (
++        "ldr     %[remain], [%[cc], %[low_off]]               \n\t"  // remain = c->low ...
++        "ldr     %[prefix], [%[cc], %[range_off]]             \n\t"
++        "bic     %[remain], %[remain], #1                     \n\t"  // ... with bit 0 cleared
++        "ldrh    %[tmp2], [%[cc], %[by22_bits_off]]           \n\t"  // tmp2 = c->by22.bits
++        "ldr     %[ptr], [%[cc], %[ptr_off]]                  \n\t"  // ptr = c->bytestream
++        "cmp     %[prefix], #0                                \n\t"
++        "it      ne                                           \n\t"
++        "umullne %[prefix], %[remain], %[prefix], %[remain]   \n\t"  // if c->range != 0: remain = high 32 bits of remain * c->range
++        "ldrh    %[range], [%[cc], %[by22_range_off]]         \n\t"  // range = c->by22.range
++        "lsl     %[remain], %[remain], #1                     \n\t"
++        "mvn     %[prefix], %[remain]                         \n\t"
++        "clz     %[prefix], %[prefix]                         \n\t"  // prefix = number of leading 1 bits in remain
++        "rsbs    %[n1], %[prefix], #2                         \n\t"  // n1 = 2 - prefix; carry is clear when prefix > 2
++        "bcc     1f                                           \n\t"  // prefix > 2: handled at 1: / 2:
++        "adc     %[n1], %[rice], %[prefix]                    \n\t"  // carry is set here, so n1 = rice + prefix + 1
++        "add     %[tmp2], %[tmp2], %[n1]                      \n\t"
++        "rsb     %[n2], %[n1], #32                            \n\t"
++        "and     %[tmp1], %[tmp2], #7                         \n\t"
++        "strh    %[tmp2], [%[cc], %[by22_bits_off]]           \n\t"  // c->by22.bits += n1
++        "lsr     %[tmp2], %[tmp2], #3                         \n\t"
++        "lsr     %[n2], %[remain], %[n2]                      \n\t"
++        "mul     %[n2], %[range], %[n2]                       \n\t"
++        "ldr     %[range], [%[cc], %[low_off]]                \n\t"  // range register now holds c->low
++        "ldr     %[ptr], [%[ptr], %[tmp2]]                    \n\t"
++        "rsb     %[tmp2], %[rice], #31                        \n\t"
++        "lsl     %[remain], %[remain], %[prefix]              \n\t"
++        "lsl     %[n2], %[n2], #23                            \n\t"
++#if CONFIG_THUMB
++        "lsl     %[range], %[n1]                              \n\t"
++        "sub     %[range], %[n2]                              \n\t"
++#else
++        "rsb     %[range], %[n2], %[range], lsl %[n1]         \n\t"
++#endif
++        "rev     %[ptr], %[ptr]                               \n\t"
++        "lsl     %[n2], %[prefix], %[rice]                    \n\t"  // n2 = prefix << rice
++#if CONFIG_THUMB
++        "lsr     %[remain], %[tmp2]                           \n\t"
++        "add     %[remain], %[n2]                             \n\t"
++#else
++        "add     %[remain], %[n2], %[remain], lsr %[tmp2]     \n\t"
++#endif
++        "b       3f                                           \n\t"
++        "1:                                                   \n\t"  // prefix > 2
++        "add     %[n2], %[rice], %[prefix], lsl #1            \n\t"  // n2 = rice + 2 * prefix
++        "cmp     %[n2], %[peek_bits_plus_2]                   \n\t"
++        "bhi     2f                                           \n\t"  // n2 > CABAC_BY22_PEEK_BITS + 2: two-step path at 2:
++        "sub     %[n1], %[n2], #2                             \n\t"  // n1 = rice + 2 * prefix - 2
++        "add     %[tmp2], %[tmp2], %[n1]                      \n\t"
++        "rsb     %[n2], %[n1], #32                            \n\t"
++        "strh    %[tmp2], [%[cc], %[by22_bits_off]]           \n\t"  // c->by22.bits += n1
++        "lsr     %[tmp1], %[tmp2], #3                         \n\t"
++        "lsr     %[n2], %[remain], %[n2]                      \n\t"
++        "mul     %[n2], %[range], %[n2]                       \n\t"
++        "rsb     %[range], %[rice], #34                       \n\t"
++        "ldr     %[ptr], [%[ptr], %[tmp1]]                    \n\t"
++        "and     %[tmp1], %[tmp2], #7                         \n\t"
++        "lsl     %[remain], %[remain], %[prefix]              \n\t"
++        "ldr     %[tmp2], [%[cc], %[low_off]]                 \n\t"
++        "rsb     %[prefix], %[prefix], %[range]               \n\t"
++        "orr     %[remain], %[remain], #0x80000000            \n\t"
++        "rev     %[ptr], %[ptr]                               \n\t"
++        "lsl     %[n2], %[n2], #23                            \n\t"
++        "mov     %[range], #2                                 \n\t"
++#if CONFIG_THUMB
++        "lsl     %[tmp2], %[n1]                               \n\t"
++        "sub     %[tmp2], %[n2]                               \n\t"
++#else
++        "rsb     %[tmp2], %[n2], %[tmp2], lsl %[n1]           \n\t"
++#endif
++        "lsl     %[ptr], %[ptr], %[tmp1]                      \n\t"
++        "lsl     %[rice], %[range], %[rice]                   \n\t"  // rice register becomes 2 << rice
++        "orr     %[range], %[tmp2], %[ptr], lsr #9            \n\t"
++#if CONFIG_THUMB
++        "lsr     %[remain], %[prefix]                         \n\t"
++        "add     %[remain], %[rice]                           \n\t"
++#else
++        "add     %[remain], %[rice], %[remain], lsr %[prefix] \n\t"
++#endif
++        "b       4f                                           \n\t"
++        "2:                                                   \n\t"  // long code: consume the prefix first, then peek again
++        "add     %[n1], %[tmp2], %[prefix]                    \n\t"
++#if CONFIG_THUMB
++        "add     %[tmp2], %[ptr], %[n1], lsr #3               \n\t"
++        "ldr     %[tmp2], [%[tmp2]]                           \n\t"
++#else
++        "ldr     %[tmp2], [%[ptr], %[n1], lsr #3]             \n\t"
++#endif
++        "rsb     %[tmp1], %[prefix], #32                      \n\t"
++        "push    {%[rice]}                                    \n\t"  // park rice on the stack; its register is scratch until the pop
++        "and     %[rice], %[n1], #7                           \n\t"
++        "lsr     %[tmp1], %[remain], %[tmp1]                  \n\t"
++        "ldr     %[ptr], [%[cc], %[low_off]]                  \n\t"
++        "mul     %[remain], %[range], %[tmp1]                 \n\t"
++        "rev     %[tmp2], %[tmp2]                             \n\t"
++        "rsb     %[n2], %[prefix], %[n2]                      \n\t"
++        "ldr     %[tmp1], [%[cc], %[range_off]]               \n\t"
++        "lsl     %[rice], %[tmp2], %[rice]                    \n\t"
++        "sub     %[tmp2], %[n2], #2                           \n\t"
++        "lsl     %[remain], %[remain], #23                    \n\t"
++#if CONFIG_THUMB
++        "lsl     %[ptr], %[prefix]                            \n\t"
++        "rsb     %[remain], %[ptr]                            \n\t"
++#else
++        "rsb     %[remain], %[remain], %[ptr], lsl %[prefix]  \n\t"
++#endif
++        "orr     %[remain], %[remain], %[rice], lsr #9        \n\t"
++        "add     %[prefix], %[n1], %[tmp2]                    \n\t"
++        "bic     %[n1], %[remain], #1                         \n\t"
++        "ldr     %[ptr], [%[cc], %[ptr_off]]                  \n\t"
++        "cmp     %[tmp1], #0                                  \n\t"
++        "rsb     %[rice], %[tmp2], #32                        \n\t"
++        "it      ne                                           \n\t"
++        "umullne %[tmp1], %[n1], %[tmp1], %[n1]               \n\t"  // same multiply-by-c->range step as at the top
++        "and     %[tmp1], %[prefix], #7                       \n\t"
++#if CONFIG_THUMB
++        "add     %[ptr], %[ptr], %[prefix], lsr #3            \n\t"
++        "ldr     %[ptr], [%[ptr]]                             \n\t"
++#else
++        "ldr     %[ptr], [%[ptr], %[prefix], lsr #3]          \n\t"
++#endif
++        "lsl     %[n1], %[n1], #1                             \n\t"
++        "lsr     %[rice], %[n1], %[rice]                      \n\t"
++        "rsb     %[n2], %[n2], #34                            \n\t"
++        "mul     %[range], %[range], %[rice]                  \n\t"
++        "pop     {%[rice]}                                    \n\t"  // restore the original rice value
++        "rev     %[ptr], %[ptr]                               \n\t"
++        "orr     %[n1], %[n1], #0x80000000                    \n\t"
++        "strh    %[prefix], [%[cc], %[by22_bits_off]]         \n\t"  // write the updated bit count to c->by22.bits
++        "mov     %[prefix], #2                                \n\t"
++        "lsl     %[range], %[range], #23                      \n\t"
++#if CONFIG_THUMB
++        "lsl     %[remain], %[tmp2]                           \n\t"
++        "rsb     %[range], %[remain]                          \n\t"
++#else
++        "rsb     %[range], %[range], %[remain], lsl %[tmp2]   \n\t"
++#endif
++        "lsl     %[remain], %[prefix], %[rice]                \n\t"  // remain = 2 << rice
++#if CONFIG_THUMB
++        "lsr     %[n1], %[n2]                                 \n\t"
++        "add     %[remain], %[n1]                             \n\t"
++#else
++        "add     %[remain], %[remain], %[n1], lsr %[n2]       \n\t"
++#endif
++        "3:                                                   \n\t"  // merge the refill word into the new low value
++        "lsl     %[ptr], %[ptr], %[tmp1]                      \n\t"
++        "orr     %[range], %[range], %[ptr], lsr #9           \n\t"
++        "4:                                                   \n\t"
++        "str     %[range], [%[cc], %[low_off]]                \n\t"  // c->low = new low value
++        :  // Outputs
++            [remain]"=&r"(last_coeff_abs_level_remaining),
++              [rice]"+r"(rice_param),
++            [prefix]"=&r"(prefix),
++                [n1]"=&r"(n1),
++             [range]"=&r"(range),
++                [n2]"=&r"(n2),
++               [ptr]"=&r"(ptr),
++              [tmp1]"=&r"(tmp1),
++              [tmp2]"=&r"(tmp2)
++        :  // Inputs
++                          [cc]"r"(c),
++            [peek_bits_plus_2]"I"(CABAC_BY22_PEEK_BITS + 2),
++                     [low_off]"J"(offsetof(CABACContext, low)),
++                   [range_off]"J"(offsetof(CABACContext, range)),
++               [by22_bits_off]"J"(offsetof(CABACContext, by22.bits)),
++              [by22_range_off]"J"(offsetof(CABACContext, by22.range)),
++                     [ptr_off]"J"(offsetof(CABACContext, bytestream))
++        :  // Clobbers
++           "cc", "memory"
++    );
++    return last_coeff_abs_level_remaining;
++}
++
++#endif /* HAVE_ARMV6T2_INLINE */
++
++#endif /* AVCODEC_ARM_HEVC_CABAC_H */
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevc_idct_fn_neon.S
+@@ -0,0 +1,183 @@
++/*
++ * ARM NEON optimised IDCT functions for HEVC decoding
++ * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi>
++ * Copyright (C) 2018 John Cox, Ben Avison for Raspberry Pi (Trading)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++@ Included multiple times from hevc_idct_neon.S
++@ Macros defined there
++
++#define DC_SHIFT  (15 - BIT_DEPTH)                // right shift applied by the *_dc functions
++#define DC_ADD    (1 | (1 << (14 - BIT_DEPTH)))   // added to the DC value before that shift
++#define TRN_SHIFT (20 - BIT_DEPTH)                // shift used by the second transform pass
++
++function JOIN(ff_hevc_rpi_idct_4x4_dc_neon_, BIT_DEPTH), export=1
++        ldrsh       r1, [r0]                 // r1 = first coefficient, sign-extended
++        add         r1, #DC_ADD
++        asr         r1, #DC_SHIFT            // r1 = (r1 + DC_ADD) >> DC_SHIFT
++        vdup.16     q0, r1
++        vdup.16     q1, r1
++        vst1.16     {q0, q1}, [r0]           // fill all 16 halfwords (32 bytes) at r0 with that value
++        bx lr
++endfunc
++
++function JOIN(ff_hevc_rpi_idct_8x8_dc_neon_, BIT_DEPTH), export=1
++        ldrsh       r1, [r0]                 // r1 = first coefficient, sign-extended
++        add         r2, r0, #32              // r2 = second write pointer, 32 bytes after r0
++        mov         r3, #64                  // each pointer advances 64 bytes per store
++        add         r1, #DC_ADD
++        asr         r1, #DC_SHIFT            // r1 = (r1 + DC_ADD) >> DC_SHIFT
++        vdup.16     q8, r1
++        vdup.16     q9, r1
++        vst1.16     {q8, q9}, [r0], r3       // four 32-byte stores = 128 bytes = 64 halfwords
++        vst1.16     {q8, q9}, [r2], r3
++        vst1.16     {q8, q9}, [r0]
++        vst1.16     {q8, q9}, [r2]
++        bx lr
++endfunc
++
++function JOIN(ff_hevc_rpi_idct_16x16_dc_neon_, BIT_DEPTH), export=1
++        ldrsh       r1, [r0]                 // r1 = first coefficient, sign-extended
++        add         r2, r0, #32              // r2 = second write pointer, 32 bytes after r0
++        mov         r3, #64                  // each pointer advances 64 bytes per store
++        add         r1, #DC_ADD
++        mov         ip, #16*16               // ip = halfwords still to write
++        asr         r1, #DC_SHIFT            // r1 = (r1 + DC_ADD) >> DC_SHIFT
++        vdup.16     q8, r1
++        vdup.16     q9, r1
++1:      vst1.16     {q8, q9}, [r0], r3
++        subs        ip, ip, #32              // the two stores per pass write 32 halfwords
++        vst1.16     {q8, q9}, [r2], r3
++        bhi         1b                       // loop while ip is still above zero
++        bx lr
++endfunc
++
++function JOIN(ff_hevc_rpi_idct_32x32_dc_neon_, BIT_DEPTH), export=1
++        ldrsh       r1, [r0]                 // r1 = first coefficient, sign-extended
++        add         r2, r0, #32              // r2 = second write pointer, 32 bytes after r0
++        mov         r3, #64                  // each pointer advances 64 bytes per store
++        add         r1, #DC_ADD
++        mov         ip, #32*32               // ip = halfwords still to write
++        asr         r1, #DC_SHIFT            // r1 = (r1 + DC_ADD) >> DC_SHIFT
++        vdup.16     q8, r1
++        vdup.16     q9, r1
++1:      vst1.16     {q8, q9}, [r0], r3
++        subs        ip, ip, #32              // the two stores per pass write 32 halfwords
++        vst1.16     {q8, q9}, [r2], r3
++        bhi         1b                       // loop while ip is still above zero
++        bx lr
++endfunc
++
++
++function JOIN(ff_hevc_rpi_transform_4x4_neon_, BIT_DEPTH), export=1
++        vldr.i32    s0, =0x00240053 // 36 and 83
++        vld1.16     {q14, q15}, [r0 :256]  // coeffs
++        // First pass with a shift of 7 (tr4_shift is a macro supplied by the including file)
++        tr4_shift   #7
++        // Re-interleave q14/q15 between the two passes (presumably a 4x4 transpose - confirm)
++        vzip.16     d28, d29
++        vzip.16     d30, d31
++        vzip.32     q14, q15
++        // Second pass with the bit-depth dependent shift
++        tr4_shift   #TRN_SHIFT
++
++        vst4.16     {q14, q15}, [r0 :256]  // write the result back over the input block
++        bx lr
++        // Literal pool holding the constant loaded by the vldr above
++        .ltorg
++endfunc
++
++
++
++function JOIN(ff_hevc_rpi_transform_luma_4x4_neon_, BIT_DEPTH), export=1
++        vmov.i32    d0, #0x4a  // 74
++        vld1.16     {q14, q15}, [r0 :256]  // coeffs
++        vmov.i32    d1, #0x1d  // 29
++        vmov.i32    d2, #0x37  // 55
++        // First pass with a shift of 7 (tr4_luma_shift is a macro supplied by the including file)
++        tr4_luma_shift #7
++        // Re-interleave q14/q15 between the two passes (presumably a 4x4 transpose - confirm)
++        vzip.16     d28, d29
++        vzip.16     d30, d31
++        vzip.32     q14, q15
++        // Second pass with the bit-depth dependent shift
++        tr4_luma_shift #TRN_SHIFT
++
++        vst4.16     {q14, q15}, [r0 :256]  // write the result back over the input block
++        bx lr
++endfunc
++
++function JOIN(ff_hevc_rpi_transform_8x8_neon_, BIT_DEPTH), export=1
++        add      r2, r0, #16               // r2 = second pointer, 16 bytes after r0
++        adr      r3, tr4f                  // tr4f is a label defined outside this file
++        vpush    {d8-d15}                  // save d8-d15; restored by the vpop before returning
++        vld1.16  {d0, d1}, [r3]            // load 16 bytes of constants from tr4f
++        mov      r3, #32
++
++        tr8_vert  d16, d17, d18, d19, d24, d25, d26, d27, q8,  q9,  \
++            "sub      r0, r0, #128-8",                              \
++            "sub      r2, r2, #128-8",                              \
++            "cmp      r1, #4"
++        ble      2f                        // flags presumably come from the "cmp r1, #4" handed to tr8_vert
++
++        tr8_vert  d20, d21, d22, d23, d28, d29, d30, d31, q10, q11, \
++            "sub      r0, r0, #128+8",                              \
++            "sub      r2, r2, #128+8+16-32",                        \
++            "mov      r3, #64"
++
++        vzip.16  d16, d17
++        vzip.16  d18, d19
++
++        vzip.16  d20, d21
++        vzip.16  d22, d23
++        vzip.16  d28, d29
++        vzip.16  d30, d31
++        vzip.32  q10, q11
++        vzip.32  q14, q15
++1:                                         // both paths join here
++        vzip.16  d24, d25
++        vzip.16  d26, d27
++        vzip.32  q8, q9
++        vzip.32  q12, q13
++
++        tr8_horiz d16, d17, d18, d19, d20, d21, d22, d23, q8,  q9,  TRN_SHIFT
++        tr8_horiz d24, d25, d26, d27, d28, d29, d30, d31, q12, q13, TRN_SHIFT
++
++        vpop     {d8-d15}
++        bx       lr
++        // r1 <= 4: skip the second tr8_vert and use zeros for q10, q11, q14 and q15 instead
++2:      vmov.i64 q10, #0
++        sub      r0, r0, #8
++        vmov.i64 q11, #0
++        sub      r2, r2, #8+16-32
++        vmov.i64 q14, #0
++        mov      r3, #64
++        vmov.i64 q15, #0
++
++        vzip.16  d16, d17
++        vzip.16  d18, d19
++
++        b        1b
++
++endfunc
++
++#undef DC_SHIFT
++#undef DC_ADD
++#undef TRN_SHIFT
++
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevc_misc_neon.S
+@@ -0,0 +1,267 @@
++/*
++Copyright (c) 2017 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Written by John Cox, Ben Avison
++*/
++
++#include "libavutil/arm/asm.S"
++#include "neon.S"
++
++@ rpi_zap_coeff_vals_neon(
++@   uint16_t * buf,          [r0]
++@   unsigned int log_n_m2)   [r1]
++@ Zeroes 1 << (2 * log_n_m2) blocks of 32 bytes starting at buf (stores use the :256 alignment qualifier)
++function rpi_zap_coeff_vals_neon, export=1
++        mov      ip, #1
++        vmov.i64 q0, #0
++        teq      r1, #0
++        vmov.i64 q1, #0
++        beq      2f        @ log_n_m2 == 0: a single 32-byte store at 2:
++
++        lsl      ip, r1    @ 2, 4 or 8
++        add      r2, r0, #32
++        lsl      ip, r1    @ 4, 16 or 64 = number of 32-byte blocks to zero
++        mov      r3, #64   @ r0 and r2 each advance 64 bytes per store
++1:      vst1.8   {q0,q1}, [r0:256], r3
++        subs     ip, #2    @ two blocks are written per pass
++        vst1.8   {q0,q1}, [r2:256], r3
++        bne      1b
++        bx       lr
++
++2:      vst1.8   {q0,q1}, [r0:256]
++        bx       lr
++endfunc
++
++@ PIC jump tables are more expensive than absolute for A32 code
++.set jent_pic, CONFIG_PIC || CONFIG_THUMB
++
++@ Jump table entry - if in Thumb mode the bottom bit must be set
++@ ? There is probably a real asm instruction to do this but I haven't found it
++.macro jent lab
++.if jent_pic    @ PIC: 16-bit offsets measured from the table label 98
++T       .short ((0 + \lab) - (0 + 98b)) / 2
++A       .short (0 + \lab) - (4 + 98b)
++.else           @ non-PIC: absolute 32-bit addresses
++T       .word   1 + \lab
++A       .word   \lab
++.endif
++.endm
++
++.set expected_next, 0
++
++.macro cpy_compound val, p1, p2, drop_thru=0
++.if \p1 + \p2 != \val
++.error "Bad addition!  \p1 + \p2 != \val"
++.endif
++.if expected_next != 0 && expected_next != \val
++.error "Drop thru failure"
++.endif
++\val\():
++        push       {r0-r3}    @ keep dst, src and both strides intact across the stripe call
++        bl          100\p1\()b    @ copy the first p1 bytes of every row via the callable 100<p1> entry
++        pop        {r0-r3}
++        add         r0, #\p1    @ step dst past the stripe just copied
++        add         r2, #\p1    @ step src past the stripe just copied
++.if \drop_thru == 0
++        b           \p2\()b    @ finish with the p2-wide copy, which returns to the original caller
++.set expected_next, 0
++.else    @ no branch emitted: the next cpy_compound expansion must be the one for p2
++.set expected_next, \p2
++.endif
++.endm
++
++@ ff_hevc_rpi_cpy_blks8x4_neon(
++@   dst         [r0]
++@   dst_stride  [r1]
++@   src         [r2]
++@   src_stride  [r3]
++@   width       [sp, #0] (bytes)
++@   height)     [sp, #4]
++@
++@ Power of 2 widths are directly coded, all others are done in stripes
++@ We expect the vast majority of calls to be power of 2
++@
++@ Currently has min width of 8, but we could make that 4 without issue
++@ Min height is 4
++
++function ff_hevc_rpi_cpy_blks8x4_neon, export=1
++        ldr         r12, [sp, #0]    @ r12 = width in bytes (first stack argument)
++        push       {r11, lr}    @ 8 bytes pushed, so the stack arguments are now 8 bytes further up
++.if jent_pic
++A       adr         lr,  98f - 2
++.else
++A       adr         lr,  98f - 4
++.endif
++        lsr         r12, #3    @ r12 = width / 8 = 1-based index into the jump table at 98
++        ldr         r11, [sp, #(8 + 4)]    @ r11 = height (second stack argument, past the 8 bytes pushed)
++.if jent_pic
++A       lsl         r12, #1
++A       ldrsh       lr,  [lr,  r12]
++A       add         pc,  lr
++T       tbh         [pc, r12, lsl #1]
++.else
++        @ A32 only, Thumb is always PIC
++        ldr         pc,  [lr,  r12, lsl #2]
++.endif
++
++98:     @ jump table: one entry per multiple of 8 bytes of width, 8 to 128
++T       .short      0 @ unused
++        jent        8f
++        jent        16f
++        jent        24f
++        jent        32f
++        jent        40f
++        jent        48f
++        jent        56f
++        jent        64f
++        jent        72f
++        jent        80f
++        jent        88f
++        jent        96f
++        jent        104f
++        jent        112f
++        jent        120f
++        jent        128f
++
++1008:   @ callable entry (reached by bl from cpy_compound); the pop below restores r11 and returns
++        push       {r11, lr}
++8:
++        add         lr,  r2,  r3    @ lr = src + src_stride: second read pointer, one row down
++        lsl         r3,  #1         @ each read pointer now advances two rows per load
++        add         r12, r0,  r1    @ r12 = dst + dst_stride: second write pointer, one row down
++        lsl         r1,  #1         @ each write pointer now advances two rows per store
++1:
++        vld1.32    {d0 }, [r2],  r3
++        vld1.32    {d1 }, [lr],  r3
++        vld1.32    {d2 }, [r2],  r3
++        vld1.32    {d3 }, [lr],  r3
++        subs        r11,  #4        @ four rows copied per iteration
++        vst1.32    {d0 }, [r0],  r1
++        vst1.32    {d1 }, [r12], r1
++        vst1.32    {d2 }, [r0],  r1
++        vst1.32    {d3 }, [r12], r1
++        bgt         1b
++        pop        {r11, pc}
++
++10016:  @ callable entry for the 16-byte copy, same scheme as 1008
++        push       {r11, lr}
++16:
++        add         lr,  r2,  r3
++        lsl         r3,  #1
++        add         r12, r0,  r1
++        lsl         r1,  #1
++1:
++        vld1.32    {q0 }, [r2],  r3
++        vld1.32    {q1 }, [lr],  r3
++        vld1.32    {q2 }, [r2],  r3
++        vld1.32    {q3 }, [lr],  r3
++        subs        r11, #4         @ four rows copied per iteration
++        vst1.32    {q0 }, [r0],  r1
++        vst1.32    {q1 }, [r12], r1
++        vst1.32    {q2 }, [r0],  r1
++        vst1.32    {q3 }, [r12], r1
++        bgt         1b
++        pop        {r11, pc}
++
++10032:  @ callable entry for the 32-byte copy, same scheme as 1008
++        push       {r11, lr}
++32:
++        add         lr,  r2,  r3
++        lsl         r3,  #1
++        add         r12, r0,  r1
++        lsl         r1,  #1
++1:
++        vld1.32    {q8,  q9 }, [r2],  r3
++        vld1.32    {q10, q11}, [lr],  r3
++        vld1.32    {q12, q13}, [r2],  r3
++        vld1.32    {q14, q15}, [lr],  r3
++        subs        r11, #4         @ four rows copied per iteration
++        vst1.32    {q8,  q9 }, [r0],  r1
++        vst1.32    {q10, q11}, [r12], r1
++        vst1.32    {q12, q13}, [r0],  r1
++        vst1.32    {q14, q15}, [r12], r1
++        bgt         1b
++        pop        {r11, pc}
++
++10064:  @ callable entry for the 64-byte copy, same scheme as 1008
++        push       {r11, lr}
++64:
++        add         lr,  r2,  #32   @ second read pointer covers bytes 32..63 of the same row
++        add         r12, r0,  #32   @ second write pointer covers bytes 32..63 of the same row
++1:
++        vld1.32    {q8,  q9 }, [r2],  r3
++        vld1.32    {q10, q11}, [lr],  r3
++        vld1.32    {q12, q13}, [r2],  r3
++        vld1.32    {q14, q15}, [lr],  r3
++        subs        r11, #2         @ two rows copied per iteration
++        vst1.32    {q8,  q9 }, [r0],  r1
++        vst1.32    {q10, q11}, [r12], r1
++        vst1.32    {q12, q13}, [r0],  r1
++        vst1.32    {q14, q15}, [r12], r1
++        bgt         1b
++        pop        {r11, pc}
++
++128:
++        push       {r4, r5}
++        @ We could do this with fewer registers if we jump around but I
++        @ have a primitive urge to load sequentially
++        mov         r4,  #64
++        add         lr,  r2,  #32
++        add         r12, r0,  #32
++        sub         r3,  r4         @ src_stride minus the 64 bytes already stepped within the row
++        sub         r1,  r4         @ dst_stride minus the 64 bytes already stepped within the row
++1:
++        vld1.32    {q8,  q9 }, [r2],  r4
++        vld1.32    {q10, q11}, [lr],  r4
++        vld1.32    {q12, q13}, [r2],  r3
++        vld1.32    {q14, q15}, [lr],  r3
++        subs        r11, #1         @ one 128-byte row copied per iteration
++        vst1.32    {q8,  q9 }, [r0],  r4
++        vst1.32    {q10, q11}, [r12], r4
++        vst1.32    {q12, q13}, [r0],  r1
++        vst1.32    {q14, q15}, [r12], r1
++        bgt         1b
++        pop        {r4, r5, r11, pc}    @ also unwinds the r11/lr pair pushed at function entry
++
++@ Use drop_thru where we can
++cpy_compound 104, 64, 40, 1
++cpy_compound 40, 32, 8
++
++cpy_compound 112, 64, 48, 1
++cpy_compound 48, 32, 16
++
++cpy_compound 120, 64, 56, 1
++cpy_compound 56, 32, 24, 1
++cpy_compound 24, 16, 8
++
++cpy_compound 72, 64, 8
++cpy_compound 80, 64, 16
++cpy_compound 88, 64, 24
++cpy_compound 96, 64, 32
++
++
++endfunc
++
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevc_misc_neon.h
+@@ -0,0 +1,438 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVCODEC_ARM_RPI_HEVC_MISC_H
++#define AVCODEC_ARM_RPI_HEVC_MISC_H
++
++#include "config.h"
++#if HAVE_NEON_INLINE && !CONFIG_THUMB
++
++static av_noinline void ff_hevc_rpi_copy_vert_v2h_neon(uint8_t *dst, const uint8_t *src,
++                                                       int pixel_shift, int height,
++                                                       ptrdiff_t stride_src)
++{  // Gathers one element per source row (rows stride_src bytes apart) and stores them contiguously at dst.
++    const uint8_t *src2 = src + stride_src;  // second read pointer, one source row below src
++    stride_src <<= 1;  // each read pointer now advances two source rows per load
++    switch (pixel_shift)
++    {
++        case 2:  // 32-bit elements: four rows fill one q register
++            __asm__ volatile (
++                "vld1.32     {d0[0]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.32     {d0[1]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.32     {d1[0]}, [%[src]], %[stride_src]  \n\t"
++                "subs        %[height], #4                     \n\t"  // four rows consumed per batch
++                "vld1.32     {d1[1]}, [%[src2]], %[stride_src] \n\t"
++                "beq         2f                                \n\t"
++                "1:                                            \n\t"  // unrolled twice: q1 is gathered while q0 is stored, then vice versa
++                "vld1.32     {d2[0]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.32     {d2[1]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.32     {d3[0]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.32     {d3[1]}, [%[src2]], %[stride_src] \n\t"
++                "subs        %[height], #4                     \n\t"
++                "vst1.32     {q0}, [%[dst]]!                   \n\t"  // contiguous output: dst post-increments
++                "beq         3f                                \n\t"
++                "vld1.32     {d0[0]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.32     {d0[1]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.32     {d1[0]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.32     {d1[1]}, [%[src2]], %[stride_src] \n\t"
++                "subs        %[height], #4                     \n\t"
++                "vst1.32     {q1}, [%[dst]]!                   \n\t"
++                "bne         1b                                \n\t"  // exits only when height reaches exactly 0
++                "2:                                            \n\t"  // tail: flush the batch still held in q0
++                "vst1.32     {q0}, [%[dst]]                    \n\t"
++                "b           4f                                \n\t"
++                "3:                                            \n\t"  // tail: flush the batch still held in q1
++                "vst1.32     {q1}, [%[dst]]                    \n\t"
++                "4:                                            \n\t"
++                :  // Outputs
++                           [src]"+r"(src),
++                          [src2]"+r"(src2),
++                           [dst]"+r"(dst),
++                        [height]"+r"(height)
++                :  // Inputs
++                    [stride_src]"r"(stride_src)
++                :  // Clobbers
++                    "cc", "memory"  // NOTE(review): d0-d3 are written but not listed here - confirm this is intentional
++            );
++            break;
++        case 1:  // 16-bit elements: even rows go to d0/d2, odd rows to d1/d3, vzip interleaves them
++            __asm__ volatile (
++                "vld1.16     {d0[0]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.16     {d1[0]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.16     {d0[1]}, [%[src]], %[stride_src]  \n\t"
++                "subs        %[height], #4                     \n\t"  // four rows consumed per batch
++                "vld1.16     {d1[1]}, [%[src2]], %[stride_src] \n\t"
++                "beq         2f                                \n\t"
++                "1:                                            \n\t"
++                "vld1.16     {d2[0]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.16     {d3[0]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.16     {d2[1]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.16     {d3[1]}, [%[src2]], %[stride_src] \n\t"
++                "vzip.16     d0, d1                            \n\t"  // d0 now holds the four rows in source order
++                "subs        %[height], #4                     \n\t"
++                "vst1.16     {d0}, [%[dst]]!                   \n\t"
++                "beq         3f                                \n\t"
++                "vld1.16     {d0[0]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.16     {d1[0]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.16     {d0[1]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.16     {d1[1]}, [%[src2]], %[stride_src] \n\t"
++                "vzip.16     d2, d3                            \n\t"
++                "subs        %[height], #4                     \n\t"
++                "vst1.16     {d2}, [%[dst]]!                   \n\t"
++                "bne         1b                                \n\t"
++                "2:                                            \n\t"  // tail: flush the batch still held in d0/d1
++                "vzip.16     d0, d1                            \n\t"
++                "vst1.16     {d0}, [%[dst]]                    \n\t"
++                "b           4f                                \n\t"
++                "3:                                            \n\t"  // tail: flush the batch still held in d2/d3
++                "vzip.16     d2, d3                            \n\t"
++                "vst1.16     {d2}, [%[dst]]                    \n\t"
++                "4:                                            \n\t"
++                :  // Outputs
++                           [src]"+r"(src),
++                          [src2]"+r"(src2),
++                           [dst]"+r"(dst),
++                        [height]"+r"(height)
++                :  // Inputs
++                    [stride_src]"r"(stride_src)
++                :  // Clobbers
++                    "cc", "memory"  // NOTE(review): d0-d3 are written but not listed here - confirm this is intentional
++            );
++            break;
++        default:  // every other pixel_shift value: 8-bit elements, eight rows per batch
++            __asm__ volatile (
++                "vld1.8      {d0[0]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.8      {d1[0]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.8      {d0[1]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.8      {d1[1]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.8      {d0[2]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.8      {d1[2]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.8      {d0[3]}, [%[src]], %[stride_src]  \n\t"
++                "subs        %[height], #8                     \n\t"  // eight rows consumed per batch
++                "vld1.8      {d1[3]}, [%[src2]], %[stride_src] \n\t"
++                "beq         2f                                \n\t"
++                "1:                                            \n\t"
++                "vld1.8      {d2[0]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.8      {d3[0]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.8      {d2[1]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.8      {d3[1]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.8      {d2[2]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.8      {d3[2]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.8      {d2[3]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.8      {d3[3]}, [%[src2]], %[stride_src] \n\t"
++                "vzip.8      d0, d1                            \n\t"  // d0 now holds the eight rows in source order
++                "subs        %[height], #8                     \n\t"
++                "vst1.8      {d0}, [%[dst]]!                   \n\t"
++                "beq         3f                                \n\t"
++                "vld1.8      {d0[0]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.8      {d1[0]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.8      {d0[1]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.8      {d1[1]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.8      {d0[2]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.8      {d1[2]}, [%[src2]], %[stride_src] \n\t"
++                "vld1.8      {d0[3]}, [%[src]], %[stride_src]  \n\t"
++                "vld1.8      {d1[3]}, [%[src2]], %[stride_src] \n\t"
++                "vzip.8      d2, d3                            \n\t"
++                "subs        %[height], #8                     \n\t"
++                "vst1.8      {d2}, [%[dst]]!                   \n\t"
++                "bne         1b                                \n\t"
++                "2:                                            \n\t"  // tail: flush the batch still held in d0/d1
++                "vzip.8      d0, d1                            \n\t"
++                "vst1.8      {d0}, [%[dst]]                    \n\t"
++                "b           4f                                \n\t"
++                "3:                                            \n\t"  // tail: flush the batch still held in d2/d3
++                "vzip.8      d2, d3                            \n\t"
++                "vst1.8      {d2}, [%[dst]]                    \n\t"
++                "4:                                            \n\t"
++                :  // Outputs
++                           [src]"+r"(src),
++                          [src2]"+r"(src2),
++                           [dst]"+r"(dst),
++                        [height]"+r"(height)
++                :  // Inputs
++                    [stride_src]"r"(stride_src)
++                :  // Clobbers
++                    "cc", "memory"  // NOTE(review): d0-d3 are written but not listed here - confirm this is intentional
++            );
++            break;
++    }
++}
++
++static av_noinline void ff_hevc_rpi_copy_vert_h2v_neon(uint8_t *dst, const uint8_t *src,
++                                                       int pixel_shift, int height,
++                                                      ptrdiff_t stride_dst)
++{  // Reads elements contiguously from src and scatters them one per destination row (rows stride_dst bytes apart).
++    uint8_t *dst2 = dst + stride_dst;  // second write pointer, one destination row below dst
++    stride_dst <<= 1;  // each write pointer now advances two destination rows per store
++    switch (pixel_shift)
++    {
++        case 2:  // 32-bit elements: one q register carries four rows
++            __asm__ volatile (
++                "subs        %[height], #4                     \n\t"  // four rows written per batch
++                "vld1.32     {q0}, [%[src]]!                   \n\t"  // contiguous input: src post-increments
++                "beq         2f                                \n\t"
++                "1:                                            \n\t"  // unrolled twice: q1 loads while q0 is scattered, then vice versa
++                "vld1.32     {q1}, [%[src]]!                   \n\t"
++                "vst1.32     {d0[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.32     {d0[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.32     {d1[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "subs        %[height], #4                     \n\t"
++                "vst1.32     {d1[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "beq         3f                                \n\t"
++                "vld1.32     {q0}, [%[src]]!                   \n\t"
++                "vst1.32     {d2[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.32     {d2[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.32     {d3[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "subs        %[height], #4                     \n\t"
++                "vst1.32     {d3[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "bne         1b                                \n\t"  // exits only when height reaches exactly 0
++                "2:                                            \n\t"  // tail: scatter the batch still held in q0
++                "vst1.32     {d0[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.32     {d0[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.32     {d1[0]}, [%[dst]]                 \n\t"
++                "vst1.32     {d1[1]}, [%[dst2]]                \n\t"
++                "b           4f                                \n\t"
++                "3:                                            \n\t"  // tail: scatter the batch still held in q1
++                "vst1.32     {d2[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.32     {d2[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.32     {d3[0]}, [%[dst]]                 \n\t"
++                "vst1.32     {d3[1]}, [%[dst2]]                \n\t"
++                "4:                                            \n\t"
++                :  // Outputs
++                           [dst]"+r"(dst),
++                          [dst2]"+r"(dst2),
++                           [src]"+r"(src),
++                        [height]"+r"(height)
++                :  // Inputs
++                    [stride_dst]"r"(stride_dst)
++                :  // Clobbers
++                    "cc", "memory"  // NOTE(review): d0-d3 are written but not listed here - confirm this is intentional
++            );
++            break;
++        case 1:  // 16-bit elements: one d register carries four rows
++            __asm__ volatile (
++                "subs        %[height], #4                     \n\t"  // four rows written per batch
++                "vld1.16     {d0}, [%[src]]!                   \n\t"
++                "beq         2f                                \n\t"
++                "1:                                            \n\t"
++                "vld1.16     {d2}, [%[src]]!                   \n\t"
++                "vst1.16     {d0[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.16     {d0[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.16     {d0[2]}, [%[dst]], %[stride_dst]  \n\t"
++                "subs        %[height], #4                     \n\t"
++                "vst1.16     {d0[3]}, [%[dst2]], %[stride_dst] \n\t"
++                "beq         3f                                \n\t"
++                "vld1.16     {d0}, [%[src]]!                   \n\t"
++                "vst1.16     {d2[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.16     {d2[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.16     {d2[2]}, [%[dst]], %[stride_dst]  \n\t"
++                "subs        %[height], #4                     \n\t"
++                "vst1.16     {d2[3]}, [%[dst2]], %[stride_dst] \n\t"
++                "bne         1b                                \n\t"
++                "2:                                            \n\t"  // tail: scatter the batch still held in d0
++                "vst1.16     {d0[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.16     {d0[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.16     {d0[2]}, [%[dst]]                 \n\t"
++                "vst1.16     {d0[3]}, [%[dst2]]                \n\t"
++                "b           4f                                \n\t"
++                "3:                                            \n\t"  // tail: scatter the batch still held in d2
++                "vst1.16     {d2[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.16     {d2[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.16     {d2[2]}, [%[dst]]                 \n\t"
++                "vst1.16     {d2[3]}, [%[dst2]]                \n\t"
++                "4:                                            \n\t"
++                :  // Outputs
++                           [dst]"+r"(dst),
++                          [dst2]"+r"(dst2),
++                           [src]"+r"(src),
++                        [height]"+r"(height)
++                :  // Inputs
++                    [stride_dst]"r"(stride_dst)
++                :  // Clobbers
++                    "cc", "memory"  // NOTE(review): d0 and d2 are written but not listed here - confirm this is intentional
++            );
++            break;
++        default:  // every other pixel_shift value: 8-bit elements, eight rows per batch
++            __asm__ volatile (
++                "subs        %[height], #8                     \n\t"  // eight rows written per batch
++                "vld1.8      {d0}, [%[src]]!                   \n\t"
++                "beq         2f                                \n\t"
++                "1:                                            \n\t"
++                "vld1.8      {d2}, [%[src]]!                   \n\t"
++                "vst1.8      {d0[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.8      {d0[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.8      {d0[2]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.8      {d0[3]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.8      {d0[4]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.8      {d0[5]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.8      {d0[6]}, [%[dst]], %[stride_dst]  \n\t"
++                "subs        %[height], #8                     \n\t"
++                "vst1.8      {d0[7]}, [%[dst2]], %[stride_dst] \n\t"
++                "beq         3f                                \n\t"
++                "vld1.8      {d0}, [%[src]]!                   \n\t"
++                "vst1.8      {d2[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.8      {d2[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.8      {d2[2]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.8      {d2[3]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.8      {d2[4]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.8      {d2[5]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.8      {d2[6]}, [%[dst]], %[stride_dst]  \n\t"
++                "subs        %[height], #8                     \n\t"
++                "vst1.8      {d2[7]}, [%[dst2]], %[stride_dst] \n\t"
++                "bne         1b                                \n\t"
++                "2:                                            \n\t"  // tail: scatter the batch still held in d0
++                "vst1.8      {d0[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.8      {d0[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.8      {d0[2]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.8      {d0[3]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.8      {d0[4]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.8      {d0[5]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.8      {d0[6]}, [%[dst]]                 \n\t"
++                "vst1.8      {d0[7]}, [%[dst2]]                \n\t"
++                "b           4f                                \n\t"
++                "3:                                            \n\t"  // tail: scatter the batch still held in d2
++                "vst1.8      {d2[0]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.8      {d2[1]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.8      {d2[2]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.8      {d2[3]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.8      {d2[4]}, [%[dst]], %[stride_dst]  \n\t"
++                "vst1.8      {d2[5]}, [%[dst2]], %[stride_dst] \n\t"
++                "vst1.8      {d2[6]}, [%[dst]]                 \n\t"
++                "vst1.8      {d2[7]}, [%[dst2]]                \n\t"
++                "4:                                            \n\t"
++                :  // Outputs
++                           [dst]"+r"(dst),
++                          [dst2]"+r"(dst2),
++                           [src]"+r"(src),
++                        [height]"+r"(height)
++                :  // Inputs
++                    [stride_dst]"r"(stride_dst)
++                :  // Clobbers
++                    "cc", "memory"  // NOTE(review): d0 and d2 are written but not listed here - confirm this is intentional
++            );
++            break;
++    }
++}
++
++static av_noinline void ff_hevc_rpi_copy_vert_v2v_neon(uint8_t *dst, const uint8_t *src,
++                                                       int pixel_shift, int height,
++                                                       ptrdiff_t stride_dst, ptrdiff_t stride_src)
++{  // Copies one element per row from src (rows stride_src apart) to dst (rows stride_dst apart) using core registers only.
++    int x, y;  // scratch registers holding the two elements in flight
++    switch (pixel_shift)
++    {
++        case 2:  // 32-bit elements
++            __asm__ volatile (
++                "ldr         %[x], [%[src]], %[stride_src] \n\t"  // post-indexed: src advances one row per load
++                "ldr         %[y], [%[src]], %[stride_src] \n\t"
++                "str         %[x], [%[dst]], %[stride_dst] \n\t"
++                "sub         %[height], #2                 \n\t"  // accounts for the two rows loaded above
++                "1:                                        \n\t"  // NOTE(review): body always runs once, so this appears to need an even height >= 4 - confirm
++                "ldr         %[x], [%[src]], %[stride_src] \n\t"
++                "str         %[y], [%[dst]], %[stride_dst] \n\t"
++                "ldr         %[y], [%[src]], %[stride_src] \n\t"
++                "subs        %[height], #2                 \n\t"  // two rows per iteration
++                "str         %[x], [%[dst]], %[stride_dst] \n\t"
++                "bne         1b                            \n\t"  // exits only when height reaches exactly 0
++                "str         %[y], [%[dst]]                \n\t"  // last row still pending in y
++                :  // Outputs
++                             [x]"=&r"(x),
++                             [y]"=&r"(y),
++                           [src]"+r"(src),
++                           [dst]"+r"(dst),
++                        [height]"+r"(height)
++                :  // Inputs
++                    [stride_src]"r"(stride_src),
++                    [stride_dst]"r"(stride_dst)
++                :  // Clobbers
++                    "cc", "memory"
++            );
++            break;
++        case 1:  // 16-bit elements: same schedule as case 2 with halfword loads and stores
++            __asm__ volatile (
++                "ldrh        %[x], [%[src]], %[stride_src] \n\t"
++                "ldrh        %[y], [%[src]], %[stride_src] \n\t"
++                "strh        %[x], [%[dst]], %[stride_dst] \n\t"
++                "sub         %[height], #2                 \n\t"
++                "1:                                        \n\t"
++                "ldrh        %[x], [%[src]], %[stride_src] \n\t"
++                "strh        %[y], [%[dst]], %[stride_dst] \n\t"
++                "ldrh        %[y], [%[src]], %[stride_src] \n\t"
++                "subs        %[height], #2                 \n\t"
++                "strh        %[x], [%[dst]], %[stride_dst] \n\t"
++                "bne         1b                            \n\t"
++                "strh        %[y], [%[dst]]                \n\t"
++                :  // Outputs
++                             [x]"=&r"(x),
++                             [y]"=&r"(y),
++                           [src]"+r"(src),
++                           [dst]"+r"(dst),
++                        [height]"+r"(height)
++                :  // Inputs
++                    [stride_src]"r"(stride_src),
++                    [stride_dst]"r"(stride_dst)
++                :  // Clobbers
++                    "cc", "memory"
++            );
++            break;
++        default:  // every other pixel_shift value: 8-bit elements, same schedule with byte loads and stores
++            __asm__ volatile (
++                "ldrb        %[x], [%[src]], %[stride_src] \n\t"
++                "ldrb        %[y], [%[src]], %[stride_src] \n\t"
++                "strb        %[x], [%[dst]], %[stride_dst] \n\t"
++                "sub         %[height], #2                 \n\t"
++                "1:                                        \n\t"
++                "ldrb        %[x], [%[src]], %[stride_src] \n\t"
++                "strb        %[y], [%[dst]], %[stride_dst] \n\t"
++                "ldrb        %[y], [%[src]], %[stride_src] \n\t"
++                "subs        %[height], #2                 \n\t"
++                "strb        %[x], [%[dst]], %[stride_dst] \n\t"
++                "bne         1b                            \n\t"
++                "strb        %[y], [%[dst]]                \n\t"
++                :  // Outputs
++                             [x]"=&r"(x),
++                             [y]"=&r"(y),
++                           [src]"+r"(src),
++                           [dst]"+r"(dst),
++                        [height]"+r"(height)
++                :  // Inputs
++                    [stride_src]"r"(stride_src),
++                    [stride_dst]"r"(stride_dst)
++                :  // Clobbers
++                    "cc", "memory"
++            );
++            break;
++    }
++}
++
++#define ff_hevc_rpi_copy_vert ff_hevc_rpi_copy_vert_neon
++static inline void ff_hevc_rpi_copy_vert_neon(uint8_t *dst, const uint8_t *src,
++                                              int pixel_shift, int height,
++                                              ptrdiff_t stride_dst, ptrdiff_t stride_src)
++{  // Dispatches to the v2h, h2v or v2v variant by comparing each stride against (1 << pixel_shift).
++    if (stride_dst == 1 << pixel_shift)  // dst stride equals 1 << pixel_shift: use the variant that writes dst contiguously
++        ff_hevc_rpi_copy_vert_v2h_neon(dst, src, pixel_shift, height, stride_src);
++    else if (stride_src == 1 << pixel_shift)  // src stride equals 1 << pixel_shift: use the variant that reads src contiguously
++        ff_hevc_rpi_copy_vert_h2v_neon(dst, src, pixel_shift, height, stride_dst);
++    else  // neither stride matches: fall back to the variant that takes both strides
++        ff_hevc_rpi_copy_vert_v2v_neon(dst, src, pixel_shift, height, stride_dst, stride_src);
++}
++
++#endif /* HAVE_NEON_INLINE && !CONFIG_THUMB */
++
++#endif /* AVCODEC_ARM_RPI_HEVC_MISC_H */
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevc_mv_arm.h
+@@ -0,0 +1,93 @@
++/*
++Copyright (c) 2017 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Written by John Cox, Ben Avison
++*/
++
++#ifndef AVCODEC_ARM_RPI_HEVC_MV_H
++#define AVCODEC_ARM_RPI_HEVC_MV_H
++
++#if HAVE_ARMV6T2_INLINE
++static inline MvXY mvxy_add_arm(const MvXY a, const MvXY b)
++{   /* Return a + b computed with a single SADD16 instruction. */
++    MvXY r;  /* NOTE(review): MvXY is declared elsewhere; presumably packs x and y as two 16-bit halves - confirm */
++    __asm__ (
++        "sadd16    %[r], %[a], %[b]        \n\t"  /* adds the low and the high 16-bit halves independently, without saturation */
++        : [r]"=r"(r)
++        : [a]"r"(a),
++          [b]"r"(b)
++        :
++        );
++    return r;
++}
++#define mvxy_add mvxy_add_arm
++#endif
++
++#if HAVE_ARMV6T2_INLINE
++#if (defined(__ARM_ARCH_EXT_IDIV__) || defined (__ARM_FEATURE_IDIV))
++static inline int32_t mv_scale_xy_arm(int32_t xy, int td, int tb)
++{   /* Scale both 16-bit halves of xy by a factor derived from tb and td; returns the repacked pair. */
++    int t;
++    __asm__ (
++    "ssat   %[td], #8,    %[td]          \n\t"  // td = clip(td, -128, 127)
++    "ssat   %[tb], #8,    %[tb]          \n\t"  // tb = clip(tb, -128, 127)
++    "eor    %[t],  %[td], %[td], asr #31 \n\t"
++    "adds   %[t],  %[t],  %[td], lsr #31 \n\t"  // t = abs(td); Z flag set when td == 0
++    "asr    %[t],  #1                    \n\t"
++    "add    %[t],  #0x4000               \n\t"  // t = 0x4000 + (abs(td) >> 1)
++    "it ne                               \n\t"
++    "sdivne %[t],  %[t],  %[td]          \n\t"  // t = t / td, skipped when td == 0
++    "mov    %[td], #32                   \n\t"
++    "smlabb %[td], %[t],  %[tb], %[td]   \n\t"  // td = tb * t + 32 (16x16 multiply-accumulate)
++    "ssat   %[td], #13,   %[td], asr #6  \n\t"  // scale factor = clip(td >> 6, -4096, 4095)
++    "mov    %[tb], #127                  \n\t"
++    "smlatb %[t],  %[xy], %[td], %[tb]   \n\t"  // t  = (top half of xy) * scale + 127
++    "smlabb %[tb], %[xy], %[td], %[tb]   \n\t"  // tb = (bottom half of xy) * scale + 127
++// This takes the sign of x & y for rounding at the "wrong" point
++// (i.e. after adding 127) but for the range of values (-1,-127)
++// where it does the wrong thing you get the right answer (0) anyway
++    "add    %[t],  %[t],  %[t],  lsr #31 \n\t"  // add 1 when the product is negative
++    "add    %[xy], %[tb], %[tb], lsr #31 \n\t"
++    "ssat   %[t],  #16,   %[t],  asr #8  \n\t"  // >> 8, then saturate to signed 16 bits
++    "ssat   %[xy], #16,   %[xy], asr #8  \n\t"
++    "pkhbt  %[xy], %[xy], %[t],  lsl #16 \n\t"  // repack: bottom half from xy, top half from t
++    :
++         [t]"=&r"(t),
++        [xy]"+r"(xy),
++        [td]"+r"(td),
++        [tb]"+r"(tb)
++    :
++    :
++        "cc"
++    );
++    return xy;
++}
++#define mv_scale_xy mv_scale_xy_arm
++#endif
++#endif
++
++#endif // AVCODEC_ARM_RPI_HEVC_MV_H
++
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcdsp_arm.h
+@@ -0,0 +1,26 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVCODEC_ARM_HEVCDSP_ARM_H
++#define AVCODEC_ARM_HEVCDSP_ARM_H
++
++#include "libavcodec/rpi_hevcdsp.h"
++
++void ff_hevcdsp_rpi_init_neon(HEVCDSPContext *c, const int bit_depth);
++
++#endif /* AVCODEC_ARM_HEVCDSP_ARM_H */
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcdsp_deblock_neon.S
+@@ -0,0 +1,1634 @@
++/*
++ * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi>
++ * Copyright (C) 2018 John Cox, Ben Avison for Raspberry Pi (Trading)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++
++#include "libavutil/arm/asm.S"
++#include "neon.S"
++
++.macro hevc_loop_filter_uv_body1 P1a, P0a, Q0a, Q1a, I1, I2, I3, I4, I5, I6, I7, I8
++        vsubl.u8  q0, \Q0a, \P0a  @ q0 - p0, widened to 16 bits
++        vsubl.u8  q1, \P1a, \Q1a  @ p1 - q1, widened to 16 bits
++        vdup.16   d4, r2          @ replicate the low 16 bits of r2 (tc)
++        \I1
++        vshl.i16  q0, #2          @ (q0 - p0) * 4
++        \I2
++        vadd.i16  q0, q1          @ ((q0 - p0) * 4) + p1 - q1
++        \I3
++        vmovl.u8  q2, d4          @ tc bytes widened to 16 bits
++        \I4
++        vneg.s16  q1, q2          @ -tc
++        \I5
++        vrshr.s16 q0, #3          @ (((q0 - p0) * 4) + p1 - q1 + 4) >> 3
++        \I6
++        \I7
++        \I8
++        vmin.s16  q0, q2          @ clip to <= tc
++        vmovl.u8  q2, \Q0a        @ q0 widened to 16 bits
++        vmax.s16  q0, q1          @ delta0 = clip to >= -tc
++        vaddw.u8  q1, q0, \P0a    @ p0 + delta0
++        vsub.i16  q0, q2, q0      @ q0 - delta0
++        vqmovun.s16 \P0a, q1      @ saturate back to unsigned 8 bits
++        vqmovun.s16 \Q0a, q0
++.endm
++
++
++.macro hevc_loop_filter_uv_body2 P1a, P1b, P0a, P0b, Q0a, Q0b, Q1a, Q1b, I1, I2, I3, I4, I5, I6, I7
++        vsubl.u8  q0, \Q0a, \P0a  @ q0a - p0a
++        lsr       r12, r2, #16    @ r12 = upper 16 bits of r2 (tc for the b half)
++        vsubl.u8  q1, \Q0b, \P0b  @ q0b - p0b
++        vsubl.u8  q2, \P1a, \Q1a  @ p1a - q1a
++        vsubl.u8  q3, \P1b, \Q1b  @ p1b - q1b
++        vshl.i16  q0, #2          @ (q0a - p0a) * 4
++        vshl.i16  q1, #2          @ (q0b - p0b) * 4
++        vadd.i16  q0, q2          @ ((q0a - p0a) * 4) + p1a - q1a
++        vadd.i16  q1, q3          @ ((q0b - p0b) * 4) + p1b - q1b
++        vdup.16   d4, r2          @ tc0a, tc0b
++        vdup.16   d6, r12         @ tc1a, tc1b
++        vrshr.s16 q0, #3          @ (((q0a - p0a) * 4) + p1a - q1a + 4) >> 3
++        \I1
++        vrshr.s16 q1, #3          @ (((q0b - p0b) * 4) + p1b - q1b + 4) >> 3
++        \I2
++        vmovl.u8  q2, d4          @ tc0a, tc0b
++        \I3
++        vmovl.u8  q3, d6          @ tc1a, tc1b
++        \I4
++        vmin.s16  q0, q2          @ clip a half to <= tc
++        \I5
++        vneg.s16  q2, q2          @ -tc0a, -tc0b
++        \I6
++        vmin.s16  q1, q3          @ clip b half to <= tc
++        \I7
++        vneg.s16  q3, q3          @ -tc1a, -tc1b
++        vmax.s16  q0, q2          @ delta0a
++        vmovl.u8  q2, \Q0a        @ q0a widened to 16 bits
++        vmax.s16  q1, q3          @ delta0b
++        vaddw.u8  q3, q0, \P0a    @ p0a + delta0a
++        vsub.i16  q0, q2, q0      @ q0a - delta0a
++        vmovl.u8  q2, \Q0b        @ q0b widened to 16 bits
++        vsub.i16  q2, q1          @ q0b - delta0b
++        vaddw.u8  q1, \P0b        @ p0b + delta0b
++        vqmovun.s16 \Q0a, q0      @ saturate results back to unsigned 8 bits
++        vqmovun.s16 \P0a, q3
++        vqmovun.s16 \Q0b, q2
++        vqmovun.s16 \P0b, q1
++.endm
++
++
++@ Preserves r12
++@ Clobbers r2
++@ P0a et al all contain UVUVUVUV
++@ r2 (tc4) contains
++@   [0..7]   tc U a
++@   [8..15]  tc V a
++
++.macro hevc_loop_filter_uv_body1_16 P1a, P0a, Q0a, Q1a, bit_depth, I1, I2, I3, I4, I5, I6, I7, I8
++        vsub.i16  q0, \Q0a, \P0a  @ q0 - p0
++        vsub.i16  q1, \P1a, \Q1a  @ p1 - q1
++        vdup.16   d4, r2          @ replicate the low 16 bits of r2 (tc U, tc V)
++        \I1
++        vshl.i16  q0, #2          @ (q0 - p0) * 4
++        \I2
++        vadd.i16  q0, q1          @ ((q0 - p0) * 4) + p1 - q1
++        \I3
++        vshll.u8  q2, d4, #\bit_depth - 8 @ tc widened and scaled up for the bit depth
++        \I4
++        vneg.s16  q1, q2          @ -tc
++        \I5
++        vrshr.s16 q0, #3          @ (((q0 - p0) * 4) + p1 - q1 + 4) >> 3
++        \I6
++        \I7
++        \I8
++        vmin.s16  q0, q2          @ clip to <= tc
++        vmov.i16  q2, #0          @ lower pixel bound
++        vmax.s16  q0, q1          @ delta0 = clip to >= -tc
++        vadd.i16  \P0a, q0        @ p0 + delta0
++        vsub.i16  \Q0a, q0        @ q0 - delta0
++        vmov.i16  q1, #(1 << \bit_depth) - 1 @ upper pixel bound
++        vmax.s16  \P0a, q2        @ clamp results to the pixel range
++        vmax.s16  \Q0a, q2
++        vmin.s16  \P0a, q1
++        vmin.s16  \Q0a, q1
++.endm
++
++@ Clobbers r2, r12
++@ P0a et al all contain UVUVUVUV
++@ r2 (tc4) contains
++@   [0..7]   tc U a
++@   [8..15]  tc V a
++@  [16..23]  tc U b
++@  [24..31]  tc V b
++
++.macro hevc_loop_filter_uv_body2_16 P1a, P1b, P0a, P0b, Q0a, Q0b, Q1a, Q1b, bit_depth, I1, I2, I3, I4, I5, I6, I7
++        vsub.i16  q0, \Q0a, \P0a  @ q0a - p0a
++        lsr       r12, r2, #16    @ r12 = upper 16 bits of r2 (tc U b, tc V b)
++        vsub.i16  q1, \Q0b, \P0b  @ q0b - p0b
++        vsub.i16  q2, \P1a, \Q1a  @ p1a - q1a
++        vsub.i16  q3, \P1b, \Q1b  @ p1b - q1b
++        vshl.i16  q0, #2          @ (q0a - p0a) * 4
++        vshl.i16  q1, #2          @ (q0b - p0b) * 4
++        vadd.i16  q0, q2          @ ((q0a - p0a) * 4) + p1a - q1a
++        vadd.i16  q1, q3          @ ((q0b - p0b) * 4) + p1b - q1b
++        vdup.16   d4, r2          @ tc0a, tc0b
++        vdup.16   d6, r12         @ tc1a, tc1b
++        vrshr.s16 q0, #3          @ (((q0a - p0a) * 4) + p1a - q1a + 4) >> 3
++        \I1
++        vrshr.s16 q1, #3          @ (((q0b - p0b) * 4) + p1b - q1b + 4) >> 3
++        \I2
++        vshll.u8  q2, d4, #\bit_depth - 8 @ tc0a, tc0b
++        \I3
++        vshll.u8  q3, d6, #\bit_depth - 8 @ tc1a, tc1b
++        \I4
++        vmin.s16  q0, q2          @ clip a half to <= tc
++        \I5
++        vneg.s16  q2, q2          @ -tc0a, -tc0b
++        \I6
++        vmin.s16  q1, q3          @ clip b half to <= tc
++        \I7
++        vneg.s16  q3, q3          @ -tc1a, -tc1b
++        vmax.s16  q0, q2          @ delta0a
++        vadd.i16  \P0a, q0        @ p0a + delta0a
++        vsub.i16  \Q0a, q0        @ q0a - delta0a
++        vmax.s16  q1, q3          @ delta0b
++        vadd.i16  \P0b, q1        @ p0b + delta0b
++        vsub.i16  \Q0b, q1        @ q0b - delta0b
++        vmov.i16  q2, #0          @ lower pixel bound
++        vmov.i16  q3, #(1 << \bit_depth) - 1 @ upper pixel bound
++        vmax.s16  \P0a, q2        @ clamp all four results to the pixel range
++        vmax.s16  \Q0a, q2
++        vmax.s16  \P0b, q2
++        vmax.s16  \Q0b, q2
++        vmin.s16  \P0a, q3
++        vmin.s16  \Q0a, q3
++        vmin.s16  \P0b, q3
++        vmin.s16  \Q0b, q3
++.endm
++
++
++
++@ hevc_loop_filter_luma_start expects r3 = int *_tc and, on the stack,
++@   uint8_t *_no_p at [sp+0] and uint8_t *_no_q at [sp+4]
++
++.macro hevc_loop_filter_luma_start
++        ldr     r12, [r3]               @ r12 = tc[0]
++        ldr      r3, [r3, #4]           @ r3 = tc[1]
++        orrs     r3, r12, r3, lsl #16   @ r3 = tc[0] | (tc[1] << 16); assumes tc[0] fits in 16 bits
++        it       eq
++        bxeq     lr                     @ both tc values zero: return from the enclosing function
++        push     {r4-r10,lr}            @ 32 bytes
++        ldrd     r4, r5, [sp, #32]      @ &_no_p
++        ldrb     r4, [r4]               @ r4 = first byte at _no_p
++        ldrb     r5, [r5]               @ r5 = first byte at _no_q
++        movs     r10, r4
++        it ne
++        movne    r10, #1                @ r10 bit 0 = (no_p byte != 0)
++        cmp      r5, #0
++        it ne
++        orrne    r10, #2                @ r10 bit 1 = (no_q byte != 0)
++.endm
++
++@ Input:
++@  r2          beta    (raw: needs shift for bitdepth > 8)
++@  r3[ 0:15]   tc[0]   (raw: needs shift for bitdepth > 8)
++@  r3[16:31]   tc[1]   (raw: needs shift for bitdepth > 8)
++@
++@ Input & output
++@  8-bit: d16-d23      (d16=P3 ... d19=P0, d20=Q0 ... d23=Q3)
++@ 16-bit:  q8-q15      (q8=P3 ... q11=P0, q12=Q0 ... q15=Q3)
++@
++@  r1         -r1
++@  r10        b1->C, b0->N  (r10 junk)
++@
++@ Junks:
++@  r5, r6, r7, r8, r9
++
++.macro m_filter_luma bit_depth, Q11, Q15
++.if \bit_depth == 8
++        vmovl.u8    q14, d22      @ q2,7 q2,6 ... q2,0 = TQ2' ... Q2' TQ2 ... Q2
++        vmovl.u8    q13, d21      @ q1,7 q1,6 ... q1,0 = TQ1' ... Q1' TQ1 ... Q1
++        vmovl.u8    q12, d20      @ q0,7 q0,6 ... q0,0 = TQ0' ... Q0' TQ0 ... Q0
++        vmovl.u8    \Q11, d19     @ p0,7 p0,6 ... p0,0 = TP0' ... P0' TP0 ... P0
++        vmovl.u8    q10, d18      @ p1,7 p1,6 ... p1,0 = TP1' ... P1' TP1 ... P1
++        vmovl.u8    q9, d17       @ p2,7 p2,6 ... p2,0 = TP2' ... P2' TP2 ... P2
++.endif
++        vadd.i16    q0, q9, \Q11  @ P2 + P0
++.if \bit_depth > 8
++        lsl         r3, r3, #(\bit_depth - 8) @ scale both packed tc values for the bit depth
++.endif
++        vadd.i16    q1, q14, q12  @ Q2 + Q0
++.if \bit_depth > 8
++        lsl         r2, r2, #(\bit_depth - 8) @ scale beta for the bit depth
++.endif
++        vsub.i16    q0, q10       @ P2 - P1 + P0
++        lsr         r5, r3, #16   @ r5 = tc[1]
++        vsub.i16    q1, q13       @ Q2 - Q1 + Q0
++.if \bit_depth == 8
++        vmovl.u8    q8, d16       @ p3,7 p3,6 ... p3,0 = TP3' ... P3' TP3 ... P3
++        vmovl.u8    \Q15, d23     @ q3,7 q3,6 ... q3,0 = TQ3' ... Q3' TQ3 ... Q3
++.endif
++        vabd.s16    q0, q10       @ dp0 = abs(P2 - 2 * P1 + P0)
++        vabd.s16    q1, q13       @ dq0 = abs(Q2 - 2 * Q1 + Q0)
++        vmov.i64    q2, #0xffffffff0000
++        vbic        q0, q2        @ only dp0(') and dp3(')
++        vbic        q1, q2        @ only dq0(') and dq3(')
++        vsra.u64    q0, #16
++        vsra.u64    q1, #16
++        vdup.16     q3, r2        @ beta
++        vdup.16     d14, r3       @ tC[0]
++        vdup.16     d15, r5       @ tC[1]
++        vabd.s16    q4, q8, \Q11  @ abs(TP3'-TP0' ... P3'-P0' TP3-TP0 ... P3-P0)
++        vmovn.i32   d0, q0        @ dp3' dp0' dp3 dp0
++        vmovn.i32   d1, q1        @ dq3' dq0' dq3 dq0
++        vadd.i16    d5, d0, d1    @ d3'=dp3'+dq3' d0'=dp0'+dq0' d3=dp3+dq3 d0=dp0+dq0
++        vabd.s16    q5, \Q11, q12 @ abs(TP0'-TQ0' ... P0'-Q0' TP0-TQ0 ... P0-Q0)
++        vaba.s16    q4, \Q15, q12 @ +abs(TQ3'-TQ0' ... Q3'-Q0' TQ3-TQ0 ... Q3-Q0)
++        vpadd.i16   d2, d5, d5    @ dontcare dontcare d0'+d3' d0+d3
++        vshl.s16    q6, q7, #2    @ tC[] * 4
++        vrhadd.s16  q6, q7        @ tc25 = (tc[] * 5 + 1) >> 1
++        vcgt.s16    d2, d6, d2    @ if (d0 + d3 < beta)
++        vmov        r7, s4        @ (d2) r7 = mask of blocks to apply filtering (16b/block)
++        vshr.s16    q1, q3, #3    @ beta_3 = beta >> 3
++        cmp         r7, #0
++        beq         .Lbypasswrite @ no block passes the beta test: skip filtering and stores
++
++        vcgt.s16    q5, q6, q5    @ if < tc25
++        vcgt.s16    q4, q1, q4    @ if (abs({T}P[0-3]{'}-{T}P[0-3]{'})+abs({T}Q[0-3]{'}-{T}Q[0-3]{'}) < beta_3)
++        vand        q4, q5
++        vbic        d8, d4
++        vbic        d9, d4
++        vshr.s16    q3, #2        @ beta_2 = beta >> 2
++        vsra.u64    q4, #16
++        vshl.s16    d5, #1        @ d3'<<1 d0'<<1 d3<<1 d0<<1
++        vshl.i16    q7, #1        @ tc2 = tC[] << 1
++        vcgt.s16    d6, d5        @ if (d3'<<1 < beta_2) etc
++        vmovn.i32   d8, q4        @ beta_3 && tc25 tests, prime block in ms half
++        vand        d6, d8        @ && beta_2 tests, prime in ms half
++        vpadd.i16   d0, d1        @ dq0'+dq3' dq0+dq3 dp0'+dp3' dp0+dp3
++        vneg.s16    q6, q7        @ -tc2
++        vmovn.i32   d8, q3
++        vshrn.i32   d6, q3, #16
++        vand        d6, d8
++        vmov        r5, r6, d0    @ r5 = dp0'+dp3' dp0+dp3  r6 = dq0'+dq3' dq0+dq3
++        vmov        r8, s12       @ (d6) r8 = mask of strong filtering blocks (16b/block)
++        vadd.i16    q0, \Q11, q12 @ p0 + q0
++        ands        r9, r7, r8    @ r9 = blocks that are filtered and pass the strong tests
++        beq         1f            @ none: skip the strong filter
++
++        vadd.i16    q2, q0, q10   @ p1 + p0 + q0
++        vadd.i16    q3, q0, q13   @ p0 + q0 + q1
++        lsr         r3, r9, #16
++        vadd.i16    q1, q2, q9    @ p2 + p1 + p0 + q0 (new P1 before clipping)
++        vadd.i16    q4, q3, q14   @ p0 + q0 + q1 + q2 (new Q1 before clipping)
++        vadd.i16    q0, q8, q9    @ p3 + p2
++        vadd.i16    q5, \Q15, q14 @ q2 + q3
++        vadd.i16    q2, q1        @ p2 + 2 * p1 + 2 * p0 + 2 * q0
++        vadd.i16    q3, q4        @ 2 * p0 + 2 * q0 + 2 * q1 + q2
++        vshl.i16    q0, #1        @ 2 * p3 + 2 * p2
++        vshl.i16    q5, #1        @ 2 * q2 + 2 * q3
++        vadd.i16    q0, q1        @ 2 * p3 + 3 * p2 + p1 + p0 + q0 (new P2 before clipping)
++        vadd.i16    q5, q4        @ p0 + q0 + q1 + 3 * q2 + 2 * q3 (new Q2 before clipping)
++        vadd.i16    q2, q13       @ p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 (new P0 before clipping)
++        vadd.i16    q3, q10       @ p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 (new Q0 before clipping)
++        vrshr.s16   q0, #3        @ scale, with rounding
++        vrshr.s16   q5, #3
++        vrshr.s16   q1, #2
++        vrshr.s16   q4, #2
++        vrshr.s16   q2, #3
++        vrshr.s16   q3, #3
++        vsub.i16    q0, q9        @ find difference
++        vsub.i16    q5, q14
++        vsub.i16    q1, q10
++        vsub.i16    q4, q13
++        vsub.i16    q2, \Q11
++        vsub.i16    q3, q12
++        vmax.s16    q0, q6        @ clip difference to -tc2 .. tc2
++        vmax.s16    q5, q6
++        vmax.s16    q1, q6
++        vmax.s16    q4, q6
++        vmax.s16    q2, q6
++        vmax.s16    q3, q6
++        vdup.16     d12, r9       @ expand mask, reuse q6 due to register pressure
++        vdup.16     d13, r3
++        vmin.s16    q0, q7
++        vmin.s16    q5, q7
++        vmin.s16    q1, q7
++        vmin.s16    q4, q7
++        vmin.s16    q2, q7
++        vmin.s16    q3, q7
++        vadd.i16    q0, q9        @ apply difference
++        vadd.i16    q5, q14
++        vadd.i16    q1, q10
++        vadd.i16    q4, q13
++        vadd.i16    q2, \Q11
++        vadd.i16    q3, q12
++        vbit        q9, q0, q6    @ apply filtered values according to mask
++        vbit        q14, q5, q6
++        vbit        q10, q1, q6
++        vbit        q13, q4, q6
++        vbit        \Q11, q2, q6
++        vbit        q12, q3, q6
++        vneg.s16    q6, q7        @ restore -tc2
++
++1:
++        bics        r9, r7, r8    @ r9 = blocks that are filtered but not strong (normal filter)
++        beq         2f            @ none: skip the normal filter
++
++        vsub.i16    q0, q12, \Q11 @ q0 - p0
++        vsub.i16    q1, q13, q10  @ q1 - p1
++        lsr         r3, r9, #16
++        vshl.i16    q2, q0, #3
++        lsr         r7, r5, #16
++        vadd.i16    q3, q0, q2    @ 9 * (q0 - p0)
++        lsr         r8, r6, #16
++        vshl.i16    q2, q1, #1
++        vadd.i16    q4, q1, q2    @ 3 * (q1 - p1)
++        vshr.s16    q6, #1        @ -tc = -tc2 >> 1
++        vsub.i16    q5, q3, q4
++        vrhadd.s16  q1, q9, \Q11  @ (p2 + p0 + 1) >> 1
++        vrhadd.s16  q3, q14, q12  @ (q2 + q0 + 1) >> 1
++        vrshr.s16   q5, #4        @ delta0 = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4
++        vsub.i16    q1, q10       @ ((p2 + p0 + 1) >> 1) - p1
++        vsub.i16    q3, q13       @ ((q2 + q0 + 1) >> 1) - q1
++        vmax.s16    q6, q5        @
++        vshr.s16    q4, q7, #1    @ tc = tc2 >> 1
++        vdup.16     q0, r2        @ beta
++        vmin.s16    q6, q4        @ delta0 clamped to [-tc, tc]
++        vshr.s16    q4, #1        @ tc_2 = tc >> 1
++        vhadd.s16   q1, q6        @ (((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1
++        vhsub.s16   q3, q6        @ (((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1
++        vshr.s16    q2, q0, #1    @ beta >> 1
++        vadd.i16    q2, q0        @ beta + (beta >> 1)
++        vneg.s16    q0, q4        @ -tc_2
++        vabs.s16    q5, q5        @ abs(original delta0)
++        vshr.s16    q2, #3        @ (beta + (beta >> 1)) >> 3
++        vmax.s16    q1, q0
++        vmax.s16    q3, q0
++        vshl.s16    q0, q7, #2    @ 8 * tc
++        vadd.i16    q7, q0        @ 10 * tc
++        vdup.16     d0, r9
++        vdup.16     d1, r3        @ q0 = mask of blocks to apply filtering
++        vmin.s16    q1, q4        @ deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2)
++        vmin.s16    q3, q4        @ deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2)
++        vdup.16     d8, r5        @ dp0 + dp3
++        vdup.16     d9, r7        @ dp0' + dp3'
++        vcgt.s16    q7, q5        @ if ((10 * tc) > abs(delta0))
++        vdup.16     d10, r6       @ dq0 + dq3
++        vdup.16     d11, r8       @ dq0' + dq3'
++        vand        q7, q0        @ AND block and line masks
++        vcgt.s16    q4, q2, q4    @ if (((beta + (beta >> 1)) >> 3) > dp0 + dp3), i.e. if (nd_p > 1)
++        vadd.i16    q0, q1, q10   @ p1 + deltap1
++        vcgt.s16    q5, q2, q5    @ if (((beta + (beta >> 1)) >> 3) > dq0 + dq3), i.e. if (nd_q > 1)
++        vadd.i16    q3, q3, q13   @ q1 + deltaq1
++        vadd.i16    q1, \Q11, q6  @ p0 + delta0
++        vsub.i16    q2, q12, q6   @ q0 - delta0
++        vand        q4, q7        @ AND nd_p test with block/line masks
++        vand        q5, q7        @ AND nd_q test with block/line masks
++        vbit        q10, q0, q4
++        vbit        \Q11, q1, q7
++        vbit        q12, q2, q7
++        vbit        q13, q3, q5
++
++2:
++.if \bit_depth == 8
++        vmovn.i16 d16, q8         @ P3 and Q3 are unfiltered: plain narrowing
++        vmovn.i16 d23, \Q15
++        neg       r1, r1          @ hand the negated stride back to the caller
++        vqmovun.s16 d17, q9       @ saturate filtered rows back to unsigned 8 bits
++        vqmovun.s16 d18, q10
++        vqmovun.s16 d19, \Q11
++        lsls      r10, #31        @ r10 b0 -> N, b1 -> C for the caller
++        vqmovun.s16 d20, q12
++        vqmovun.s16 d21, q13
++        vqmovun.s16 d22, q14
++.else
++        vmov.i16  q0, #0
++        vmov.i16  q1, #(1 << \bit_depth - 1) @ max pixel value: gas binds << tighter than -, i.e. (1 << bit_depth) - 1
++        @ q8 & q15 should be unaltered and so don't require clipping
++        neg       r1, r1          @ hand the negated stride back to the caller
++        vmax.s16  q9,  q0
++        vmax.s16  q10, q0
++        vmax.s16  q11, q0
++        vmax.s16  q12, q0
++        vmax.s16  q13, q0
++        vmax.s16  q14, q0
++        lsls      r10, #31        @ r10 b0 -> N, b1 -> C for the caller
++        vmin.s16  q9,  q1
++        vmin.s16  q10, q1
++        vmin.s16  q11, q1
++        vmin.s16  q12, q1
++        vmin.s16  q13, q1
++        vmin.s16  q14, q1
++.endif
++        bx        lr
++.endm
++
++function hevc_loop_filter_luma_body
++        m_filter_luma 8, q15, q11 @ macro params are (bit_depth, Q11, Q15): for 8 bits P0 widens into q15 and Q3 into q11
++endfunc
++
++@ void ff_hevc_rpi_v_loop_filter_luma_neon_8(
++@   uint8_t *_pix,      [r0]
++@   ptrdiff_t _stride,  [r1]
++@   int _beta,          [r2]
++@   int *_tc,           [r3]
++@   uint8_t *_no_p,     [sp+0]
++@   uint8_t *_no_q)     [sp+4]
++
++function ff_hevc_rpi_v_loop_filter_luma_neon_8, export=1
++        hevc_loop_filter_luma_start
++
++        sub      r4, r0, #4             @ r4 = pointer to the 4 bytes left of pix (P side)
++        b        .Lv_loop_luma_common   @ shared tail lives in ff_hevc_rpi_v_loop_filter_luma2_neon_8
++endfunc
++
++@ void ff_hevc_rpi_v_loop_filter_luma2_neon_8(
++@   uint8_t * pix_r,    [r0]
++@   ptrdiff_t _stride,  [r1]
++@   int _beta,          [r2]
++@   int tc2,            [r3]
++@   int no_f,           [sp+0]
++@   uint8_t * pix_l)    [sp+4]
++
++function ff_hevc_rpi_v_loop_filter_luma2_neon_8, export=1
++        cmp      r3, #0
++        it       eq
++        bxeq     lr                     @ tc2 == 0: nothing to filter
++        push     {r4-r10,lr}            @ 32 bytes
++        ldr      r4, [sp, #36]          @ r4 = pix_l (stack arg at original sp+4)
++        ldr      r10, [sp, #32]         @ r10 = no_f (stack arg at original sp+0)
++
++.Lv_loop_luma_common:
++        vpush    {d8-d15}
++
++        @ It's slightly faster to do unlaned loads and transpose in the
++        @ 8-bit case, even though it needs more instructions, because
++        @ VLD4.8 is a really slow way to read from memory.
++        vld1.32 {d16[0]}, [r4:32], r1
++        vld1.32 {d20[0]}, [r0:32], r1
++        vld1.32 {d16[1]}, [r4:32], r1
++        vld1.32 {d20[1]}, [r0:32], r1
++        vld1.32 {d17[0]}, [r4:32], r1
++        vld1.32 {d21[0]}, [r0:32], r1
++        vld1.32 {d17[1]}, [r4:32], r1
++        vld1.32 {d21[1]}, [r0:32], r1
++        vld1.32 {d18[0]}, [r4:32], r1
++        vld1.32 {d22[0]}, [r0:32], r1
++        vld1.32 {d18[1]}, [r4:32], r1
++        vld1.32 {d22[1]}, [r0:32], r1
++        vld1.32 {d19[0]}, [r4:32], r1
++        vld1.32 {d23[0]}, [r0:32], r1
++        vld1.32 {d19[1]}, [r4:32]       @ no post-increment: r4 and r0 stay on the last row
++        vld1.32 {d23[1]}, [r0:32]
++        vuzp.16 q8, q9                  @ transpose so that each d register holds one column
++        vuzp.16 q10, q11
++        vuzp.8  q8, q9
++        vuzp.8  q10, q11
++        vswp    d17, d18
++        vswp    d21, d22
++
++        bl hevc_loop_filter_luma_body   @ returns with r1 negated, N = r10 bit 0, C = r10 bit 1
++
++        add     r6, r4, r1              @ r6 = second-to-last row, left side
++        add     r2, r0, r1              @ r2 = second-to-last row, right side
++        lsl     r1, #1                  @ r1 = -2 * stride: r4/r6 and r0/r2 interleave rows going upwards
++
++        vpop     {d8-d15}
++
++        @ no_p[1]
++        bmi     1f                      @ N set: skip writing the P (left) side
++        vst4.8  {d16[7],d17[7],d18[7],d19[7]}, [r4:32], r1
++        vst4.8  {d16[6],d17[6],d18[6],d19[6]}, [r6:32], r1
++        vst4.8  {d16[5],d17[5],d18[5],d19[5]}, [r4:32], r1
++        vst4.8  {d16[4],d17[4],d18[4],d19[4]}, [r6:32], r1
++
++        vst4.8  {d16[3],d17[3],d18[3],d19[3]}, [r4:32], r1
++        vst4.8  {d16[2],d17[2],d18[2],d19[2]}, [r6:32], r1
++        vst4.8  {d16[1],d17[1],d18[1],d19[1]}, [r4:32], r1
++        vst4.8  {d16[0],d17[0],d18[0],d19[0]}, [r6:32]
++1:
++        @ no_q[1]
++        bcs     1f                      @ C set: skip writing the Q (right) side
++        vst4.8  {d20[7],d21[7],d22[7],d23[7]}, [r0:32], r1
++        vst4.8  {d20[6],d21[6],d22[6],d23[6]}, [r2:32], r1
++        vst4.8  {d20[5],d21[5],d22[5],d23[5]}, [r0:32], r1
++        vst4.8  {d20[4],d21[4],d22[4],d23[4]}, [r2:32], r1
++
++        vst4.8  {d20[3],d21[3],d22[3],d23[3]}, [r0:32], r1
++        vst4.8  {d20[2],d21[2],d22[2],d23[2]}, [r2:32], r1
++        vst4.8  {d20[1],d21[1],d22[1],d23[1]}, [r0:32], r1
++        vst4.8  {d20[0],d21[0],d22[0],d23[0]}, [r2:32]
++1:
++        pop      {r4-r10,pc}
++
++.Lbypasswrite:                          @ branch target of m_filter_luma when no block needs filtering
++        vpop     {d8-d15}
++        pop      {r4-r10,pc}
++endfunc
++
++.macro m_filter_v_luma_16 bit_depth
++        vpush    {d8-d15}
++
++        @ Uses slightly fewer instructions to do laned loads than unlaned
++        @ and transpose.  This also means that we can use the same code for
++        @ both split & unsplit deblock
++        vld4.16  {d16[0], d18[0], d20[0], d22[0]}, [r4], r1
++        vld4.16  {d24[0], d26[0], d28[0], d30[0]}, [r0], r1
++
++        vld4.16  {d16[1], d18[1], d20[1], d22[1]}, [r4], r1
++        vld4.16  {d24[1], d26[1], d28[1], d30[1]}, [r0], r1
++
++        vld4.16  {d16[2], d18[2], d20[2], d22[2]}, [r4], r1
++        vld4.16  {d24[2], d26[2], d28[2], d30[2]}, [r0], r1
++
++        vld4.16  {d16[3], d18[3], d20[3], d22[3]}, [r4], r1
++        vld4.16  {d24[3], d26[3], d28[3], d30[3]}, [r0], r1
++
++        vld4.16  {d17[0], d19[0], d21[0], d23[0]}, [r4], r1
++        vld4.16  {d25[0], d27[0], d29[0], d31[0]}, [r0], r1
++
++        vld4.16  {d17[1], d19[1], d21[1], d23[1]}, [r4], r1
++        vld4.16  {d25[1], d27[1], d29[1], d31[1]}, [r0], r1
++
++        vld4.16  {d17[2], d19[2], d21[2], d23[2]}, [r4], r1
++        vld4.16  {d25[2], d27[2], d29[2], d31[2]}, [r0], r1
++
++        vld4.16  {d17[3], d19[3], d21[3], d23[3]}, [r4]  @ no post-increment: r4 and r0 stay on the last row
++        vld4.16  {d25[3], d27[3], d29[3], d31[3]}, [r0]
++
++        bl hevc_loop_filter_luma_body_\bit_depth
++
++        add      r6, r4, r1             @ r1 is used here as a negative stride: r6 = second-to-last row, left side
++        add      r2, r0, r1             @ r2 = second-to-last row, right side
++        lsl      r1, #1                 @ double the step: r4/r6 and r0/r2 interleave rows going upwards
++
++        vpop     {d8-d15}
++
++        @ p[1]
++        bmi      1f                     @ N set: skip writing the P (left) side
++        vst4.16  {d17[3], d19[3], d21[3], d23[3]}, [r4], r1
++        vst4.16  {d17[2], d19[2], d21[2], d23[2]}, [r6], r1
++        vst4.16  {d17[1], d19[1], d21[1], d23[1]}, [r4], r1
++        vst4.16  {d17[0], d19[0], d21[0], d23[0]}, [r6], r1
++        vst4.16  {d16[3], d18[3], d20[3], d22[3]}, [r4], r1
++        vst4.16  {d16[2], d18[2], d20[2], d22[2]}, [r6], r1
++        vst4.16  {d16[1], d18[1], d20[1], d22[1]}, [r4], r1
++        vst4.16  {d16[0], d18[0], d20[0], d22[0]}, [r6]
++1:
++        @ q[1]
++        bcs      1f                     @ C set: skip writing the Q (right) side
++        vst4.16  {d25[3], d27[3], d29[3], d31[3]}, [r0], r1
++        vst4.16  {d25[2], d27[2], d29[2], d31[2]}, [r2], r1
++        vst4.16  {d25[1], d27[1], d29[1], d31[1]}, [r0], r1
++        vst4.16  {d25[0], d27[0], d29[0], d31[0]}, [r2], r1
++        vst4.16  {d24[3], d26[3], d28[3], d30[3]}, [r0], r1
++        vst4.16  {d24[2], d26[2], d28[2], d30[2]}, [r2], r1
++        vst4.16  {d24[1], d26[1], d28[1], d30[1]}, [r0], r1
++        vst4.16  {d24[0], d26[0], d28[0], d30[0]}, [r2]
++1:
++        pop      {r4-r10,pc}
++.endm
++
++
++
++
++@ void (*hevc_h_loop_filter_luma)(uint8_t *pix,     [r0]
++@                                 ptrdiff_t stride, [r1]
++@                                 int beta,         [r2]
++@                                 int32_t *tc,      [r3]
++@                                 uint8_t *no_p,    sp[0]
++@                                 uint8_t *no_q);   sp[4]
++@
++@ Src should always be on an 8 byte boundary & all in the same slice
++
++function ff_hevc_rpi_h_loop_filter_luma_neon_8, export=1
++        hevc_loop_filter_luma_start
++        b        .Lh_loop_filter_luma_common_8  @ shared tail lives in ff_hevc_rpi_h_loop_filter_luma2_neon_8
++endfunc
++
++function ff_hevc_rpi_h_loop_filter_luma2_neon_8, export=1
++        cmp      r3, #0
++        it       eq
++        bxeq     lr                     @ r3 (packed tc) == 0: nothing to filter
++        push     {r4-r10,lr}            @ 32 bytes
++        ldr      r10, [sp, #32]         @ r10 = first stack argument (original sp+0)
++
++.Lh_loop_filter_luma_common_8:
++        sub      r4, r0, r1, lsl #2     @ r4 = pix - 4 * stride (row P3)
++        add      r0, r4, r1             @ r0 = pix - 3 * stride (row P2)
++        lsl      r1, #1                 @ r1 = 2 * stride: r4 and r0 interleave rows
++        vpush    {d8-d15}
++
++        vld1.8  {d16}, [r4], r1         @ P3
++        vld1.8  {d17}, [r0], r1         @ P2
++        vld1.8  {d18}, [r4], r1         @ P1
++        vld1.8  {d19}, [r0], r1         @ P0
++        vld1.8  {d20}, [r4], r1         @ Q0
++        vld1.8  {d21}, [r0], r1         @ Q1
++        vld1.8  {d22}, [r4]             @ Q2 (r4 stays on this row)
++        vld1.8  {d23}, [r0]             @ Q3 (r0 stays on this row)
++
++        bl hevc_loop_filter_luma_body   @ returns with r1 negated, N = r10 bit 0, C = r10 bit 1
++
++        add      r0, r0, r1, lsl #1     @ r0 = row P0
++        add      r2, r4, r1, lsl #1     @ r2 = row P1
++        add      r6, r4, r1, asr #1     @ r6 = row Q1
++        vpop     {d8-d15}
++
++        @ Q2-Q0 (d22-d20), skipped when C (r10 bit 1) is set
++        bcs      1f
++        vst1.8   {d22}, [r4], r1
++        vst1.8   {d21}, [r6]
++        vst1.8   {d20}, [r4]
++1:
++        @ P0-P2 (d19-d17), skipped when N (r10 bit 0) is set
++        bmi      1f
++        vst1.8   {d19}, [r0], r1
++        vst1.8   {d18}, [r2]
++        vst1.8   {d17}, [r0]
++1:
++        pop      {r4-r10,pc}
++endfunc
++
++
++.macro m_filter_h_luma_16 bit_depth
++        sub      r4, r0, r1, lsl #2     @ r4 = pix - 4 * stride (row P3)
++        add      r0, r4, r1             @ r0 = pix - 3 * stride (row P2)
++        lsl      r1, #1                 @ r1 = 2 * stride: r4 and r0 interleave rows
++        vpush    {d8-d15}
++
++        vld1.16 { q8}, [r4], r1         @ P3
++        vld1.16 { q9}, [r0], r1         @ P2
++        vld1.16 {q10}, [r4], r1         @ P1
++        vld1.16 {q11}, [r0], r1         @ P0
++        vld1.16 {q12}, [r4], r1         @ Q0
++        vld1.16 {q13}, [r0], r1         @ Q1
++        vld1.16 {q14}, [r4]             @ Q2 (r4 stays on this row)
++        vld1.16 {q15}, [r0]             @ Q3 (r0 stays on this row)
++
++        bl hevc_loop_filter_luma_body_\bit_depth
++
++        add      r0, r0, r1, lsl #1     @ r1 is used here as a negative stride: r0 = row P0
++        add      r2, r4, r1, lsl #1     @ r2 = row P1
++        add      r6, r4, r1, asr #1     @ r6 = row Q1
++        vpop     {d8-d15}
++
++        @ Q2-Q0 (q14-q12), skipped when C is set
++        bcs      1f
++        vst1.16  {q14}, [r4], r1
++        vst1.16  {q13}, [r6]
++        vst1.16  {q12}, [r4]
++1:
++        bmi      1f                     @ P0-P2 (q11-q9), skipped when N is set
++        vst1.16  {q11}, [r0], r1
++        vst1.16  {q10}, [r2]
++        vst1.16  { q9}, [r0]
++1:
++        pop      {r4-r10,pc}
++.endm
++
++
++@ void ff_hevc_rpi_h_loop_filter_uv_neon_8(uint8_t * src_r,      // r0
++@                                     unsigned int stride,   // r1
++@                                     uint32_t tc4,          // r2
++@                                     unsigned int no_f);    // r3
++@
++@ no_f
++@ 0  tl P0
++@ 1  tr P1
++@ 2  bl Q0
++@ 3  br Q1
++@
++@ Probably not worth having the P/Qa only special case in this direction
++@ Given layout we won't save any memory reads or avoid any cache dirtying
++@ We would save a bit of computation but I expect the partials to be less
++@ common in the H direction than V due to how we arrange deblock.
++
++function ff_hevc_rpi_h_loop_filter_uv_neon_8, export=1
++        sub      r12, r0, r1            @ r12 = src - stride (row P0)
++        cmp      r2, #0
++        it eq
++        bxeq     lr                     @ tc4 == 0: nothing to filter
++        vld1.8   {d26,d27}, [r0]        @ row Q0
++        lsl      r1, #1                 @ r1 = 2 * stride
++        sub      r0, r1                 @ r0 = src - 2 * stride (row P1)
++        vld1.8   {d18,d19}, [r12], r1   @ row P0; r12 -> src + stride
++        vld1.8   {d16,d17}, [r0], r1    @ row P1; r0 -> src
++        vld1.8   {d28,d29}, [r12]       @ row Q1
++
++        hevc_loop_filter_uv_body2 d16, d17, d18, d19, d26, d27, d28, d29, \
++        "sub      r12, r0, r1, asr #1"
++
++        lsls     r3, #29                @ b2 -> N, b3 -> C
++        it pl
++        vstrpl   d26, [r0, #0]          @ Q0 first half, written unless no_f bit 2 is set
++        it cc
++        vstrcc   d27, [r0, #8]          @ Q0 second half, written unless no_f bit 3 is set
++        lsls     r3, #2                 @ b0 -> N, b1 -> C
++        it pl
++        vstrpl   d18, [r12, #0]         @ P0 first half, written unless no_f bit 0 is set
++        it cc
++        vstrcc   d19, [r12, #8]         @ P0 second half, written unless no_f bit 1 is set
++        bx       lr
++
++endfunc
++
++
++@ void ff_hevc_rpi_h_loop_filter_uv_neon_10(uint8_t * src_r,     // r0
++@                                     unsigned int stride,   // r1
++@                                     uint32_t tc4,          // r2
++@                                     unsigned int no_f);    // r3
++@
++@ no_f = b0:no_p[0], b1:no_p[1], b2:no_q[0], b3:no_q[1]
++@
++@ Macro here actual function near bottom
++
++.macro m_filter_h_uv_16 bit_depth
++        sub      r12, r0, r1            @ r12 = src - stride (row P0)
++        cmp      r2, #0
++        it eq
++        bxeq     lr                     @ tc4 == 0: nothing to filter
++        vld1.16  {q12, q13}, [r0]       @ row Q0
++        lsl      r1, #1                 @ r1 = 2 * stride
++        sub      r0, r1                 @ r0 = src - 2 * stride (row P1)
++        vld1.16  {q10, q11}, [r12], r1  @ row P0; r12 -> src + stride
++        vld1.16  {q8,  q9 }, [r0], r1   @ row P1; r0 -> src
++        vld1.16  {q14, q15}, [r12]      @ row Q1
++
++        hevc_loop_filter_uv_body2_16  q8, q9, q10, q11, q12, q13, q14, q15, \bit_depth, \
++        "sub      r12, r0, r1, asr #1", \
++        "cmp      r3, #0"
++
++        bne      1f                     @ flags come from the cmp r3, #0 passed into the macro above
++        vst1.16  {q10, q11}, [r12]      @ no_f == 0: write both P0 and Q0 rows whole
++        vst1.16  {q12, q13}, [r0]
++        bx       lr
++
++        @ At least one no_f bit is set
++        @ Which means we need to break this apart in an ugly fashion
++1:
++        lsls     r3, #29                @ b2 -> N, b3 -> C
++        itt pl
++        vstrpl   d24, [r0, #0]          @ Q0 first half (q12), written unless no_f bit 2 is set
++        vstrpl   d25, [r0, #8]
++        itt cc
++        vstrcc   d26, [r0, #16]         @ Q0 second half (q13), written unless no_f bit 3 is set
++        vstrcc   d27, [r0, #24]
++        lsls     r3, #2                 @ b0 -> N, b1 -> C
++        itt pl
++        vstrpl   d20, [r12, #0]         @ P0 first half (q10), written unless no_f bit 0 is set
++        vstrpl   d21, [r12, #8]
++        itt cc
++        vstrcc   d22, [r12, #16]        @ P0 second half (q11), written unless no_f bit 1 is set
++        vstrcc   d23, [r12, #24]
++        bx       lr
++.endm
++
++
++@ void ff_hevc_rpi_v_loop_filter_uv2_neon(uint8_t * src_r,       // r0
++@                                     unsigned int stride,   // r1
++@                                     uint32_t tc4,          // r2
++@                                     uint8_t * src_l,       // r3
++@                                     unsigned int no_f);   // sp[0]
++@
++@ no_f:
++@ 0  tl P0a
++@ 1  tr Q0a
++@ 2  bl P0b
++@ 3  br Q0b
++
++function ff_hevc_rpi_v_loop_filter_uv2_neon_8, export=1
++        cmp      r2, #0
++        it eq
++        bxeq     lr
++        push     {lr}
++        vld2.16  {d16[0], d18[0]}, [r3], r1
++        vld2.16  {d20[0], d22[0]}, [r0], r1
++
++        cmp      r2, #0x10000
++        vld2.16  {d16[1], d18[1]}, [r3], r1
++        vld2.16  {d20[1], d22[1]}, [r0], r1
++
++        vld2.16  {d16[2], d18[2]}, [r3], r1
++        vld2.16  {d20[2], d22[2]}, [r0], r1
++
++        vld2.16  {d16[3], d18[3]}, [r3], r1
++        vld2.16  {d20[3], d22[3]}, [r0], r1
++        blo      10f
++
++        vld2.16  {d17[0], d19[0]}, [r3], r1
++        vld2.16  {d21[0], d23[0]}, [r0], r1
++
++        sub      ip, r0, r3
++        vld2.16  {d17[1], d19[1]}, [r3], r1
++        vld2.16  {d21[1], d23[1]}, [r0], r1
++
++        cmp      ip, #4
++        vld2.16  {d17[2], d19[2]}, [r3], r1
++        vld2.16  {d21[2], d23[2]}, [r0], r1
++
++        vld2.16  {d17[3], d19[3]}, [r3]
++        vld2.16  {d21[3], d23[3]}, [r0]
++
++        hevc_loop_filter_uv_body2 d16, d17, d18, d19, d20, d21, d22, d23 \
++        "ldr      lr, [sp, #4]", \
++        "neg      r1, r1",       \
++        "it eq; cmpeq lr, #0",   \
++        "add      r3, #2",       \
++        "add      ip, r3, r1",   \
++        "add      r2, r0, r1",   \
++        "lsl      r1, #1"
++
++        bne      1f
++
++@ Much/most of the time r0 == r3 + 4 and no_f == 0
++@ so it is worth having this special case
++        vst2.16   {d19[3], d21[3]}, [r3], r1    @ P0b, Q0b
++        vst2.16   {d19[2], d21[2]}, [ip], r1
++        vst2.16   {d19[1], d21[1]}, [r3], r1
++        vst2.16   {d19[0], d21[0]}, [ip], r1
++        vst2.16   {d18[3], d20[3]}, [r3], r1    @ P0a, Q0a
++        vst2.16   {d18[2], d20[2]}, [ip], r1
++        vst2.16   {d18[1], d20[1]}, [r3]
++        vst2.16   {d18[0], d20[0]}, [ip]
++        pop       {pc}
++
++@ Either split or partial
++1:
++        lsls     lr, #29               @ b3 (Q0b) -> C, b2 (P0b) -> N & b31, b1 (Q0a) -> b30, b0 (P0a) -> b29
++        ittt cs
++        addcs    r0, r0, r1, lsl #1
++        addcs    r2, r2, r1, lsl #1
++        bcs      1f
++        @ Q0b
++        vst1.16  {d21[3]}, [r0], r1
++        vst1.16  {d21[2]}, [r2], r1
++        vst1.16  {d21[1]}, [r0], r1
++        vst1.16  {d21[0]}, [r2], r1
++1:
++        ittt mi
++        addmi    r3, r3, r1, lsl #1
++        addmi    ip, ip, r1, lsl #1
++        bmi      1f
++        @ P0b
++        vst1.16  {d19[3]}, [r3], r1
++        vst1.16  {d19[2]}, [ip], r1
++        vst1.16  {d19[1]}, [r3], r1
++        vst1.16  {d19[0]}, [ip], r1
++1:
++        lsls     lr, #2                @ b30 (Q0a) -> C, b29 (P0a) -> N & b31
++        bcs      1f
++        @ Q0a
++        vst1.16  {d20[3]}, [r0], r1
++        vst1.16  {d20[2]}, [r2], r1
++        vst1.16  {d20[1]}, [r0]
++        vst1.16  {d20[0]}, [r2]
++1:
++        it       mi
++        popmi    {pc}
++        @ P0a
++        vst1.16  {d18[3]}, [r3], r1
++        vst1.16  {d18[2]}, [ip], r1
++        vst1.16  {d18[1]}, [r3]
++        vst1.16  {d18[0]}, [ip]
++        pop      {pc}
++
++@ Single lump (rather than double)
++10:
++        @ As we have post inced r0/r3 in the load the easiest thing to do is
++        @ to subtract and write forwards, rather than backwards (as above)
++        @ b0 (P0a) -> N, b1 (Q0a) -> C
++
++        hevc_loop_filter_uv_body1 d16, d18, d20, d22 \
++        "ldr      lr, [sp, #4]",       \
++        "add      r3, #2",             \
++        "sub      r0, r0, r1, lsl #2", \
++        "sub      r3, r3, r1, lsl #2", \
++        "lsls     lr, #31",            \
++        "add      r2, r0, r1",         \
++        "add      ip, r3, r1",         \
++        "lsl      r1, #1"
++
++        bcs      3f
++        @ Q0a
++        vst1.16  {d20[0]}, [r0], r1
++        vst1.16  {d20[1]}, [r2], r1
++        vst1.16  {d20[2]}, [r0]
++        vst1.16  {d20[3]}, [r2]
++3:
++        it       mi
++        popmi    {pc}
++        @ P0a
++        vst1.16  {d18[0]}, [r3], r1
++        vst1.16  {d18[1]}, [ip], r1
++        vst1.16  {d18[2]}, [r3]
++        vst1.16  {d18[3]}, [ip]
++        pop      {pc}
++
++endfunc
++
++
++@ void ff_hevc_rpi_v_loop_filter_uv2_neon(uint8_t * src_r,       // r0
++@                                     unsigned int stride,   // r1
++@                                     uint32_t tc4,          // r2
++@                                     uint8_t * src_l,       // r3
++@                                     unsigned int no_f);   // sp[0]
++@
++
++@ no_f
++@ 0  tl P0a
++@ 1  tr Q0a
++@ 2  bl P0b
++@ 3  br Q0b
++
++@ P1: q8,  q12
++@ P0: q9,  q13
++@ Q0: q10, q14
++@ Q1: q11, q15
++
++.macro m_filter_v_uv2_16 bit_depth
++        cmp      r2, #0
++        it eq
++        bxeq     lr
++        push     {lr}
++        vld2.32  {d16[0], d18[0]}, [r3], r1
++        vld2.32  {d20[0], d22[0]}, [r0], r1
++
++        cmp      r2, #0x10000
++        vld2.32  {d16[1], d18[1]}, [r3], r1
++        vld2.32  {d20[1], d22[1]}, [r0], r1
++
++        vld2.32  {d17[0], d19[0]}, [r3], r1
++        vld2.32  {d21[0], d23[0]}, [r0], r1
++
++        vld2.32  {d17[1], d19[1]}, [r3], r1
++        vld2.32  {d21[1], d23[1]}, [r0], r1
++        blo      10f
++
++        vld2.32  {d24[0], d26[0]}, [r3], r1
++        vld2.32  {d28[0], d30[0]}, [r0], r1
++
++        sub      ip, r0, r3
++        vld2.32  {d24[1], d26[1]}, [r3], r1
++        vld2.32  {d28[1], d30[1]}, [r0], r1
++
++        cmp      ip, #8
++        vld2.32  {d25[0], d27[0]}, [r3], r1
++        vld2.32  {d29[0], d31[0]}, [r0], r1
++
++        vld2.32  {d25[1], d27[1]}, [r3]
++        vld2.32  {d29[1], d31[1]}, [r0]
++
++        hevc_loop_filter_uv_body2_16  q8, q12, q9, q13, q10, q14, q11, q15, \bit_depth, \
++        "ldr      lr, [sp, #4]", \
++        "neg      r1, r1",       \
++        "it eq; cmpeq lr, #0",   \
++        "add      r3, #4",       \
++        "add      ip, r3, r1",   \
++        "add      r2, r0, r1",   \
++        "lsl      r1, #1"
++
++        bne      1f
++
++@ Much/most of the time r0 == r3 + 8 and no_f == 0
++@ so it is worth having this special case
++        vst2.32   {d27[1], d29[1]}, [r3], r1    @ P0b, Q0b
++        vst2.32   {d27[0], d29[0]}, [ip], r1
++        vst2.32   {d26[1], d28[1]}, [r3], r1
++        vst2.32   {d26[0], d28[0]}, [ip], r1
++        vst2.32   {d19[1], d21[1]}, [r3], r1    @ P0a, Q0a
++        vst2.32   {d19[0], d21[0]}, [ip], r1
++        vst2.32   {d18[1], d20[1]}, [r3]
++        vst2.32   {d18[0], d20[0]}, [ip]
++        pop       {pc}
++
++@ Either split or partial
++1:
++        lsls     lr, #29               @ b3 (Q0b) -> C, b2 (P0b) -> N & b31, b1 (Q0a) -> b30, b0 (P0a) -> b29
++        ittt cs
++        addcs    r0, r0, r1, lsl #1
++        addcs    r2, r2, r1, lsl #1
++        bcs      1f
++        @ Q0b
++        vst1.32  {d29[1]}, [r0], r1
++        vst1.32  {d29[0]}, [r2], r1
++        vst1.32  {d28[1]}, [r0], r1
++        vst1.32  {d28[0]}, [r2], r1
++1:
++        ittt mi
++        addmi    r3, r3, r1, lsl #1
++        addmi    ip, ip, r1, lsl #1
++        bmi      1f
++        @ P0b
++        vst1.32  {d27[1]}, [r3], r1
++        vst1.32  {d27[0]}, [ip], r1
++        vst1.32  {d26[1]}, [r3], r1
++        vst1.32  {d26[0]}, [ip], r1
++1:
++        lsls     lr, #2                @ b30 (Q0a) -> C, b29 (P0a) -> N & b31
++        bcs      1f
++        @ Q0a
++        vst1.32  {d21[1]}, [r0], r1
++        vst1.32  {d21[0]}, [r2], r1
++        vst1.32  {d20[1]}, [r0]
++        vst1.32  {d20[0]}, [r2]
++1:
++        it       mi
++        popmi    {pc}
++        @ P0a
++        vst1.32  {d19[1]}, [r3], r1
++        vst1.32  {d19[0]}, [ip], r1
++        vst1.32  {d18[1]}, [r3]
++        vst1.32  {d18[0]}, [ip]
++        pop      {pc}
++
++@ Single lump (rather than double)
++10:
++        @ As we have post inced r0/r3 in the load the easiest thing to do is
++        @ to subtract and write forwards, rather than backwards (as above)
++        @ b0 (P0a) -> N, b1 (Q0a) -> C
++
++        hevc_loop_filter_uv_body1_16  q8, q9, q10, q11, \bit_depth, \
++        "ldr      lr, [sp, #4]",       \
++        "add      r3, #4",             \
++        "sub      r0, r0, r1, lsl #2", \
++        "sub      r3, r3, r1, lsl #2", \
++        "lsls     lr, #31",            \
++        "add      r2, r0, r1",         \
++        "add      ip, r3, r1",         \
++        "lsl      r1, #1"
++
++        bcs      3f
++        @ Q0a
++        vst1.32  {d20[0]}, [r0], r1
++        vst1.32  {d20[1]}, [r2], r1
++        vst1.32  {d21[0]}, [r0]
++        vst1.32  {d21[1]}, [r2]
++3:
++        it       mi
++        popmi    {pc}
++        @ P0a
++        vst1.32  {d18[0]}, [r3], r1
++        vst1.32  {d18[1]}, [ip], r1
++        vst1.32  {d19[0]}, [r3]
++        vst1.32  {d19[1]}, [ip]
++        pop      {pc}
++.endm
++
++
++@ The NEON version is faster under ideal circumstances (i.e. everything in L1)
++@ But in real world testing it is ~20% slower, presumably due to code size
++
++#if 0 // NEON version
++
++/* uint32_t ff_hevc_rpi_deblocking_boundary_strengths_neon(int pus, int dup, const HEVCRpiMvField *curr, const HEVCRpiMvField *neigh,
++ *                                            const int *curr_rpl0, const int *curr_rpl1, const int *neigh_rpl0, const int *neigh_rpl1,
++ *                                            int in_inc0, int in_inc1)
++ */
++function ff_hevc_rpi_deblocking_boundary_strengths_neon, export=1
++        mov         ip, sp
++        push        {a1-a3,v1-v8,lr}
++        ldm         ip, {v1-v6}
++        cmp         a1, #2
++        bls         2f
++        vpush       {d8-d13}
++        sub         v5, v5, #10
++        sub         v6, v6, #10
++1:
++        vld2.32     {d0[0], d2[0]}, [a3]!
++        vld2.32     {d4[0], d6[0]}, [a4]!
++          vmov.u8     q12, #0
++        ldrb        a2, [a3], #1
++        ldrb        ip, [a4], #1
++        ldrb        v8, [a3], #1
++        ldrb        lr, [a4], #1
++        add         a2, v1, a2, lsl #2
++        vld1.8      {d24[0]}, [a3], v5
++        add         ip, v3, ip, lsl #2
++        vld1.8      {d25[0]}, [a4], v6
++        add         v8, v2, v8, lsl #2
++        vld1.32     {d16[0]}, [a2]
++        add         lr, v4, lr, lsl #2
++        vld1.32     {d20[0]}, [ip]
++        vld1.32     {d18[0]}, [v8]
++        vld1.32     {d22[0]}, [lr]
++
++        vld2.32     {d0[1], d2[1]}, [a3]!
++        vld2.32     {d4[1], d6[1]}, [a4]!
++        ldrb        a2, [a3], #1
++          vmov.u16    d12, #1
++        ldrb        ip, [a4], #1
++          vmov.u16    d13, #2
++        ldrb        v8, [a3], #1
++          vmov.u16    d27, #4
++        ldrb        lr, [a4], #1
++        add         a2, v1, a2, lsl #2
++        vld1.8      {d24[2]}, [a3], v5
++        add         ip, v3, ip, lsl #2
++        vld1.8      {d25[2]}, [a4], v6
++        add         v8, v2, v8, lsl #2
++        vld1.32     {d16[1]}, [a2]
++        add         lr, v4, lr, lsl #2
++        vld1.32     {d20[1]}, [ip]
++        vld1.32     {d18[1]}, [v8]
++        vld1.32     {d22[1]}, [lr]
++
++        vld2.32     {d1[0], d3[0]}, [a3]!
++        vld2.32     {d5[0], d7[0]}, [a4]!
++        ldrb        a2, [a3], #1
++        ldrb        ip, [a4], #1
++        ldrb        lr, [a4], #1
++        ldrb        v8, [a3], #1
++        add         a2, v1, a2, lsl #2
++        vld1.8      {d24[4]}, [a3], v5
++        add         ip, v3, ip, lsl #2
++        vld1.8      {d25[4]}, [a4], v6
++        add         v8, v2, v8, lsl #2
++        vld1.32     {d17[0]}, [a2]
++        add         lr, v4, lr, lsl #2
++        vld1.32     {d21[0]}, [ip]
++        vld1.32     {d19[0]}, [v8]
++        vld1.32     {d23[0]}, [lr]
++
++        vld2.32     {d1[1], d3[1]}, [a3]!
++        vld2.32     {d5[1], d7[1]}, [a4]!
++        ldrb        a2, [a3], #1
++        ldrb        ip, [a4], #1
++        ldrb        v8, [a3], #1
++        ldrb        lr, [a4], #1
++        add         a2, v1, a2, lsl #2
++        vld1.8      {d24[6]}, [a3], v5
++        add         ip, v3, ip, lsl #2
++        vld1.8      {d25[6]}, [a4], v6
++        add         v8, v2, v8, lsl #2
++        vld1.32     {d17[1]}, [a2]
++        add         lr, v4, lr, lsl #2
++        vld1.32     {d21[1]}, [ip]
++        vld1.32     {d19[1]}, [v8]
++        vld1.32     {d23[1]}, [lr]
++
++        @ So now we have:
++        @ q0.32[i]  = curr[i].mv[0]
++        @ q1.32[i]  = curr[i].mv[1]
++        @ q2.32[i]  = neigh[i].mv[0]
++        @ q3.32[i]  = neigh[i].mv[1]
++        @ q8.32[i]  = curr_rpl0[curr[i].ref_idx[0]]
++        @ q9.32[i]  = curr_rpl1[curr[i].ref_idx[1]]
++        @ q10.32[i] = neigh_rpl0[neigh[i].ref_idx[0]]
++        @ q11.32[i] = neigh_rpl1[neigh[i].ref_idx[1]]
++        @ d24.16[i] = curr[i].pred_flag
++        @ d25.16[i] = neigh[i].pred_flag
++
++        vtst.16     d28, d24, d12
++        vtst.16     d29, d24, d13
++        vadd.i16    d8, d24, d12
++        vadd.i16    d9, d25, d12
++        vtst.16     d30, d25, d12
++        vtst.16     d31, d25, d13
++        veor        d26, d8, d9
++          ldr         lr, [sp, 6*8 + 1*4]
++        vmovl.s16   q4, d28
++        vmovl.s16   q5, d29
++          teq         lr, #1
++        vmovl.s16   q14, d30
++          it ne
++          lslne       v1, lr, #1
++        vmovl.s16   q15, d31
++          it ne
++          rsbne       v2, v1, #32
++        vbif        q0, q1, q4
++        vbif        q2, q3, q14
++        vbif        q1, q0, q5
++        vbif        q3, q2, q15
++        vabd.s16    q12, q0, q2
++        vabd.s16    q2, q1
++        vabd.s16    q0, q3
++        vabd.s16    q1, q3
++        vbif        q8, q9, q4
++        vbif        q10, q11, q14
++        vbif        q9, q8, q5
++        vbif        q11, q10, q15
++        vclt.u16    d6, d24, d27
++        vclt.u16    d8, d2, d27
++        vclt.u16    d7, d25, d27
++        vclt.u16    d9, d3, d27
++        vclt.u16    d2, d0, d27
++        vclt.u16    d0, d4, d27
++        vclt.u16    d3, d1, d27
++        vclt.u16    d1, d5, d27
++        vceq.i32    q12, q10, q8
++        vceq.i32    q10, q9
++        vceq.i32    q8, q11
++        vceq.i32    q9, q11
++        vshrn.i32   d6, q3, #8
++        vshrn.i32   d7, q4, #8
++        vshrn.i32   d8, q1, #8
++        vshrn.i32   d9, q0, #8
++        vmovn.i32   d4, q12
++        vmovn.i32   d2, q10
++        vmovn.i32   d3, q8
++        vmovn.i32   d5, q9
++        vand        q2, q3
++        vrev16.8    q3, q3
++        vand        q2, q3
++        vand        q1, q4
++        vrev16.8    q4, q4
++        vand        q1, q4
++        vand        d4, d5
++        vand        d2, d3
++        vbic        d0, d12, d4
++        vshr.u16    d26, #2
++        vbic        d0, d2
++        vmov.i16    d1, #0x5555
++        vorr        d0, d26
++          bne         10f
++
++        @ Merge results into result word, no duplicates
++        vmov        a2, s0
++        vmov        v8, s1
++        vmov.u16    ip, d0[1]
++        vmov.u16    lr, d0[3]
++        lsl         a2, #30
++        lsl         v8, #30
++        lsl         ip, #30
++        lsl         lr, #30
++        orr         a2, ip, a2, lsr #2
++        orr         v8, lr, v8, lsr #2
++        orr         a2, v8, a2, lsr #4
++        subs        a1, #4
++        orr         v7, a2, v7, lsr #8
++        bhi         1b
++
++        mov         a1, #32
++        ldr         a3, [sp, #6*8]
++        vpop        {d8-d13}
++        sub         a1, a1, a3, lsl #1
++        mov         a1, v7, lsr a1
++        pop         {a2-a4,v1-v8,pc}
++10:
++        @ Merge results into result word, with duplicates
++        vmul.i16    d0, d1
++        vmov        a2, s0
++        vmov        v8, s1
++        vmov.u16    ip, d0[1]
++        vmov.u16    lr, d0[3]
++        lsl         a2, v2
++        subs        a1, #4
++        lsl         v8, v2
++        lsl         ip, v2
++        lsl         lr, v2
++        ldr         v2, [sp, #6*8 + 12*4 + 1*4]
++T       lsr         a2, v1
++T       orr         a2, ip, a2
++A       orr         a2, ip, a2, lsr v1
++        lsl         ip, v1, #1
++T       lsr         v8, v1
++T       orr         v8, lr, v8
++A       orr         v8, lr, v8, lsr v1
++        lsl         lr, v1, #2
++T       lsr         a2, ip
++T       orr         a2, v8, a2
++A       orr         a2, v8, a2, lsr ip
++        ldr         v1, [sp, #6*8 + 12*4]
++T       lsr         v7, lr
++T       orr         v7, a2, v7
++A       orr         v7, a2, v7, lsr lr
++        bhi         1b
++
++        mov         a1, #32
++        ldrd        a3, a4, [sp, #6*8]
++        vpop        {d8-d13}
++        mls         a1, a3, a4, a1
++        mls         a1, a3, a4, a1
++        mov         a1, v7, lsr a1
++        pop         {a2-a4,v1-v8,pc}
++
++
++2:
++        sub         v5, v5, #10
++        sub         v6, v6, #10
++        vmov.u8     d16, #0
++        blo         3f
++        vld2.32     {d0[0], d1[0]}, [a3]!
++        vld2.32     {d2[0], d3[0]}, [a4]!
++        ldrb        a2, [a3], #1
++        ldrb        ip, [a4], #1
++        ldrb        lr, [a4], #1
++        ldrb        v8, [a3], #1
++        add         a2, v1, a2, lsl #2
++        vld1.8      {d16[0]}, [a3], v5
++        add         ip, v3, ip, lsl #2
++        vld1.8      {d16[4]}, [a4], v6
++        add         v8, v2, v8, lsl #2
++        vld1.32     {d4[0]}, [a2]
++        add         lr, v4, lr, lsl #2
++        vld1.32     {d5[0]}, [ip]
++        vld1.32     {d6[0]}, [v8]
++        vld1.32     {d7[0]}, [lr]
++
++3:
++        vld2.32     {d0[1], d1[1]}, [a3]!
++        vld2.32     {d2[1], d3[1]}, [a4]!
++        ldrb        a2, [a3], #1
++          vmov.u16    d17, #1
++        ldrb        ip, [a4], #1
++          vmov.u16    d18, #2
++        ldrb        v8, [a3], #1
++          vmov.u16    d19, #4
++        ldrb        lr, [a4], #1
++        add         a2, v1, a2, lsl #2
++        vld1.8      {d16[2]}, [a3], v5
++        add         ip, v3, ip, lsl #2
++        vld1.8      {d16[6]}, [a4], v6
++        add         v8, v2, v8, lsl #2
++        vld1.32     {d4[1]}, [a2]
++        add         lr, v4, lr, lsl #2
++        vld1.32     {d5[1]}, [ip]
++        vld1.32     {d6[1]}, [v8]
++        vld1.32     {d7[1]}, [lr]
++
++        @ So now we have:
++        @ d0.32[i]  = curr[i].mv[0]
++        @ d1.32[i]  = curr[i].mv[1]
++        @ d2.32[i]  = neigh[i].mv[0]
++        @ d3.32[i]  = neigh[i].mv[1]
++        @ d4.32[i] = curr_rpl0[curr[i].ref_idx[0]]
++        @ d5.32[i] = neigh_rpl0[neigh[i].ref_idx[0]]
++        @ d6.32[i] = curr_rpl1[curr[i].ref_idx[1]]
++        @ d7.32[i] = neigh_rpl1[neigh[i].ref_idx[1]]
++        @ d16.16[i] = curr[i].pred_flag
++        @ d16.16[2+i] = neigh[i].pred_flag
++
++        vtst.16     d20, d16, d17
++        vtst.16     d22, d16, d18
++        vadd.i16    d30, d16, d17
++        vswp        d2, d3
++        ldr         lr, [sp, #1*4]
++        vmovl.s16   q10, d20
++          teq         lr, #1
++        vmovl.s16   q11, d22
++          it ne
++          lslne       v1, lr, #1
++        vbif        d0, d1, d20
++        vbif        d4, d6, d20
++        vbif        d3, d2, d21
++        vbif        d5, d7, d21
++        vbif        d1, d0, d22
++        vbif        d6, d4, d22
++        vbif        d2, d3, d23
++        vbif        d7, d5, d23
++        vshr.u16    d30, #2
++        vabd.s16    d24, d0, d3
++        vabd.s16    d25, d1, d2
++        vabd.s16    q0, q0, q1
++        vceq.i32    d2, d4, d5
++        vceq.i32    d20, d5, d6
++        vceq.i32    d21, d4, d7
++        vceq.i32    d3, d6, d7
++        vclt.u16    d6, d24, d19
++        vclt.u16    d7, d25, d19
++        vclt.u16    d22, d1, d19
++        vclt.u16    d23, d0, d19
++        vshrn.i32   d6, q3, #8
++        vmovn.i32   d2, q1
++        vshrn.i32   d7, q11, #8
++        vmovn.i32   d3, q10
++        vand        q0, q3, q1
++          it ne
++          rsbne       v2, v1, #32
++        vrev16.8    q3, q3
++        vand        q0, q3
++        vsra.u64    d30, #32
++        vshr.u64    q1, q0, #32
++        vand        q0, q1
++        vbic        d0, d17, d0
++        vand        d30, d30, d17
++        vbic        d0, d1
++        vmov.i16    d1, #0x5555
++        vorr        d0, d30
++          bne         10f
++
++        @ Construct result word, no duplicates
++        cmp         a1, #2
++        vmov.u16    a1, d0[1]
++        vmov.u16    a2, d0[0]
++        it eq
++        orreq       a1, a2, a1, lsl #2
++        pop         {a2-a4,v1-v8,pc}
++10:
++        @ Construct result word, with duplicates
++        cmp         a1, #2
++        vmul.i16    d0, d1
++        vmov        a2, s0
++        vmov.u16    a1, d0[1]
++        lsl         a2, #16
++        pkhbt       a1, a1, a1, lsl #16
++        lsr         a2, v2
++        lsr         a1, v2
++T       itt eq
++T       lsleq       a1, v1
++T       orreq       a1, a2, a1
++A       orreq       a1, a2, a1, lsl v1
++        pop         {a2-a4,v1-v8,pc}
++endfunc
++
++
++
++#else // non-NEON version
++
++
++/* uint32_t ff_hevc_rpi_deblocking_boundary_strengths_neon(int pus, int dup, const HEVCRpiMvField *curr, const HEVCRpiMvField *neigh,
++ *                                            const int *curr_rpl0, const int *curr_rpl1, const int *neigh_rpl0, const int *neigh_rpl1,
++ *                                            int in_inc0, int in_inc1)
++ */
++function ff_hevc_rpi_deblocking_boundary_strengths_neon, export=1
++        add         ip, sp, #4*4    @ ip -> just past the first four stacked words
++        push        {a2-a4,v1-v8,lr} @ 12 words; saved a2 ends up at [sp]
++        mov         v6, #32         @ v6 = 32 - 2 * (results appended); final right shift
++1:      ldmdb       ip, {v1-v4}     @ reload the four stacked words (the rpl pointers per the prototype)
++        ldrsb       v5, [a3, #8]    @ curr->ref_idx
++        ldrsb       v8, [a3, #9]
++        ldrsb       ip, [a4, #8]    @ neigh->ref_idx
++        ldrsb       lr, [a4, #9]
++        ldr         v1, [v1, v5, lsl #2]    @ v1 = word at index curr ref_idx[0] of 1st stacked ptr
++        ldrb        v5, [a3, #10]   @ curr->pred_flag
++        ldr         v2, [v2, v8, lsl #2]    @ v2 = word at index curr ref_idx[1] of 2nd stacked ptr
++        ldrb        v8, [a4, #10]   @ neigh->pred_flag
++        ldr         v3, [v3, ip, lsl #2]    @ v3 = word at index neigh ref_idx[0] of 3rd stacked ptr
++        ldr         v4, [v4, lr, lsl #2]    @ v4 = word at index neigh ref_idx[1] of 4th stacked ptr
++        teq         v5, #3
++        beq         20f             @ curr pred_flag == 3: two-reference path
++        teq         v8, #3
++        beq         90f             @ only neigh has pred_flag == 3: result is 1<<30
++
++        tst         v5, #1          @ bit 0 of curr pred_flag picks mv[0] or mv[1]
++        itee        ne
++        ldrne       v5, [a3, #0]    @ curr->mv[0]
++        moveq       v1, v2
++        ldreq       v5, [a3, #4]    @ curr->mv[1]
++        tst         v8, #1          @ same selection for neigh
++        itee        ne
++        ldrne       v8, [a4, #0]    @ neigh->mv[0]
++        moveq       v3, v4
++        ldreq       v8, [a4, #4]    @ neigh->mv[1]
++        teq         v1, v3          @ compare the selected reference words
++        bne         10f             @ differ: flags are ne, so 10: yields 1<<30
++        ldr         lr, =0xFFFCFFFC @ mask keeping bits 2 and up of each halfword
++        ssub16      ip, v8, v5
++        ssub16      v5, v5, v8      @ sets the GE flags consumed by sel
++        sel         v5, v5, ip      @ per halfword: whichever difference is non-negative
++        ands        v5, v5, lr      @ zero (eq) when the masked differences are all zero
++        @ drop through
++10:     it          ne
++        movne       v5, #1<<30      @ v5 is now either 0 or 1<<30
++11:
++        sub         v6, v6, #2      @ one more 2-bit result in v7
++T       mov         v7, v7, lsr #2
++        subs        a2, a2, #1      @ a2 counts repeats of this result
++A       orr         v7, v5, v7, lsr #2
++T       orr         v7, v5, v7
++        bhi         11b
++
++        ldrd        v3, v4, [sp, #16*4]     @ 5th and 6th stacked words (in_inc0, in_inc1 per the prototype)
++        ldr         a2, [sp]        @ restore a2 from its pushed copy
++        add         ip, sp, #16*4   @ same address ip held on entry to 1:
++        subs        a1, a1, #1
++        add         a3, a3, v3      @ advance curr
++        add         a4, a4, v4      @ advance neigh
++        bhi         1b
++        mov         a1, v7, lsr v6  @ right-justify the results gathered at the top of v7
++        pop         {a2-a4,v1-v8,pc}
++
++20:     teq         v8, #3
++        bne         10b             @ neigh pred_flag != 3: flags ne, result 1<<30
++
++        teq         v1, v3
++        it          eq
++        teqeq       v2, v4
++        bne         40f             @ not (v1 == v3 and v2 == v4): try the crossed pairing
++        teq         v1, v2
++        bne         30f             @ v1 != v2: only the straight pairing applies
++
++        ldrd        v1, v2, [a3]    @ curr->mv
++        ldrd        v3, v4, [a4]    @ neigh->mv
++        ldr         lr, =0xFFFCFFFC
++        ssub16      ip, v3, v1      @ same difference test as above: curr mv[0] vs neigh mv[0]
++        ssub16      v5, v1, v3
++        sel         v5, v5, ip
++        ands        v5, v5, lr
++        bne         25f             @ straight pairing failed: try crossed
++        ssub16      ip, v4, v2      @ curr mv[1] vs neigh mv[1]
++        ssub16      v5, v2, v4
++        sel         v5, v5, ip
++        ands        v5, v5, lr
++        beq         11b             @ straight pairing passed: v5 == 0
++        @ drop through
++25:     ssub16      ip, v4, v1      @ crossed: curr mv[0] vs neigh mv[1]
++        ssub16      v5, v1, v4
++        sel         v5, v5, ip
++        ands        v5, v5, lr
++        bne         10b
++        ssub16      ip, v3, v2      @ crossed: curr mv[1] vs neigh mv[0]
++        ssub16      v5, v2, v3
++        sel         v5, v5, ip
++        ands        v5, v5, lr
++        b           10b             @ flags from ands decide 0 or 1<<30
++
++30:     ldrd        v1, v2, [a3]    @ curr->mv
++        ldrd        v3, v4, [a4]    @ neigh->mv
++        ldr         lr, =0xFFFCFFFC
++        ssub16      ip, v3, v1      @ curr mv[0] vs neigh mv[0]
++        ssub16      v5, v1, v3
++        sel         v5, v5, ip
++        ands        v5, v5, lr
++        bne         10b
++        ssub16      ip, v4, v2      @ curr mv[1] vs neigh mv[1]
++        ssub16      v5, v2, v4
++        sel         v5, v5, ip
++        ands        v5, v5, lr
++        b           10b             @ flags from ands decide 0 or 1<<30
++
++40:     teq         v1, v4
++        ite         eq
++        teqeq       v2, v3
++        bne         10b             @ not (v1 == v4 and v2 == v3): result 1<<30
++
++        ldrd        v1, v2, [a3]    @ curr->mv
++        ldrd        v3, v4, [a4]    @ neigh->mv
++        ldr         lr, =0xFFFCFFFC
++        b           25b             @ crossed pairing only
++
++90:
++        mov         v5, #1<<30
++        b           11b
++endfunc
++
++
++#endif
++
++
++@ =============================================================================
++@
++@ 10 bit
++
++function hevc_loop_filter_luma_body_10
++        m_filter_luma 10, q11, q15
++endfunc
++
++function ff_hevc_rpi_h_loop_filter_luma_neon_10, export=1
++        hevc_loop_filter_luma_start
++        b        .Lh_loop_luma_common_10        @ shared tail is inside ff_hevc_rpi_h_loop_filter_luma2_neon_10
++endfunc
++
++function ff_hevc_rpi_h_loop_filter_luma2_neon_10, export=1
++        cmp      r3, #0
++        it       eq
++        bxeq     lr                     @ r3 == 0: return before pushing anything
++        push     {r4-r10,lr}            @ 32 bytes
++        ldr      r10, [sp, #32]         @ first word above the 32 pushed bytes
++.Lh_loop_luma_common_10:
++        m_filter_h_luma_16 10
++endfunc
++
++function ff_hevc_rpi_v_loop_filter_luma_neon_10, export=1
++        hevc_loop_filter_luma_start
++        sub      r4, r0, #8             @ r4 = r0 - 8; the luma2 entry loads r4 from the stack instead
++        b        .Lv_loop_luma_common_10        @ shared tail is inside ff_hevc_rpi_v_loop_filter_luma2_neon_10
++endfunc
++
++function ff_hevc_rpi_v_loop_filter_luma2_neon_10, export=1
++        cmp      r3, #0
++        it       eq
++        bxeq     lr                     @ r3 == 0: return before pushing anything
++        push     {r4-r10,lr}            @ 32 bytes
++        ldr      r4, [sp, #36]          @ second word above the 32 pushed bytes
++        ldr      r10, [sp, #32]         @ first word above the 32 pushed bytes
++
++.Lv_loop_luma_common_10:
++        m_filter_v_luma_16 10
++endfunc
++
++function ff_hevc_rpi_h_loop_filter_uv_neon_10, export=1
++        m_filter_h_uv_16 10
++endfunc
++
++function ff_hevc_rpi_v_loop_filter_uv2_neon_10, export=1
++        m_filter_v_uv2_16 10
++endfunc
++
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcdsp_idct_neon.S
+@@ -0,0 +1,184 @@
++/*
++ * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi>
++ * Copyright (C) 2018 John Cox, Ben Avison for Raspberry Pi (Trading)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/arm/asm.S"
++#include "neon.S"
++
++/* in: d28-d31 = src0-src3, out: d28-d31 = dst0-dst3; uses registers q8 - q13 for temp values */
++.macro tr4_luma_shift shift
++        vaddl.s16   q8, d28, d30    // c0 = src0 + src2
++        vaddl.s16   q9, d30, d31    // c1 = src2 + src3
++        vsubl.s16   q10, d28, d31   // c2 = src0 - src3
++        vaddl.s16   q11, d28, d31   // src0 + src3
++
++        vmul.i32    q12, q8, d1[0]  // 29 * c0
++        vmul.i32    q13, q10, d2[0] // 55 * c2
++        vmul.i32    q8, q8, d2[0]   // 55 * c0
++        vmull.s16   q14, d29, d0[0] // c3 = 74 * src1
++
++        vsubw.s16   q11, q11, d30   // src0 - src2 + src3
++        vmla.i32    q12, q9, d2[0]  // 29 * c0 + 55 * c1
++        vmls.i32    q13, q9, d1[0]  // 55 * c2 - 29 * c1
++        vmla.i32    q8, q10, d1[0]  // 55 * c0 + 29 * c2
++
++        vmul.i32    q11, q11, d0[0] // dst2 = 74 * (src0 - src2 + src3)
++        vadd.i32    q12, q12, q14   // dst0 = 29 * c0 + 55 * c1 + c3
++        vadd.i32    q13, q13, q14   // dst1 = 55 * c2 - 29 * c1 + c3
++        vsub.i32    q8, q8, q14     // dst3 = 55 * c0 + 29 * c2 - c3
++
++        vqrshrn.s32 d28, q12, \shift    // dst0, rounded, shifted and saturated to 16 bits
++        vqrshrn.s32 d29, q13, \shift    // dst1
++        vqrshrn.s32 d30, q11, \shift    // dst2
++        vqrshrn.s32 d31, q8, \shift     // dst3
++.endm
++
++/* in: d28-d31 = src0-src3, results written back to d28-d31; uses registers q8 - q11 for temp values */
++.macro tr4_shift shift
++        vmull.s16   q9, d29, d0[0]   // 83 * src1
++        vmull.s16   q8, d29, d0[1]   // 36 * src1
++        vshll.s16   q14, d28, #6     // 64 * src0
++        vshll.s16   q10, d30, #6     // 64 * src2
++        vmlal.s16   q9, d31, d0[1]   // 83 * src1 + 36 * src3  o0
++        vmlsl.s16   q8, d31, d0[0]   // 36 * src1 - 83 * src3  o1
++        vadd.s32    q11, q14, q10    // 64 * (src0 + src2)     e0
++        vsub.s32    q10, q14, q10    // 64 * (src0 - src2)     e1
++        vadd.s32    q14, q11, q9     // e0 + o0
++        vadd.s32    q15, q10, q8     // e1 + o1
++        vsub.s32    q8, q10, q8      // e1 - o1
++        vsub.s32    q9, q11, q9      // e0 - o0
++
++        vqrshrn.s32 d28, q14, \shift    // e0 + o0, rounded, shifted and saturated to 16 bits
++        vqrshrn.s32 d29, q15, \shift    // e1 + o1
++        vqrshrn.s32 d30, q8, \shift     // e1 - o1
++        vqrshrn.s32 d31, q9, \shift     // e0 - o0
++.endm
++
++.macro tr8_process d0, d1, d2, d3, d4, d5, d6, d7,                         \
++                   tmp0, /* Q reg which doesn't alias with d4, d6 or d7 */ \
++                   tmp1, /* Q reg which doesn't alias with d7 or d0     */ \
++                   shift, I1, I2, I3
++        // 8-point inverse transform, in place on the eight D args: odd half accumulates in q4-q7, even half (a 4-point transform, extra-indented) in q1-q3 plus the two temps; reads d0[0..1] = 83, 36 and d1[0..3] = 75, 89, 18, 50; I1-I3 are optional instructions spliced between the first multiplies
++        vmull.s16  q4, \d1, d1[1]        // 89 * src1
++        \I1
++        vmull.s16  q5, \d1, d1[0]        // 75 * src1
++        \I2
++        vmull.s16  q6, \d1, d1[3]        // 50 * src1
++        \I3
++        vmull.s16  q7, \d1, d1[2]        // 18 * src1
++        vmlal.s16  q4, \d3, d1[0]        // 75 * src3
++        vmlsl.s16  q5, \d3, d1[2]        //-18 * src3
++        vmlsl.s16  q6, \d3, d1[1]        //-89 * src3
++        vmlsl.s16  q7, \d3, d1[3]        //-50 * src3
++
++          // tr4
++          vmull.s16  q1, \d2, d0[0]      // 83 * src(1*2)
++          vmull.s16  q2, \d2, d0[1]      // 36 * src(1*2)
++
++        vmlal.s16  q4, \d5, d1[3]        // 50 * src5
++        vmlsl.s16  q5, \d5, d1[1]        //-89 * src5
++        vmlal.s16  q6, \d5, d1[2]        // 18 * src5
++        vmlal.s16  q7, \d5, d1[0]        // 75 * src5
++
++          vshll.s16  q3, \d0, #6         // 64 * src(0*2)
++          vshll.s16  \tmp0, \d4, #6      // 64 * src(2*2)
++          vmlal.s16  q1, \d6, d0[1]      // 83 * src(1*2) + 36 * src(3*2)  o0
++          vmlsl.s16  q2, \d6, d0[0]      // 36 * src(1*2) - 83 * src(3*2)  o1
++          vadd.i32   \tmp1, q3, \tmp0    // 64 * (src(0*2) + src(2*2))     e0
++          vsub.i32   \tmp0, q3, \tmp0    // 64 * (src(0*2) - src(2*2))     e1
++
++        vmlal.s16  q4, \d7, d1[2]        // 18 * src7
++        vmlsl.s16  q5, \d7, d1[3]        //-50 * src7
++        vmlal.s16  q6, \d7, d1[0]        // 75 * src7
++        vmlsl.s16  q7, \d7, d1[1]        //-89 * src7
++
++          vsub.i32   q3, \tmp1, q1       // e0 - o0
++          vadd.i32   \tmp1, \tmp1, q1    // e0 + o0
++          vadd.i32   q1, \tmp0, q2       // e1 + o1
++          vsub.i32   q2, \tmp0, q2       // e1 - o1
++
++        vadd.i32   \tmp0, \tmp1, q4      // e_8[0] + o_8[0], dst[0]
++        vsub.i32   q4, \tmp1, q4         // e_8[0] - o_8[0], dst[7]
++        vsub.i32   \tmp1, q3, q7         // e_8[3] - o_8[3], dst[4]
++        vadd.i32   q7, q3, q7            // e_8[3] + o_8[3], dst[3]
++        vadd.i32   q3, q1, q5            // e_8[1] + o_8[1], dst[1]
++        vsub.i32   q5, q1, q5            // e_8[1] - o_8[1], dst[6]
++        vsub.i32   q1, q2, q6            // e_8[2] - o_8[2], dst[5]
++        vadd.i32   q6, q2, q6            // e_8[2] + o_8[2], dst[2]
++        vqrshrn.s32   \d0, \tmp0, #\shift   // dst[0], rounded, shifted and saturated to 16 bits
++        vqrshrn.s32   \d4, \tmp1, #\shift   // dst[4]
++        vqrshrn.s32   \d1, q3, #\shift      // dst[1]
++        vqrshrn.s32   \d5, q1, #\shift      // dst[5]
++        vqrshrn.s32   \d2, q6, #\shift      // dst[2]
++        vqrshrn.s32   \d6, q5, #\shift      // dst[6]
++        vqrshrn.s32   \d3, q7, #\shift      // dst[3]
++        vqrshrn.s32   \d7, q4, #\shift      // dst[7]
++.endm
++
++.macro tr8_vert d0, d1, d2, d3, d4, d5, d6, d7, q01, q23, I1, I2, I3
++        vld1.16     {\d0}, [r0 :64], r3
++        vld1.16     {\d1}, [r2 :64], r3
++        vld1.16     {\d2}, [r0 :64], r3
++        vld1.16     {\d3}, [r2 :64], r3
++        vld1.16     {\d4}, [r0 :64], r3
++        vld1.16     {\d5}, [r2 :64], r3
++        vld1.16     {\d6}, [r0 :64], r3
++        vld1.16     {\d7}, [r2 :64], r3
++        // the eight 64-bit loads above alternate between [r0] and [r2], each post-incremented by r3; this pass always runs tr8_process with shift 7 and forwards I1-I3
++        tr8_process \
++            \d0, \d1, \d2, \d3, \d4, \d5, \d6, \d7, \
++            \q01, \q23, 7, "\I1", "\I2", "\I3"
++.endm
++
++.macro tr8_horiz d0, d1, d2, d3, d4, d5, d6, d7, q01, q23, shift
++        tr8_process \
++            \d0, \d1, \d2, \d3, \d4, \d5, \d6, \d7, \
++            \q01, \q23, \shift
++        // no I1-I3 are passed, so nothing is interleaved; results are zipped pairwise (dN with dN+4) and then written by vst4 as interleaved 4-element structures to [r0] and [r2], each post-incremented by r3
++        vzip.16    \d0, \d4
++        vzip.16    \d1, \d5
++        vzip.16    \d2, \d6
++        vzip.16    \d3, \d7
++        vst4.16    {\d0-\d3}, [r0 :128], r3
++        vst4.16    {\d4-\d7}, [r2 :128], r3
++.endm
++
++#define BIT_DEPTH 8
++#include "rpi_hevc_idct_fn_neon.S"
++
++.text
++
++.align 4
++tr4f:
++.word 0x00240053  // d0[1] = 36, d0[0] = 83 (lanes as read by tr4_shift and tr8_process; assumes the table is loaded contiguously starting at d0)
++.word 0x00000000
++tr8f:
++.word 0x0059004b  // d1[1] = 89, d1[0] = 75 (lanes as read by tr8_process)
++.word 0x00320012  // d1[3] = 50, d1[2] = 18
++tr16:
++.word 0x005a0057  // 90, d2[0] = 87
++.word 0x00500046  // 80, d2[2] = 70
++.word 0x0039002b  // 57, presumably d3[0] = 43 (the earlier comment repeated d2[0]) - TODO confirm against the load in the included file
++.word 0x00190009  // 25, presumably d3[2] = 9 - TODO confirm
++
++#undef BIT_DEPTH
++#define BIT_DEPTH 10
++#include "rpi_hevc_idct_fn_neon.S"
++
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcdsp_init_arm.c
+@@ -0,0 +1,32 @@
++/*
++ * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/attributes.h"
++#include "libavutil/arm/cpu.h"
++#include "libavcodec/rpi_hevcdsp.h"
++#include "rpi_hevcdsp_arm.h"
++
++av_cold void ff_hevcdsp_rpi_init_arm(HEVCDSPContext *c, const int bit_depth)
++{
++    const int flags = av_get_cpu_flags(); /* CPU feature bits reported by libavutil */
++    if (!have_neon(flags))
++        return;                           /* no NEON: leave the context untouched */
++    ff_hevcdsp_rpi_init_neon(c, bit_depth);
++}
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcdsp_init_neon.c
+@@ -0,0 +1,467 @@
++/*
++ * Copyright (c) 2014 Seppo Tomperi <seppo.tomperi@vtt.fi>
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "config.h"
++#include "libavutil/attributes.h"
++#include "libavutil/arm/cpu.h"
++#include "libavcodec/rpi_hevcdsp.h"
++#include "rpi_hevcdsp_arm.h"
++#include "libavcodec/avcodec.h"
++#include "libavcodec/bit_depth_template.c"
++
++// NEON inter pred fns for qpel & epel (non-sand) exist in the git repo but
++// have been removed from head as we never use them.
++
++void ff_hevc_rpi_v_loop_filter_luma_neon_8(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
++void ff_hevc_rpi_h_loop_filter_luma_neon_8(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
++
++void ff_hevc_rpi_v_loop_filter_luma_neon_10(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
++void ff_hevc_rpi_h_loop_filter_luma_neon_10(uint8_t *_pix, ptrdiff_t _stride, int _beta, int *_tc, uint8_t *_no_p, uint8_t *_no_q);
++
++void ff_hevc_rpi_h_loop_filter_luma2_neon_8(uint8_t * _pix_r,
++                             unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f);
++void ff_hevc_rpi_v_loop_filter_luma2_neon_8(uint8_t * _pix_r,
++                             unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f,
++                             uint8_t * _pix_l);
++void ff_hevc_rpi_h_loop_filter_uv_neon_8(uint8_t * src, unsigned int stride, uint32_t tc4,
++                             unsigned int no_f);
++void ff_hevc_rpi_v_loop_filter_uv2_neon_8(uint8_t * src_r, unsigned int stride, uint32_t tc4,
++                             uint8_t * src_l,
++                             unsigned int no_f);
++
++void ff_hevc_rpi_h_loop_filter_luma2_neon_10(uint8_t * _pix_r,
++                             unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f);
++void ff_hevc_rpi_v_loop_filter_luma2_neon_10(uint8_t * _pix_r,
++                             unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f,
++                             uint8_t * _pix_l);
++void ff_hevc_rpi_h_loop_filter_uv_neon_10(uint8_t * src, unsigned int stride, uint32_t tc4,
++                             unsigned int no_f);
++void ff_hevc_rpi_v_loop_filter_uv2_neon_10(uint8_t * src_r, unsigned int stride, uint32_t tc4,
++                             uint8_t * src_l,
++                             unsigned int no_f);
++
++void ff_hevc_rpi_transform_4x4_neon_8(int16_t *coeffs, int col_limit);
++void ff_hevc_rpi_transform_8x8_neon_8(int16_t *coeffs, int col_limit);
++void ff_hevc_rpi_idct_4x4_dc_neon_8(int16_t *coeffs);
++void ff_hevc_rpi_idct_8x8_dc_neon_8(int16_t *coeffs);
++void ff_hevc_rpi_idct_16x16_dc_neon_8(int16_t *coeffs);
++void ff_hevc_rpi_idct_32x32_dc_neon_8(int16_t *coeffs);
++void ff_hevc_rpi_transform_luma_4x4_neon_8(int16_t *coeffs);
++
++void ff_hevc_rpi_transform_4x4_neon_10(int16_t *coeffs, int col_limit);
++void ff_hevc_rpi_transform_8x8_neon_10(int16_t *coeffs, int col_limit);
++void ff_hevc_rpi_idct_4x4_dc_neon_10(int16_t *coeffs);
++void ff_hevc_rpi_idct_8x8_dc_neon_10(int16_t *coeffs);
++void ff_hevc_rpi_idct_16x16_dc_neon_10(int16_t *coeffs);
++void ff_hevc_rpi_idct_32x32_dc_neon_10(int16_t *coeffs);
++void ff_hevc_rpi_transform_luma_4x4_neon_10(int16_t *coeffs);
++
++void ff_hevc_rpi_add_residual_4x4_neon_8(uint8_t *_dst, int16_t *coeffs,
++                                     ptrdiff_t stride);
++void ff_hevc_rpi_add_residual_8x8_neon_8(uint8_t *_dst, int16_t *coeffs,
++                                     ptrdiff_t stride);
++void ff_hevc_rpi_add_residual_16x16_neon_8(uint8_t *_dst, int16_t *coeffs,
++                                       ptrdiff_t stride);
++void ff_hevc_rpi_add_residual_32x32_neon_8(uint8_t *_dst, int16_t *coeffs,
++                                       ptrdiff_t stride);
++
++void ff_hevc_rpi_add_residual_4x4_dc_neon_8(uint8_t *_dst, ptrdiff_t stride, int dc);
++void ff_hevc_rpi_add_residual_8x8_dc_neon_8(uint8_t *_dst, ptrdiff_t stride, int dc);
++void ff_hevc_rpi_add_residual_16x16_dc_neon_8(uint8_t *_dst, ptrdiff_t stride, int dc);
++void ff_hevc_rpi_add_residual_32x32_dc_neon_8(uint8_t *_dst, ptrdiff_t stride, int dc);
++
++
++void ff_hevc_rpi_add_residual_4x4_neon_10(uint8_t *_dst, int16_t *coeffs,
++                                     ptrdiff_t stride);
++void ff_hevc_rpi_add_residual_8x8_neon_10(uint8_t *_dst, int16_t *coeffs,
++                                     ptrdiff_t stride);
++void ff_hevc_rpi_add_residual_16x16_neon_10(uint8_t *_dst, int16_t *coeffs,
++                                       ptrdiff_t stride);
++void ff_hevc_rpi_add_residual_32x32_neon_10(uint8_t *_dst, int16_t *coeffs,
++                                       ptrdiff_t stride);
++
++void ff_hevc_rpi_add_residual_4x4_dc_neon_10(uint8_t *_dst, ptrdiff_t stride, int dc);
++void ff_hevc_rpi_add_residual_8x8_dc_neon_10(uint8_t *_dst, ptrdiff_t stride, int dc);
++void ff_hevc_rpi_add_residual_16x16_dc_neon_10(uint8_t *_dst, ptrdiff_t stride, int dc);
++void ff_hevc_rpi_add_residual_32x32_dc_neon_10(uint8_t *_dst, ptrdiff_t stride, int dc);
++
++
++void ff_hevc_rpi_add_residual_4x4_u_neon_8(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride, int dc_v);
++void ff_hevc_rpi_add_residual_8x8_u_neon_8(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride, int dc_v);
++void ff_hevc_rpi_add_residual_16x16_u_neon_8(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride, int dc_v);
++void ff_hevc_rpi_add_residual_4x4_v_neon_8(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride, int dc_u);
++void ff_hevc_rpi_add_residual_8x8_v_neon_8(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride, int dc_u);
++void ff_hevc_rpi_add_residual_16x16_v_neon_8(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride, int dc_u);
++void ff_hevc_rpi_add_residual_4x4_c_neon_8(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride);
++void ff_hevc_rpi_add_residual_8x8_c_neon_8(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride);
++void ff_hevc_rpi_add_residual_16x16_c_neon_8(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride);
++void ff_hevc_rpi_add_residual_4x4_dc_c_neon_8(uint8_t *_dst, ptrdiff_t stride, int32_t dc);
++void ff_hevc_rpi_add_residual_8x8_dc_c_neon_8(uint8_t *_dst, ptrdiff_t stride, int32_t dc);
++void ff_hevc_rpi_add_residual_16x16_dc_c_neon_8(uint8_t *_dst, ptrdiff_t stride, int32_t dc);
++
++
++void ff_hevc_rpi_add_residual_4x4_u_neon_10(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride, int dc_v);
++void ff_hevc_rpi_add_residual_8x8_u_neon_10(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride, int dc_v);
++void ff_hevc_rpi_add_residual_16x16_u_neon_10(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride, int dc_v);
++void ff_hevc_rpi_add_residual_4x4_v_neon_10(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride, int dc_u);
++void ff_hevc_rpi_add_residual_8x8_v_neon_10(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride, int dc_u);
++void ff_hevc_rpi_add_residual_16x16_v_neon_10(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride, int dc_u);
++void ff_hevc_rpi_add_residual_4x4_c_neon_10(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride);
++void ff_hevc_rpi_add_residual_8x8_c_neon_10(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride);
++void ff_hevc_rpi_add_residual_16x16_c_neon_10(uint8_t *_dst, const int16_t * residual,
++                                       ptrdiff_t stride);
++void ff_hevc_rpi_add_residual_4x4_dc_c_neon_10(uint8_t *_dst, ptrdiff_t stride, int32_t dc);
++void ff_hevc_rpi_add_residual_8x8_dc_c_neon_10(uint8_t *_dst, ptrdiff_t stride, int32_t dc);
++void ff_hevc_rpi_add_residual_16x16_dc_c_neon_10(uint8_t *_dst, ptrdiff_t stride, int32_t dc);
++
++void ff_hevc_rpi_sao_edge_8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *_sao_offset_val, int eo, int width, int height);
++void ff_hevc_rpi_sao_edge_16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *_sao_offset_val, int eo, int width, int height);
++void ff_hevc_rpi_sao_edge_32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *_sao_offset_val, int eo, int width, int height);
++void ff_hevc_rpi_sao_edge_64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *_sao_offset_val, int eo, int width, int height);
++
++void ff_hevc_rpi_sao_edge_8_neon_10(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *_sao_offset_val, int eo, int width, int height);
++void ff_hevc_rpi_sao_edge_16_neon_10(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *_sao_offset_val, int eo, int width, int height);
++void ff_hevc_rpi_sao_edge_32_neon_10(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *_sao_offset_val, int eo, int width, int height);
++void ff_hevc_rpi_sao_edge_64_neon_10(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *_sao_offset_val, int eo, int width, int height);
++
++void ff_hevc_rpi_sao_edge_c_8_neon_8(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,
++                                  const int16_t *_sao_offset_val_u, const int16_t *_sao_offset_val_v,
++                                  int eo, int width, int height);
++void ff_hevc_rpi_sao_edge_c_16_neon_8(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,
++                                  const int16_t *_sao_offset_val_u, const int16_t *_sao_offset_val_v,
++                                  int eo, int width, int height);
++void ff_hevc_rpi_sao_edge_c_32_neon_8(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,
++                                  const int16_t *_sao_offset_val_u, const int16_t *_sao_offset_val_v,
++                                  int eo, int width, int height);
++
++void ff_hevc_rpi_sao_edge_c_8_neon_10(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,
++                                  const int16_t *_sao_offset_val_u, const int16_t *_sao_offset_val_v,
++                                  int eo, int width, int height);
++void ff_hevc_rpi_sao_edge_c_16_neon_10(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,
++                                  const int16_t *_sao_offset_val_u, const int16_t *_sao_offset_val_v,
++                                  int eo, int width, int height);
++void ff_hevc_rpi_sao_edge_c_32_neon_10(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,
++                                  const int16_t *_sao_offset_val_u, const int16_t *_sao_offset_val_v,
++                                  int eo, int width, int height);
++
++void ff_hevc_rpi_sao_band_c_8_neon_8(uint8_t *_dst, const uint8_t *_src,
++                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                  const int16_t *sao_offset_val_u, int sao_left_class_u,
++                                  const int16_t *sao_offset_val_v, int sao_left_class_v,
++                                  int width, int height);
++void ff_hevc_rpi_sao_band_c_16_neon_8(uint8_t *_dst, const uint8_t *_src,
++                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                  const int16_t *sao_offset_val_u, int sao_left_class_u,
++                                  const int16_t *sao_offset_val_v, int sao_left_class_v,
++                                  int width, int height);
++void ff_hevc_rpi_sao_band_c_32_neon_8(uint8_t *_dst, const uint8_t *_src,
++                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                  const int16_t *sao_offset_val_u, int sao_left_class_u,
++                                  const int16_t *sao_offset_val_v, int sao_left_class_v,
++                                  int width, int height);
++
++void ff_hevc_rpi_sao_band_c_8_neon_10(uint8_t *_dst, const uint8_t *_src,
++                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                  const int16_t *sao_offset_val_u, int sao_left_class_u,
++                                  const int16_t *sao_offset_val_v, int sao_left_class_v,
++                                  int width, int height);
++void ff_hevc_rpi_sao_band_c_16_neon_10(uint8_t *_dst, const uint8_t *_src,
++                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                  const int16_t *sao_offset_val_u, int sao_left_class_u,
++                                  const int16_t *sao_offset_val_v, int sao_left_class_v,
++                                  int width, int height);
++void ff_hevc_rpi_sao_band_c_32_neon_10(uint8_t *_dst, const uint8_t *_src,
++                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                  const int16_t *sao_offset_val_u, int sao_left_class_u,
++                                  const int16_t *sao_offset_val_v, int sao_left_class_v,
++                                  int width, int height);
++
++void ff_hevc_rpi_sao_band_8_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                int16_t *sao_offset_val, int sao_left_class, int width, int height);
++void ff_hevc_rpi_sao_band_16_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                int16_t *sao_offset_val, int sao_left_class, int width, int height);
++void ff_hevc_rpi_sao_band_32_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                int16_t *sao_offset_val, int sao_left_class, int width, int height);
++void ff_hevc_rpi_sao_band_64_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                int16_t *sao_offset_val, int sao_left_class, int width, int height);
++
++void ff_hevc_rpi_sao_band_8_neon_10(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                int16_t *sao_offset_val, int sao_left_class, int width, int height);
++void ff_hevc_rpi_sao_band_16_neon_10(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                int16_t *sao_offset_val, int sao_left_class, int width, int height);
++void ff_hevc_rpi_sao_band_32_neon_10(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                int16_t *sao_offset_val, int sao_left_class, int width, int height);
++void ff_hevc_rpi_sao_band_64_neon_10(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                int16_t *sao_offset_val, int sao_left_class, int width, int height);
++
++
++uint32_t ff_hevc_rpi_deblocking_boundary_strengths_neon(int pus, int dup, const struct HEVCRpiMvField *curr, const struct HEVCRpiMvField *neigh,
++                                                const int *curr_rpl0, const int *curr_rpl1, const int *neigh_rpl0, const int *neigh_rpl1,
++                                                int in_inc0, int in_inc1);
++void ff_hevc_rpi_cpy_blks8x4_neon(uint8_t *dst, unsigned int stride_dst, const uint8_t *src, unsigned stride_src, unsigned int width, unsigned int height);
++
++
++static void ff_hevc_rpi_sao_edge_48_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *_sao_offset_val, int eo, int width, int height)
++{   /* 48-wide edge SAO built from a 32-wide call plus a 16-wide call starting 32 bytes further along; the width argument is not used */
++    ff_hevc_rpi_sao_edge_32_neon_8(_dst, _src, stride_dst, _sao_offset_val, eo, 32, height);
++    ff_hevc_rpi_sao_edge_16_neon_8(_dst + 32, _src + 32, stride_dst, _sao_offset_val, eo, 16, height);
++}
++static void ff_hevc_rpi_sao_edge_48_neon_10(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *_sao_offset_val, int eo, int width, int height)
++{   /* 48-wide edge SAO built from a 32-wide call plus a 16-wide call starting 64 bytes further along (2 bytes per column, presumably 16-bit sample storage - confirm in the .S); width is not used */
++    ff_hevc_rpi_sao_edge_32_neon_10(_dst, _src, stride_dst, _sao_offset_val, eo, 32, height);
++    ff_hevc_rpi_sao_edge_16_neon_10(_dst + 64, _src + 64, stride_dst, _sao_offset_val, eo, 16, height);
++}
++
++static void ff_hevc_rpi_sao_band_48_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                int16_t *sao_offset_val, int sao_left_class, int width, int height)
++{   /* 48-wide band SAO built from a 32-wide call plus a 16-wide call starting 32 bytes further along; the width argument is not used */
++    ff_hevc_rpi_sao_band_32_neon_8(_dst, _src, stride_dst, stride_src, sao_offset_val, sao_left_class, 32, height);
++    ff_hevc_rpi_sao_band_16_neon_8(_dst + 32, _src + 32, stride_dst, stride_src, sao_offset_val, sao_left_class, 16, height);
++}
++static void ff_hevc_rpi_sao_band_48_neon_10(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                int16_t *sao_offset_val, int sao_left_class, int width, int height)
++{   /* 48-wide band SAO built from a 32-wide call plus a 16-wide call starting 64 bytes further along (2 bytes per column, presumably 16-bit sample storage - confirm in the .S); width is not used */
++    ff_hevc_rpi_sao_band_32_neon_10(_dst, _src, stride_dst, stride_src, sao_offset_val, sao_left_class, 32, height);
++    ff_hevc_rpi_sao_band_16_neon_10(_dst + 64, _src + 64, stride_dst, stride_src, sao_offset_val, sao_left_class, 16, height);
++}
++
++#if SAO_FILTER_N == 6
++static void ff_hevc_rpi_sao_edge_24_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *_sao_offset_val, int eo, int width, int height)
++{   /* 24-wide edge SAO built from a 16-wide call plus an 8-wide call starting 16 bytes further along; the width argument is not used */
++    ff_hevc_rpi_sao_edge_16_neon_8(_dst, _src, stride_dst, _sao_offset_val, eo, 16, height);
++    ff_hevc_rpi_sao_edge_8_neon_8(_dst + 16, _src + 16, stride_dst, _sao_offset_val, eo, 8, height);
++}
++static void ff_hevc_rpi_sao_edge_24_neon_10(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *_sao_offset_val, int eo, int width, int height)
++{   /* 24-wide edge SAO built from a 16-wide call plus an 8-wide call starting 32 bytes further along (2 bytes per column, presumably 16-bit sample storage - confirm in the .S); width is not used */
++    ff_hevc_rpi_sao_edge_16_neon_10(_dst, _src, stride_dst, _sao_offset_val, eo, 16, height);
++    ff_hevc_rpi_sao_edge_8_neon_10(_dst + 32, _src + 32, stride_dst, _sao_offset_val, eo, 8, height);
++}
++
++static void ff_hevc_rpi_sao_band_24_neon_8(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                int16_t *sao_offset_val, int sao_left_class, int width, int height)
++{   /* 24-wide band SAO built from a 16-wide call plus an 8-wide call starting 16 bytes further along; the width argument is not used */
++    ff_hevc_rpi_sao_band_16_neon_8(_dst, _src, stride_dst, stride_src, sao_offset_val, sao_left_class, 16, height);
++    ff_hevc_rpi_sao_band_8_neon_8(_dst + 16, _src + 16, stride_dst, stride_src, sao_offset_val, sao_left_class, 8, height);
++}
++static void ff_hevc_rpi_sao_band_24_neon_10(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                int16_t *sao_offset_val, int sao_left_class, int width, int height)
++{   /* 24-wide band SAO built from a 16-wide call plus an 8-wide call starting 32 bytes further along (2 bytes per column, presumably 16-bit sample storage - confirm in the .S); width is not used */
++    ff_hevc_rpi_sao_band_16_neon_10(_dst, _src, stride_dst, stride_src, sao_offset_val, sao_left_class, 16, height);
++    ff_hevc_rpi_sao_band_8_neon_10(_dst + 32, _src + 32, stride_dst, stride_src, sao_offset_val, sao_left_class, 8, height);
++}
++
++static void ff_hevc_rpi_sao_edge_c_24_neon_8(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,
++                                  const int16_t *_sao_offset_val_u, const int16_t *_sao_offset_val_v,
++                                  int eo, int width, int height)
++{   /* 24-wide chroma edge SAO built from a 16-wide call plus an 8-wide call starting 32 bytes further along (2 bytes per column, presumably interleaved U/V - confirm in the .S); width is not used */
++    ff_hevc_rpi_sao_edge_c_16_neon_8(_dst, _src, stride_dst, _sao_offset_val_u, _sao_offset_val_v, eo, 16, height);
++    ff_hevc_rpi_sao_edge_c_8_neon_8(_dst + 32, _src + 32, stride_dst, _sao_offset_val_u, _sao_offset_val_v, eo, 8, height);
++}
++static void ff_hevc_rpi_sao_edge_c_24_neon_10(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,
++                                  const int16_t *_sao_offset_val_u, const int16_t *_sao_offset_val_v,
++                                  int eo, int width, int height)
++{   /* 24-wide chroma edge SAO built from a 16-wide call plus an 8-wide call starting 64 bytes further along (4 bytes per column, presumably interleaved 16-bit U/V - confirm in the .S); width is not used */
++    ff_hevc_rpi_sao_edge_c_16_neon_10(_dst, _src, stride_dst, _sao_offset_val_u, _sao_offset_val_v, eo, 16, height);
++    ff_hevc_rpi_sao_edge_c_8_neon_10(_dst + 64, _src + 64, stride_dst, _sao_offset_val_u, _sao_offset_val_v, eo, 8, height);
++}
++
++static void ff_hevc_rpi_sao_band_c_24_neon_8(uint8_t *_dst, const uint8_t *_src,
++                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                  const int16_t *sao_offset_val_u, int sao_left_class_u,
++                                  const int16_t *sao_offset_val_v, int sao_left_class_v,
++                                  int width, int height)
++{   /* 24-wide chroma band SAO built from a 16-wide call plus an 8-wide call starting 32 bytes further along (2 bytes per column, presumably interleaved U/V - confirm in the .S); width is not used */
++    ff_hevc_rpi_sao_band_c_16_neon_8(_dst, _src, stride_dst, stride_src,
++                                sao_offset_val_u, sao_left_class_u, sao_offset_val_v, sao_left_class_v, 16, height);
++    ff_hevc_rpi_sao_band_c_8_neon_8(_dst + 32, _src + 32, stride_dst, stride_src,
++                                sao_offset_val_u, sao_left_class_u, sao_offset_val_v, sao_left_class_v, 8, height);
++}
++static void ff_hevc_rpi_sao_band_c_24_neon_10(uint8_t *_dst, const uint8_t *_src,
++                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                  const int16_t *sao_offset_val_u, int sao_left_class_u,
++                                  const int16_t *sao_offset_val_v, int sao_left_class_v,
++                                  int width, int height)
++{   /* 24-wide chroma band SAO built from a 16-wide call plus an 8-wide call starting 64 bytes further along (4 bytes per column, presumably interleaved 16-bit U/V - confirm in the .S); width is not used */
++    ff_hevc_rpi_sao_band_c_16_neon_10(_dst, _src, stride_dst, stride_src,
++                                sao_offset_val_u, sao_left_class_u, sao_offset_val_v, sao_left_class_v, 16, height);
++    ff_hevc_rpi_sao_band_c_8_neon_10(_dst + 64, _src + 64, stride_dst, stride_src,
++                                sao_offset_val_u, sao_left_class_u, sao_offset_val_v, sao_left_class_v, 8, height);
++}
++#endif
++
++
++
++#if RPI_HEVC_SAO_BUF_STRIDE != 160
++#error SAO edge src stride not 160 - value used in .S
++#endif
++
++av_cold void ff_hevcdsp_rpi_init_neon(HEVCDSPContext *c, const int bit_depth)
++{
++    if (bit_depth == 8) {
++        c->hevc_v_loop_filter_luma     = ff_hevc_rpi_v_loop_filter_luma_neon_8;
++        c->hevc_v_loop_filter_luma_c   = ff_hevc_rpi_v_loop_filter_luma_neon_8;
++        c->hevc_h_loop_filter_luma     = ff_hevc_rpi_h_loop_filter_luma_neon_8;
++        c->hevc_h_loop_filter_luma_c   = ff_hevc_rpi_h_loop_filter_luma_neon_8;
++        c->hevc_h_loop_filter_luma2    = ff_hevc_rpi_h_loop_filter_luma2_neon_8;
++        c->hevc_v_loop_filter_luma2    = ff_hevc_rpi_v_loop_filter_luma2_neon_8;
++        c->hevc_h_loop_filter_uv       = ff_hevc_rpi_h_loop_filter_uv_neon_8;
++        c->hevc_v_loop_filter_uv2      = ff_hevc_rpi_v_loop_filter_uv2_neon_8;
++        c->idct[0]                     = ff_hevc_rpi_transform_4x4_neon_8;
++        c->idct[1]                     = ff_hevc_rpi_transform_8x8_neon_8;
++        c->idct_dc[0]                  = ff_hevc_rpi_idct_4x4_dc_neon_8;
++        c->idct_dc[1]                  = ff_hevc_rpi_idct_8x8_dc_neon_8;
++        c->idct_dc[2]                  = ff_hevc_rpi_idct_16x16_dc_neon_8;
++        c->idct_dc[3]                  = ff_hevc_rpi_idct_32x32_dc_neon_8;
++        c->add_residual[0]             = ff_hevc_rpi_add_residual_4x4_neon_8;
++        c->add_residual[1]             = ff_hevc_rpi_add_residual_8x8_neon_8;
++        c->add_residual[2]             = ff_hevc_rpi_add_residual_16x16_neon_8;
++        c->add_residual[3]             = ff_hevc_rpi_add_residual_32x32_neon_8;
++        c->add_residual_dc[0]          = ff_hevc_rpi_add_residual_4x4_dc_neon_8;
++        c->add_residual_dc[1]          = ff_hevc_rpi_add_residual_8x8_dc_neon_8;
++        c->add_residual_dc[2]          = ff_hevc_rpi_add_residual_16x16_dc_neon_8;
++        c->add_residual_dc[3]          = ff_hevc_rpi_add_residual_32x32_dc_neon_8;
++        c->add_residual_u[0]           = ff_hevc_rpi_add_residual_4x4_u_neon_8;
++        c->add_residual_u[1]           = ff_hevc_rpi_add_residual_8x8_u_neon_8;
++        c->add_residual_u[2]           = ff_hevc_rpi_add_residual_16x16_u_neon_8;
++        c->add_residual_v[0]           = ff_hevc_rpi_add_residual_4x4_v_neon_8;
++        c->add_residual_v[1]           = ff_hevc_rpi_add_residual_8x8_v_neon_8;
++        c->add_residual_v[2]           = ff_hevc_rpi_add_residual_16x16_v_neon_8;
++        c->add_residual_c[0]           = ff_hevc_rpi_add_residual_4x4_c_neon_8;
++        c->add_residual_c[1]           = ff_hevc_rpi_add_residual_8x8_c_neon_8;
++        c->add_residual_c[2]           = ff_hevc_rpi_add_residual_16x16_c_neon_8;
++        c->add_residual_dc_c[0]        = ff_hevc_rpi_add_residual_4x4_dc_c_neon_8;
++        c->add_residual_dc_c[1]        = ff_hevc_rpi_add_residual_8x8_dc_c_neon_8;
++        c->add_residual_dc_c[2]        = ff_hevc_rpi_add_residual_16x16_dc_c_neon_8;
++        c->transform_4x4_luma          = ff_hevc_rpi_transform_luma_4x4_neon_8;
++        c->sao_band_filter[0]          = ff_hevc_rpi_sao_band_8_neon_8;
++        c->sao_band_filter[1]          = ff_hevc_rpi_sao_band_16_neon_8;
++        c->sao_band_filter[2]          = ff_hevc_rpi_sao_band_32_neon_8;
++        c->sao_band_filter[3]          = ff_hevc_rpi_sao_band_48_neon_8;
++        c->sao_band_filter[4]          = ff_hevc_rpi_sao_band_64_neon_8;
++        c->sao_edge_filter[0]          = ff_hevc_rpi_sao_edge_8_neon_8;
++        c->sao_edge_filter[1]          = ff_hevc_rpi_sao_edge_16_neon_8;
++        c->sao_edge_filter[2]          = ff_hevc_rpi_sao_edge_32_neon_8;
++        c->sao_edge_filter[3]          = ff_hevc_rpi_sao_edge_48_neon_8;
++        c->sao_edge_filter[4]          = ff_hevc_rpi_sao_edge_64_neon_8;
++#if SAO_FILTER_N == 6
++        c->sao_band_filter[5]          = ff_hevc_rpi_sao_band_24_neon_8;
++        c->sao_edge_filter[5]          = ff_hevc_rpi_sao_edge_24_neon_8;
++#endif
++        c->sao_band_filter_c[0]        = ff_hevc_rpi_sao_band_c_8_neon_8;
++        c->sao_band_filter_c[1]        = ff_hevc_rpi_sao_band_c_16_neon_8;
++        c->sao_band_filter_c[2]        = ff_hevc_rpi_sao_band_c_32_neon_8;
++
++        c->sao_edge_filter_c[0]        = ff_hevc_rpi_sao_edge_c_8_neon_8;
++        c->sao_edge_filter_c[1]        = ff_hevc_rpi_sao_edge_c_16_neon_8;
++        c->sao_edge_filter_c[2]        = ff_hevc_rpi_sao_edge_c_32_neon_8;
++
++#if SAO_FILTER_N == 6
++        c->sao_band_filter_c[5]        = ff_hevc_rpi_sao_band_c_24_neon_8;
++        c->sao_edge_filter_c[5]        = ff_hevc_rpi_sao_edge_c_24_neon_8;
++#endif
++    }
++    else if (bit_depth == 10) {
++        c->hevc_v_loop_filter_luma     = ff_hevc_rpi_v_loop_filter_luma_neon_10;
++        c->hevc_v_loop_filter_luma_c   = ff_hevc_rpi_v_loop_filter_luma_neon_10;
++        c->hevc_h_loop_filter_luma     = ff_hevc_rpi_h_loop_filter_luma_neon_10;
++        c->hevc_h_loop_filter_luma_c   = ff_hevc_rpi_h_loop_filter_luma_neon_10;
++        c->hevc_h_loop_filter_luma2    = ff_hevc_rpi_h_loop_filter_luma2_neon_10;
++        c->hevc_v_loop_filter_luma2    = ff_hevc_rpi_v_loop_filter_luma2_neon_10;
++        c->hevc_h_loop_filter_uv       = ff_hevc_rpi_h_loop_filter_uv_neon_10;
++        c->hevc_v_loop_filter_uv2      = ff_hevc_rpi_v_loop_filter_uv2_neon_10;
++        c->idct[0]                     = ff_hevc_rpi_transform_4x4_neon_10;
++        c->idct[1]                     = ff_hevc_rpi_transform_8x8_neon_10;
++        c->idct_dc[0]                  = ff_hevc_rpi_idct_4x4_dc_neon_10;
++        c->idct_dc[1]                  = ff_hevc_rpi_idct_8x8_dc_neon_10;
++        c->idct_dc[2]                  = ff_hevc_rpi_idct_16x16_dc_neon_10;
++        c->idct_dc[3]                  = ff_hevc_rpi_idct_32x32_dc_neon_10;
++        c->add_residual[0]             = ff_hevc_rpi_add_residual_4x4_neon_10;
++        c->add_residual[1]             = ff_hevc_rpi_add_residual_8x8_neon_10;
++        c->add_residual[2]             = ff_hevc_rpi_add_residual_16x16_neon_10;
++        c->add_residual[3]             = ff_hevc_rpi_add_residual_32x32_neon_10;
++        c->add_residual_dc[0]          = ff_hevc_rpi_add_residual_4x4_dc_neon_10;
++        c->add_residual_dc[1]          = ff_hevc_rpi_add_residual_8x8_dc_neon_10;
++        c->add_residual_dc[2]          = ff_hevc_rpi_add_residual_16x16_dc_neon_10;
++        c->add_residual_dc[3]          = ff_hevc_rpi_add_residual_32x32_dc_neon_10;
++        c->add_residual_u[0]           = ff_hevc_rpi_add_residual_4x4_u_neon_10;
++        c->add_residual_u[1]           = ff_hevc_rpi_add_residual_8x8_u_neon_10;
++        c->add_residual_u[2]           = ff_hevc_rpi_add_residual_16x16_u_neon_10;
++        c->add_residual_v[0]           = ff_hevc_rpi_add_residual_4x4_v_neon_10;
++        c->add_residual_v[1]           = ff_hevc_rpi_add_residual_8x8_v_neon_10;
++        c->add_residual_v[2]           = ff_hevc_rpi_add_residual_16x16_v_neon_10;
++        c->add_residual_c[0]           = ff_hevc_rpi_add_residual_4x4_c_neon_10;
++        c->add_residual_c[1]           = ff_hevc_rpi_add_residual_8x8_c_neon_10;
++        c->add_residual_c[2]           = ff_hevc_rpi_add_residual_16x16_c_neon_10;
++        c->add_residual_dc_c[0]        = ff_hevc_rpi_add_residual_4x4_dc_c_neon_10;
++        c->add_residual_dc_c[1]        = ff_hevc_rpi_add_residual_8x8_dc_c_neon_10;
++        c->add_residual_dc_c[2]        = ff_hevc_rpi_add_residual_16x16_dc_c_neon_10;
++        c->transform_4x4_luma          = ff_hevc_rpi_transform_luma_4x4_neon_10;
++        c->sao_band_filter[0]          = ff_hevc_rpi_sao_band_8_neon_10;
++        c->sao_band_filter[1]          = ff_hevc_rpi_sao_band_16_neon_10;
++        c->sao_band_filter[2]          = ff_hevc_rpi_sao_band_32_neon_10;
++        c->sao_band_filter[3]          = ff_hevc_rpi_sao_band_48_neon_10;
++        c->sao_band_filter[4]          = ff_hevc_rpi_sao_band_64_neon_10;
++
++        c->sao_edge_filter[0]          = ff_hevc_rpi_sao_edge_8_neon_10;
++        c->sao_edge_filter[1]          = ff_hevc_rpi_sao_edge_16_neon_10;
++        c->sao_edge_filter[2]          = ff_hevc_rpi_sao_edge_32_neon_10;
++        c->sao_edge_filter[3]          = ff_hevc_rpi_sao_edge_48_neon_10;
++        c->sao_edge_filter[4]          = ff_hevc_rpi_sao_edge_64_neon_10;
++#if SAO_FILTER_N == 6
++        c->sao_band_filter[5]          = ff_hevc_rpi_sao_band_24_neon_10;
++        c->sao_edge_filter[5]          = ff_hevc_rpi_sao_edge_24_neon_10;
++#endif
++        c->sao_band_filter_c[0]        = ff_hevc_rpi_sao_band_c_8_neon_10;
++        c->sao_band_filter_c[1]        = ff_hevc_rpi_sao_band_c_16_neon_10;
++        c->sao_band_filter_c[2]        = ff_hevc_rpi_sao_band_c_32_neon_10;
++
++        c->sao_edge_filter_c[0]        = ff_hevc_rpi_sao_edge_c_8_neon_10;
++        c->sao_edge_filter_c[1]        = ff_hevc_rpi_sao_edge_c_16_neon_10;
++        c->sao_edge_filter_c[2]        = ff_hevc_rpi_sao_edge_c_32_neon_10;
++
++#if SAO_FILTER_N == 6
++        c->sao_band_filter_c[5]        = ff_hevc_rpi_sao_band_c_24_neon_10;
++        c->sao_edge_filter_c[5]        = ff_hevc_rpi_sao_edge_c_24_neon_10;
++#endif
++    }
++
++    assert(offsetof(HEVCRpiMvField, mv) == 0);
++    assert(offsetof(HEVCRpiMvField, ref_idx) == 8);
++    assert(offsetof(HEVCRpiMvField, pred_flag) == 10);
++    c->hevc_deblocking_boundary_strengths = ff_hevc_rpi_deblocking_boundary_strengths_neon;
++    c->cpy_blk = ff_hevc_rpi_cpy_blks8x4_neon;
++}
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcdsp_res16_neon.S
+@@ -0,0 +1,620 @@
++/*
++Copyright (c) 2017 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox, Ben Avison
++*/
++
++#include "libavutil/arm/asm.S"
++#include "neon.S"
++
++ .arch_extension mp @ enable PLDW
++
++#define BIT_DEPTH 10
++
++.macro clip16_4 Q0, Q1, Q2, Q3, Q_MIN, Q_MAX
++        vmax.s16  \Q0, \Q_MIN       @ clamp four q registers from below: each lane = max(lane, Q_MIN lane)
++        vmax.s16  \Q1, \Q_MIN
++        vmax.s16  \Q2, \Q_MIN
++        vmax.s16  \Q3, \Q_MIN
++        vmin.s16  \Q0, \Q_MAX       @ then from above: each lane = min(lane, Q_MAX lane)
++        vmin.s16  \Q1, \Q_MAX
++        vmin.s16  \Q2, \Q_MAX
++        vmin.s16  \Q3, \Q_MAX
++.endm
++
++@ add_residual4x4(    dst = clip(dst + res) over a 4x4 block of 16-bit pixels
++@  uint16_t *_dst,    [r0]  each row accessed with a :64 alignment hint
++@  int16_t *res,      [r1]  16 contiguous residuals
++@  ptrdiff_t stride)  [r2]  added unscaled to the row pointers (byte offset)
++
++function JOIN(ff_hevc_rpi_add_residual_4x4_neon_, BIT_DEPTH), export=1
++        add         ip, r0, r2                  @ ip = dst + stride, walks rows 1 and 3
++        vld1.16     {q10, q11}, [r1]            @ all 16 residuals
++        lsl         r2, #1                      @ r2 = 2 * stride, each pointer steps two rows
++        vld1.16     {d0}, [r0 :64], r2          @ row 0
++        vld1.16     {d1}, [ip :64], r2          @ row 1
++        vld1.16     {d2}, [r0 :64]              @ row 2
++        vld1.16     {d3}, [ip :64]              @ row 3
++        sub         r0, r2                      @ rewind r0 to row 0 for the stores
++        vqadd.s16   q0,  q10                    @ rows 0-1 plus residual, signed saturating
++        sub         ip, r2                      @ rewind ip to row 1
++        vqadd.s16   q1,  q11                    @ rows 2-3 plus residual
++        vmov.i16    q8,  #0                     @ lower clamp bound
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++        vmax.s16    q0,  q0,  q8
++        vmax.s16    q1,  q1,  q8
++        vmin.s16    q0,  q0,  q9
++        vmin.s16    q1,  q1,  q9
++        vst1.16     {d0}, [r0 :64], r2          @ write back rows 0..3 in load order
++        vst1.16     {d1}, [ip :64], r2
++        vst1.16     {d2}, [r0 :64]
++        vst1.16     {d3}, [ip :64]
++        bx          lr
++
++endfunc
++
++@ add_residual4x4_dc( dst = clip(dst + dc) over a 4x4 block of 16-bit pixels
++@  uint16_t *_dst,    [r0]  each row accessed with a :64 alignment hint
++@  ptrdiff_t stride,  [r1]  added unscaled to the row pointers (byte offset)
++@  int dc)            [r2]  low 16 bits are broadcast to every lane
++
++function JOIN(ff_hevc_rpi_add_residual_4x4_dc_neon_, BIT_DEPTH), export=1
++        add         ip, r0, r1                  @ ip = dst + stride, walks rows 1 and 3
++        vdup.16     q15, r2                     @ q15 = dc in all eight lanes
++        lsl         r1, #1                      @ r1 = 2 * stride
++        vld1.16     {d0}, [r0 :64], r1          @ row 0
++        vld1.16     {d1}, [ip :64], r1          @ row 1
++        vld1.16     {d2}, [r0 :64]              @ row 2
++        vld1.16     {d3}, [ip :64]              @ row 3
++        sub         r0, r1                      @ rewind r0 to row 0 for the stores
++        vqadd.s16   q0,  q15
++        sub         ip, r1                      @ rewind ip to row 1
++        vqadd.s16   q1,  q15
++        vmov.i16    q8,  #0                     @ lower clamp bound
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++        vmax.s16    q0,  q0,  q8
++        vmax.s16    q1,  q1,  q8
++        vmin.s16    q0,  q0,  q9
++        vmin.s16    q1,  q1,  q9
++        vst1.16     {d0}, [r0 :64], r1          @ write back rows 0..3 in load order
++        vst1.16     {d1}, [ip :64], r1
++        vst1.16     {d2}, [r0 :64]
++        vst1.16     {d3}, [ip :64]
++        bx          lr
++
++endfunc
++
++
++@ add_residual8x8(    dst = clip(dst + res) over an 8x8 block of 16-bit pixels
++@  uint16_t *_dst,    [r0]  each row accessed with a :128 alignment hint
++@  int16_t *res,      [r1]  64 contiguous residuals, consumed 32 per loop pass
++@  ptrdiff_t stride)  [r2]  added unscaled to the row pointers (byte offset)
++
++function JOIN(ff_hevc_rpi_add_residual_8x8_neon_, BIT_DEPTH), export=1
++        mov         r3, #8                      @ rows remaining
++        vmov.i64    q8,  #0                     @ lower clamp bound (all lanes zero)
++        add         ip, r0, r2                  @ ip walks the odd rows
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++        lsl         r2, #1                      @ r2 = 2 * stride
++1:
++        vldm        r1!, {q10-q13}              @ residuals for the next four rows
++        vld1.16     {q0}, [r0 :128], r2         @ rows n, n+1, n+2, n+3 into q0..q3
++        vld1.16     {q1}, [ip :128], r2
++        vld1.16     {q2}, [r0 :128]
++        vld1.16     {q3}, [ip :128]
++        sub         r0, r2                      @ rewind both pointers by two rows for the stores
++        vqadd.s16   q0,  q10
++        sub         ip, r2
++        vqadd.s16   q1,  q11
++        subs        r3, #4                      @ four rows per pass, flags consumed by bne below
++        vqadd.s16   q2,  q12
++        vqadd.s16   q3,  q13
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst1.16     {q0}, [r0 :128], r2
++        vst1.16     {q1}, [ip :128], r2
++        vst1.16     {q2}, [r0 :128], r2         @ post-increment leaves r0/ip on the next group of rows
++        vst1.16     {q3}, [ip :128], r2
++        bne         1b
++        bx          lr
++
++endfunc
++
++@ add_residual4x4_dc_c(  entry stub: presets r3 and q15, then joins the loop of the following function
++@  uint16_t *_dst,    [r0]
++@  ptrdiff_t stride,  [r1]
++@  int dc_uv)         [r2]  broadcast as a 32-bit value - presumably U and V DC in the two 16-bit halves, TODO confirm
++
++function JOIN(ff_hevc_rpi_add_residual_4x4_dc_c_neon_, BIT_DEPTH), export=1
++        mov         r3, #4                      @ row count for the shared loop
++        vdup.32     q15, r2                     @ q15 = dc_uv repeated in all four 32-bit lanes
++        b           9f                          @ next local label 9, inside the function below
++endfunc
++
++@ add_residual8x8_dc( dst = clip(dst + dc) over rows of eight 16-bit elements
++@  uint16_t *_dst,    [r0]  each row accessed with a :128 alignment hint
++@  ptrdiff_t stride,  [r1]  added unscaled to the row pointers (byte offset)
++@  int dc)            [r2]  low 16 bits are broadcast to every lane
++
++function JOIN(ff_hevc_rpi_add_residual_8x8_dc_neon_, BIT_DEPTH), export=1
++        vdup.16     q15, r2                     @ q15 = dc in all eight lanes
++        mov         r3, #8                      @ rows remaining
++9:
++        vmov.i16    q8,  #0                     @ label 9 is also entered from the 4x4_dc_c stub above with r3/q15 preset
++        add         ip, r0, r1                  @ ip walks the odd rows
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound (q8 is the lower bound)
++        lsl         r1, #1                      @ r1 = 2 * stride
++1:
++        vld1.16     {q0}, [r0 :128], r1         @ rows n, n+1, n+2, n+3 into q0..q3
++        vld1.16     {q1}, [ip :128], r1
++        vld1.16     {q2}, [r0 :128]
++        vld1.16     {q3}, [ip :128]
++        sub         r0, r1                      @ rewind both pointers by two rows for the stores
++        vqadd.s16   q0,  q15
++        sub         ip, r1
++        vqadd.s16   q1,  q15
++        subs        r3, #4                      @ four rows per pass, flags consumed by bne below
++        vqadd.s16   q2,  q15
++        vqadd.s16   q3,  q15
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst1.16     {q0}, [r0 :128], r1
++        vst1.16     {q1}, [ip :128], r1
++        vst1.16     {q2}, [r0 :128], r1         @ post-increment leaves r0/ip on the next group of rows
++        vst1.16     {q3}, [ip :128], r1
++        bne         1b
++        bx          lr
++
++endfunc
++
++@ add_residual16x16(  dst = clip(dst + res) over a 16x16 block of 16-bit pixels
++@  uint16_t *_dst,    [r0]  each row accessed with a :128 alignment hint
++@  int16_t *res,      [r1]  256 contiguous residuals, consumed 32 per loop pass
++@  ptrdiff_t stride)  [r2]  added unscaled to the row pointers (byte offset)
++
++function JOIN(ff_hevc_rpi_add_residual_16x16_neon_, BIT_DEPTH), export=1
++        add         ip, r0, r2                  @ ip walks the odd rows
++        vmov.i16    q8,  #0                     @ lower clamp bound
++        lsl         r2, #1                      @ r2 = 2 * stride
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++        mov         r3, #16                     @ rows remaining
++1:
++        vldm        r1!, {q10-q13}              @ residuals for two rows
++        @ For RPI Sand we could guarantee :256 but not for general
++        @ non-RPI allocation. :128 is as good as we can claim
++        vld1.16     {q0, q1}, [r0 :128]         @ even row (16 pixels)
++        subs        r3, #2                      @ two rows per pass, flags consumed by bne below
++        vld1.16     {q2, q3}, [ip :128]         @ odd row
++        vqadd.s16   q0,  q10
++        vqadd.s16   q1,  q11
++        vqadd.s16   q2,  q12
++        vqadd.s16   q3,  q13
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst1.16     {q0, q1}, [r0 :128], r2     @ store and advance each pointer by two rows
++        vst1.16     {q2, q3}, [ip :128], r2
++        bne         1b
++        bx          lr
++endfunc
++
++@ add_residual8x8_dc_c(  entry stub: presets r3 and q15, then joins the loop of the following function
++@  uint16_t *_dst,    [r0]
++@  ptrdiff_t stride,  [r1]
++@  int dc_uv)         [r2]  broadcast as a 32-bit value - presumably U and V DC in the two 16-bit halves, TODO confirm
++
++function JOIN(ff_hevc_rpi_add_residual_8x8_dc_c_neon_, BIT_DEPTH), export=1
++        mov         r3, #8                      @ row count for the shared loop
++        vdup.32     q15, r2                     @ q15 = dc_uv repeated in all four 32-bit lanes
++        b           9f                          @ next local label 9, inside the function below
++endfunc
++
++@ add_residual16x16_dc(  dst = clip(dst + dc) over rows of sixteen 16-bit elements
++@  uint16_t *_dst,    [r0]  each row accessed with a :128 alignment hint
++@  ptrdiff_t stride,  [r1]  added unscaled to the row pointers (byte offset)
++@  int dc)            [r2]  low 16 bits are broadcast to every lane
++
++function JOIN(ff_hevc_rpi_add_residual_16x16_dc_neon_, BIT_DEPTH), export=1
++        vdup.i16    q15, r2                     @ q15 = dc in all eight lanes
++        mov         r3, #16                     @ rows remaining
++9:
++        vmov.i16    q8,  #0                     @ label 9 is also entered from the 8x8_dc_c stub above with r3/q15 preset
++        add         ip, r0, r1                  @ ip walks the odd rows
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound (q8 is the lower bound)
++        lsl         r1, #1                      @ r1 = 2 * stride
++1:
++        @ For RPI Sand we could guarantee :256 but not for general
++        @ non-RPI allocation. :128 is as good as we can claim
++        vld1.16     {q0, q1}, [r0 :128]         @ even row
++        subs        r3, #2                      @ two rows per pass, flags consumed by bne below
++        vqadd.s16   q0,  q15
++        vqadd.s16   q1,  q15
++        vld1.16     {q2, q3}, [ip :128]         @ odd row
++        vqadd.s16   q2,  q15
++        vqadd.s16   q3,  q15
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst1.16     {q0, q1}, [r0 :128], r1     @ store and advance each pointer by two rows
++        vst1.16     {q2, q3}, [ip :128], r1
++        bne         1b
++        bx          lr
++
++endfunc
++
++
++@ add_residual32x32(  dst = clip(dst + res) over a 32x32 block of 16-bit pixels
++@  uint16_t *_dst,    [r0]  one 64-byte row per loop pass
++@  int16_t *res,      [r1]  contiguous residuals, 32 consumed per loop pass
++@  ptrdiff_t stride)  [r2]  added unscaled to the row pointers (byte offset)
++
++function JOIN(ff_hevc_rpi_add_residual_32x32_neon_, BIT_DEPTH), export=1
++        push        {lr}                        @ lr is used as a scratch pointer below
++        mov         r3, #32                     @ rows remaining
++        vmov.i16    q8,  #0                     @ lower clamp bound
++        add         lr, r0, r2                  @ lr = next row, used only as the prefetch address
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++        add         ip, r0, #32                 @ ip = second half of the row (byte offset 32)
++1:
++        vldm        r1!, {q10-q13}              @ residuals for one row
++        vldm        r0,  {q0-q3}                @ whole destination row, r0 not advanced here
++        vqadd.s16   q0,  q10
++          pldw        [lr]                      @ prefetch-for-write the following row
++        vqadd.s16   q1,  q11
++          add         lr, r2
++        vqadd.s16   q2,  q12
++        subs        r3, #1                      @ flags consumed by bne below
++        vqadd.s16   q3,  q13
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst1.16     {q0-q1}, [r0], r2           @ first half of the row, advance one row
++        vst1.16     {q2-q3}, [ip], r2           @ second half of the row, advance one row
++        bne         1b
++        pop         {pc}
++
++endfunc
++
++@ add_residual16x16_dc_c(  entry stub: presets r3 and q15, then joins the loop of the following function
++@  uint16_t *_dst,    [r0]
++@  ptrdiff_t stride,  [r1]
++@  int dc_uv)         [r2]  broadcast as a 32-bit value - presumably U and V DC in the two 16-bit halves, TODO confirm
++
++function JOIN(ff_hevc_rpi_add_residual_16x16_dc_c_neon_, BIT_DEPTH), export=1
++        mov         r3, #16                     @ row count for the shared loop
++        vdup.32     q15, r2                     @ q15 = dc_uv repeated in all four 32-bit lanes
++        b           9f                          @ next local label 9, inside the function below
++endfunc
++
++@ add_residual32x32_dc(  dst = clip(dst + dc) over rows of thirty-two 16-bit elements
++@  uint16_t *_dst,    [r0]  one 64-byte row per loop pass
++@  ptrdiff_t stride,  [r1]  added unscaled to the row pointers (byte offset)
++@  int dc)            [r2]  low 16 bits are broadcast to every lane
++
++function JOIN(ff_hevc_rpi_add_residual_32x32_dc_neon_, BIT_DEPTH), export=1
++        vdup.16     q15, r2                     @ q15 = dc in all eight lanes
++        mov         r3, #32                     @ rows remaining
++9:
++        vmov.i16    q8,  #0                     @ label 9 is also entered from the 16x16_dc_c stub above with r3/q15 preset
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound (q8 is the lower bound)
++        add         ip, r0, #32                 @ ip = second half of the row (byte offset 32)
++1:
++        vldm        r0,  {q0-q3}                @ whole destination row, r0 not advanced here
++        vqadd.s16   q0,  q15
++        subs        r3, #1                      @ flags consumed by bne below
++        vqadd.s16   q1,  q15
++        vqadd.s16   q2,  q15
++        vqadd.s16   q3,  q15
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst1.16     {q0-q1}, [r0], r1           @ first half of the row, advance one row
++        vst1.16     {q2-q3}, [ip], r1           @ second half of the row, advance one row
++        bne         1b
++        bx          lr
++
++endfunc
++
++@ ============================================================================
++@ U add
++
++@ add_residual4x4_u(     interleaved dst: even elements += res, odd elements += dc, all clamped
++@   uint16_t *_dst,       [r0]  4 rows of 8 interleaved 16-bit elements, :128 hint
++@   const int16_t *res,   [r1]  16 residuals, :256 hint
++@   ptrdiff_t stride,     [r2]  added unscaled to the row pointers (byte offset)
++@   int dc)               [r3]  low 16 bits broadcast, applied to the odd elements
++
++function JOIN(ff_hevc_rpi_add_residual_4x4_u_neon_, BIT_DEPTH), export=1
++        vdup.16     q15, r3
++        add         ip, r0, r2                  @ ip walks rows 1 and 3
++        vld1.16     {q10, q11}, [r1 :256]
++        lsl         r2, #1                      @ r2 = 2 * stride
++        vld2.16     {d0, d2}, [r0 :128], r2     @ row 0 de-interleaved: even elements to d0, odd to d2
++        vld2.16     {d1, d3}, [ip :128], r2     @ row 1: even to d1, odd to d3
++        vld2.16     {d4, d6}, [r0 :128]         @ row 2
++        vld2.16     {d5, d7}, [ip :128]         @ row 3
++        sub         r0, r2                      @ rewind r0 to row 0 for the stores
++        vmov.i16    q8,  #0                     @ lower clamp bound
++        sub         ip, r2                      @ rewind ip to row 1
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++
++        vqadd.s16   q0,  q10                    @ even elements, rows 0-1, plus residual
++        vqadd.s16   q1,  q15                    @ odd elements, rows 0-1, plus dc
++        vqadd.s16   q2,  q11                    @ even elements, rows 2-3, plus residual
++        vqadd.s16   q3,  q15                    @ odd elements, rows 2-3, plus dc
++        clip16_4    q0, q1, q2, q3, q8, q9
++
++        vst2.16     {d0, d2}, [r0 :128], r2     @ re-interleave and write back rows 0..3
++        vst2.16     {d1, d3}, [ip :128], r2
++        vst2.16     {d4, d6}, [r0 :128]
++        vst2.16     {d5, d7}, [ip :128]
++        bx          lr
++endfunc
++
++@ add_residual8x8_u(     interleaved dst: even elements += res, odd elements += dc, all clamped
++@   uint16_t *_dst,       [r0]  8 rows of 16 interleaved 16-bit elements, :256 hint
++@   const int16_t *res,   [r1]  64 residuals, 16 consumed per loop pass
++@   ptrdiff_t stride,     [r2]  added unscaled to the row pointers (byte offset)
++@   int dc)               [r3]  low 16 bits broadcast, applied to the odd elements
++
++function JOIN(ff_hevc_rpi_add_residual_8x8_u_neon_, BIT_DEPTH), export=1
++        vdup.16     q15, r3
++        mov         r3, #8                      @ r3 is reused as the rows-remaining counter
++        vmov.i16    q8,  #0                     @ lower clamp bound
++        add         ip, r0, r2                  @ ip walks the odd rows
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++        lsl         r2, #1                      @ r2 = 2 * stride
++1:
++        vld2.16     {q0, q1}, [r0 :256]         @ even row: even elements to q0, odd to q1
++        subs        r3, #2                      @ two rows per pass, flags consumed by bne below
++        vld2.16     {q2, q3}, [ip :256]         @ odd row: even elements to q2, odd to q3
++        vld1.16     {q10, q11}, [r1 :256]!      @ residuals for both rows
++        vqadd.s16   q0,  q10
++        vqadd.s16   q1,  q15
++        vqadd.s16   q2,  q11
++        vqadd.s16   q3,  q15
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst2.16     {q0, q1}, [r0 :256], r2     @ re-interleave, store, advance two rows
++        vst2.16     {q2, q3}, [ip :256], r2
++        bne         1b
++        bx          lr
++endfunc
++
++@ add_residual16x16_u(   interleaved dst: even elements += res, odd elements += dc, all clamped
++@   uint16_t *_dst,       [r0]  16 rows of 32 interleaved 16-bit elements, :256 hint
++@   const int16_t *res,   [r1]  256 residuals, 16 consumed per loop pass
++@   ptrdiff_t stride,     [r2]  added unscaled to the row pointers (byte offset)
++@   int dc)               [r3]  low 16 bits broadcast, applied to the odd elements
++
++function JOIN(ff_hevc_rpi_add_residual_16x16_u_neon_, BIT_DEPTH), export=1
++        push        {lr}                        @ lr is used as a scratch pointer below
++        vdup.16     q15, r3
++        mov         r3, #16                     @ r3 is reused as the rows-remaining counter
++        vmov.i16    q8,  #0                     @ lower clamp bound
++        add         lr, r0, r2                  @ lr = next row, used only as the prefetch address
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++        add         ip, r0, #32                 @ ip = second half of the row (byte offset 32)
++1:
++        vld2.16     {q0, q1}, [r0 :256]         @ first half: even elements to q0, odd to q1
++        vld2.16     {q2, q3}, [ip :256]         @ second half: even elements to q2, odd to q3
++        vld1.16     {q10, q11}, [r1 :256]!      @ residuals for one row
++        vqadd.s16   q0,  q10
++          pldw        [lr]                      @ prefetch-for-write the following row
++        vqadd.s16   q1,  q15
++          add         lr, r2
++        vqadd.s16   q2,  q11
++        subs        r3, #1                      @ flags consumed by bne below
++        vqadd.s16   q3,  q15
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst2.16     {q0, q1}, [r0 :256], r2     @ re-interleave, store, advance one row
++        vst2.16     {q2, q3}, [ip :256], r2
++        bne         1b
++        pop         {pc}
++endfunc
++
++@ ============================================================================
++@ V add
++
++@ add_residual4x4_v(     interleaved dst: even elements += dc, odd elements += res, all clamped
++@   uint16_t *_dst,       [r0]  4 rows of 8 interleaved 16-bit elements, :128 hint
++@   const int16_t *res,   [r1]  16 residuals, :256 hint
++@   ptrdiff_t stride,     [r2]  added unscaled to the row pointers (byte offset)
++@   int dc)               [r3]  low 16 bits broadcast, applied to the even elements
++
++function JOIN(ff_hevc_rpi_add_residual_4x4_v_neon_, BIT_DEPTH), export=1
++        vdup.16     q15, r3
++        add         ip, r0, r2                  @ ip walks rows 1 and 3
++        vld1.16     {q10, q11}, [r1 :256]
++        lsl         r2, #1                      @ r2 = 2 * stride
++        vld2.16     {d0, d2}, [r0 :128], r2     @ row 0 de-interleaved: even elements to d0, odd to d2
++        vld2.16     {d1, d3}, [ip :128], r2     @ row 1: even to d1, odd to d3
++        vld2.16     {d4, d6}, [r0 :128]         @ row 2
++        vld2.16     {d5, d7}, [ip :128]         @ row 3
++        sub         r0, r2                      @ rewind r0 to row 0 for the stores
++        vmov.i16    q8,  #0                     @ lower clamp bound
++        sub         ip, r2                      @ rewind ip to row 1
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++
++        vqadd.s16   q0,  q15                    @ even elements, rows 0-1, plus dc
++        vqadd.s16   q1,  q10                    @ odd elements, rows 0-1, plus residual
++        vqadd.s16   q2,  q15                    @ even elements, rows 2-3, plus dc
++        vqadd.s16   q3,  q11                    @ odd elements, rows 2-3, plus residual
++        clip16_4    q0, q1, q2, q3, q8, q9
++
++        vst2.16     {d0, d2}, [r0 :128], r2     @ re-interleave and write back rows 0..3
++        vst2.16     {d1, d3}, [ip :128], r2
++        vst2.16     {d4, d6}, [r0 :128]
++        vst2.16     {d5, d7}, [ip :128]
++        bx          lr
++endfunc
++
++@ add_residual8x8_v(     interleaved dst: even elements += dc, odd elements += res, all clamped
++@   uint16_t *_dst,       [r0]  8 rows of 16 interleaved 16-bit elements, :256 hint
++@   const int16_t *res,   [r1]  64 residuals, 16 consumed per loop pass
++@   ptrdiff_t stride,     [r2]  added unscaled to the row pointers (byte offset)
++@   int dc)               [r3]  low 16 bits broadcast, applied to the even elements
++
++function JOIN(ff_hevc_rpi_add_residual_8x8_v_neon_, BIT_DEPTH), export=1
++        vdup.16     q15, r3
++        mov         r3, #8                      @ r3 is reused as the rows-remaining counter
++        vmov.i16    q8,  #0                     @ lower clamp bound
++        add         ip, r0, r2                  @ ip walks the odd rows
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++        lsl         r2, #1                      @ r2 = 2 * stride
++1:
++        vld2.16     {q0, q1}, [r0 :256]         @ even row: even elements to q0, odd to q1
++        subs        r3, #2                      @ two rows per pass, flags consumed by bne below
++        vld2.16     {q2, q3}, [ip :256]         @ odd row: even elements to q2, odd to q3
++        vld1.16     {q10, q11}, [r1 :256]!      @ residuals for both rows
++        vqadd.s16   q0,  q15
++        vqadd.s16   q1,  q10
++        vqadd.s16   q2,  q15
++        vqadd.s16   q3,  q11
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst2.16     {q0, q1}, [r0 :256], r2     @ re-interleave, store, advance two rows
++        vst2.16     {q2, q3}, [ip :256], r2
++        bne         1b
++        bx          lr
++endfunc
++
++@ add_residual16x16_v(   interleaved dst: even elements += dc, odd elements += res, all clamped
++@   uint16_t *_dst,       [r0]  16 rows of 32 interleaved 16-bit elements, :256 hint
++@   const int16_t *res,   [r1]  256 residuals, 16 consumed per loop pass
++@   ptrdiff_t stride,     [r2]  added unscaled to the row pointers (byte offset)
++@   int dc)               [r3]  low 16 bits broadcast, applied to the even elements
++
++function JOIN(ff_hevc_rpi_add_residual_16x16_v_neon_, BIT_DEPTH), export=1
++        push        {lr}                        @ lr is used as a scratch pointer below
++        vdup.16     q15, r3
++        mov         r3, #16                     @ r3 is reused as the rows-remaining counter
++        vmov.i16    q8,  #0                     @ lower clamp bound
++        add         lr, r0, r2                  @ lr = next row, used only as the prefetch address
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++        add         ip, r0, #32                 @ ip = second half of the row (byte offset 32)
++1:
++        vld2.16     {q0, q1}, [r0 :256]         @ first half: even elements to q0, odd to q1
++        vld2.16     {q2, q3}, [ip :256]         @ second half: even elements to q2, odd to q3
++        vld1.16     {q10, q11}, [r1 :256]!      @ residuals for one row
++        vqadd.s16   q0,  q15
++          pldw        [lr]                      @ prefetch-for-write the following row
++        vqadd.s16   q1,  q10
++          add         lr, r2
++        vqadd.s16   q2,  q15
++        subs        r3, #1                      @ flags consumed by bne below
++        vqadd.s16   q3,  q11
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst2.16     {q0, q1}, [r0 :256], r2     @ re-interleave, store, advance one row
++        vst2.16     {q2, q3}, [ip :256], r2
++        bne         1b
++        pop         {pc}
++endfunc
++
++@ ============================================================================
++@ U & V add
++
++@ add_residual4x4_c(     interleaved dst: even elements += first 16 residuals, odd += next 16, all clamped
++@   uint16_t *_dst,       [r0]  4 rows of 8 interleaved 16-bit elements, :128 hint
++@   const int16_t *res,   [r1]  32 contiguous residuals (q10-q11 then q12-q13)
++@   ptrdiff_t stride)     [r2]  added unscaled to the row pointers (byte offset)
++
++function JOIN(ff_hevc_rpi_add_residual_4x4_c_neon_, BIT_DEPTH), export=1
++        vmov.i16    q8,  #0                     @ lower clamp bound
++        add         ip, r0, r2                  @ ip walks rows 1 and 3
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++        lsl         r2, #1                      @ r2 = 2 * stride
++        vldm        r1, {q10-q13}               @ both residual planes at once
++        vld2.16     {d0, d2}, [r0 :128], r2     @ row 0 de-interleaved: even elements to d0, odd to d2
++        vld2.16     {d1, d3}, [ip :128], r2     @ row 1: even to d1, odd to d3
++        vld2.16     {d4, d6}, [r0 :128]         @ row 2
++        vld2.16     {d5, d7}, [ip :128]         @ row 3
++
++        sub         r0, r2                      @ rewind r0 to row 0 for the stores
++        vqadd.s16   q0,  q10                    @ even elements, rows 0-1
++        sub         ip, r2                      @ rewind ip to row 1
++        vqadd.s16   q1,  q12                    @ odd elements, rows 0-1
++        vqadd.s16   q2,  q11                    @ even elements, rows 2-3
++        vqadd.s16   q3,  q13                    @ odd elements, rows 2-3
++        clip16_4    q0, q1, q2, q3, q8, q9
++
++        vst2.16     {d0, d2}, [r0 :128], r2     @ re-interleave and write back rows 0..3
++        vst2.16     {d1, d3}, [ip :128], r2
++        vst2.16     {d4, d6}, [r0 :128]
++        vst2.16     {d5, d7}, [ip :128]
++        bx          lr
++endfunc
++
++@ add_residual8x8_c(     interleaved dst: even elements += first residual plane, odd += second plane, all clamped
++@   uint16_t *_dst,       [r0]  8 rows of 16 interleaved 16-bit elements, :256 hint
++@   const int16_t *res,   [r1]  first plane at res, second plane 8*8*2 bytes later
++@   ptrdiff_t stride)     [r2]  added unscaled to the row pointers (byte offset)
++
++function JOIN(ff_hevc_rpi_add_residual_8x8_c_neon_, BIT_DEPTH), export=1
++        push        {lr}                        @ lr is used as the loop counter below
++        add         ip, r0, r2                  @ ip walks the odd rows
++        lsl         r2, #1                      @ r2 = 2 * stride
++        vmov.i16    q8,  #0                     @ lower clamp bound
++        add         r3, r1, #(8*8*2)  @ Offset to V
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++        mov         lr, #8                      @ rows remaining
++1:
++        vld1.16     {q10, q11}, [r1 :256]!      @ first-plane residuals for two rows
++        subs        lr, #2                      @ two rows per pass, flags consumed by bne below
++        vld2.16     {q0, q1}, [r0 :256]         @ even row: even elements to q0, odd to q1
++        vld2.16     {q2, q3}, [ip :256]         @ odd row: even elements to q2, odd to q3
++        vld1.16     {q12, q13}, [r3 :256]!      @ second-plane residuals for two rows
++        vqadd.s16   q0,  q10
++        vqadd.s16   q1,  q12
++        vqadd.s16   q2,  q11
++        vqadd.s16   q3,  q13
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst2.16     {q0, q1}, [r0 :256], r2     @ re-interleave, store, advance two rows
++        vst2.16     {q2, q3}, [ip :256], r2
++        bne         1b
++        pop         {pc}
++endfunc
++
++@ add_residual16x16_c(   interleaved dst: even elements += first residual plane, odd += second plane, all clamped
++@   uint16_t *_dst,       [r0]  16 rows of 32 interleaved 16-bit elements, :256 hint
++@   const int16_t *res,   [r1]  first plane at res, second plane 16*16*2 bytes later
++@   ptrdiff_t stride)     [r2]  added unscaled to the row pointers (byte offset)
++
++function JOIN(ff_hevc_rpi_add_residual_16x16_c_neon_, BIT_DEPTH), export=1
++        push        {r4, lr}                    @ r4 and lr are used as scratch below
++        vmov.i16    q8,  #0                     @ lower clamp bound
++        add         r3,  r1, #(16*16*2)  @ Offset to V
++        vmov.i16    q9,  #(1 << BIT_DEPTH) - 1  @ upper clamp bound
++        add         ip, r0, #32                 @ ip = second half of the row (byte offset 32)
++        add         r4, r0, r2                  @ r4 = next row, used only as the prefetch address
++        mov         lr, #16                     @ rows remaining
++1:
++        vld2.16     {q0, q1}, [r0 :256]         @ first half: even elements to q0, odd to q1
++        vld2.16     {q2, q3}, [ip :256]         @ second half: even elements to q2, odd to q3
++        vld1.16     {q10, q11}, [r1 :256]!      @ first-plane residuals for one row
++        vld1.16     {q12, q13}, [r3 :256]!      @ second-plane residuals for one row
++        vqadd.s16   q0,  q10
++          pldw        [r4]                      @ prefetch-for-write the following row
++        vqadd.s16   q1,  q12
++          add         r4, r2
++        vqadd.s16   q2,  q11
++        subs        lr, #1                      @ flags consumed by bne below
++        vqadd.s16   q3,  q13
++        clip16_4    q0, q1, q2, q3, q8, q9
++        vst2.16     {q0, q1}, [r0 :256], r2     @ re-interleave, store, advance one row
++        vst2.16     {q2, q3}, [ip :256], r2
++        bne         1b
++        pop         {r4,pc}
++endfunc
++
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcdsp_res8_neon.S
+@@ -0,0 +1,741 @@
++/*
++Copyright (c) 2017 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox, Ben Avison
++*/
++
++#include "libavutil/arm/asm.S"
++#include "neon.S"
++
++ .arch_extension mp @ enable PLDW
++
++@ General notes:
++@
++@ Residuals are generally only guaranteed to fit in 16 bits.
++@ This means that we need to use vmovl, vqadd, vqmovun rather than
++@ vaddw, vqmovun (if they were clipped to 15 bits then the plain vaddw
++@ could not overflow and we could get away with it).
++@
++@ There is an exception for the DC case because its transform is guaranteed
++@ to be small enough that overflow cannot occur during the first add.
++
++@ ============================================================================
++@ Y add
++
++function ff_hevc_rpi_add_residual_4x4_neon_8, export=1
++        add         ip, r0, r2          @ 4x4 8-bit add: r0 = dst, r1 = res, r2 = stride, ip walks rows 1 and 3
++        vld1.16     {q0, q1}, [r1]      @ all 16 residuals
++        lsl         r2, #1              @ r2 = 2 * stride
++        vld1.32     d4[0], [r0], r2     @ row 0 (4 pixels in one 32-bit lane)
++        rsb         r3, r2, #0          @ r3 = -2 * stride, used to step the pointers back
++        vld1.32     d4[1], [ip], r2     @ row 1
++        vld1.32     d5[0], [r0], r3     @ row 2, then r0 returns to row 0
++        vld1.32     d5[1], [ip], r3     @ row 3, then ip returns to row 1
++        vmovl.u8    q8, d4              @ widen pixels to 16 bits
++        vmovl.u8    q9, d5
++        vqadd.s16   q0, q8              @ saturating add of residual and pixel
++        vqadd.s16   q1, q9
++        vqmovun.s16 d0, q0              @ narrow with unsigned saturation back to 8 bits
++        vqmovun.s16 d1, q1
++        vst1.32     d0[0], [r0], r2     @ write back rows 0..3 in load order
++        vst1.32     d0[1], [ip], r2
++        vst1.32     d1[0], [r0]
++        vst1.32     d1[1], [ip]
++        bx          lr
++endfunc
++
++function ff_hevc_rpi_add_residual_8x8_neon_8, export=1
++        push        {r4, lr}            @ 8x8 8-bit add: r0 = dst, r1 = res, r2 = stride, r4/lr used as read pointers
++        vld1.16     {q0, q1}, [r1]!     @ residuals for rows 0 and 1
++        add         ip, r0, r2          @ ip = write pointer for odd rows (r0 for even rows)
++        vld1.8      {d6}, [r0]          @ row 0 pixels
++        add         r4, r0, r2, lsl #1  @ r4 = read pointer, starts at row 2
++        vld1.8      {d7}, [ip]          @ row 1 pixels
++        add         lr, ip, r2, lsl #1  @ lr = read pointer, starts at row 3
++        lsl         r2, #1              @ r2 = 2 * stride
++        mov         r3, #8-2            @ first row pair is handled in this prologue
++        vmovl.u8    q2, d6
++        vmovl.u8    q3, d7
++        vqadd.s16   q2, q0
++        vqadd.s16   q3, q1
++1:
++          vld1.16     {q0, q1}, [r1]!   @ extra-indented lines belong to the next row pair
++        subs        r3, #2              @ flags consumed by bne below
++        vqmovun.s16 d4, q2              @ narrow the current pair with unsigned saturation
++        vqmovun.s16 d5, q3
++          vld1.8      {d6}, [r4], r2
++          vld1.8      {d7}, [lr], r2
++        vst1.8      {d4}, [r0], r2      @ store the current pair
++        vst1.8      {d5}, [ip], r2
++          vmovl.u8    q2, d6
++            pldw        [r4]            @ prefetch-for-write at the advanced read pointer
++          vmovl.u8    q3, d7
++          vqadd.s16   q2, q0
++          vqadd.s16   q3, q1
++        bne         1b
++
++          vqmovun.s16 d4, q2            @ epilogue: narrow and store the final row pair
++          vqmovun.s16 d5, q3
++          vst1.8      {d4}, [r0]
++          vst1.8      {d5}, [ip]
++          pop         {r4, pc}
++endfunc
++
++function ff_hevc_rpi_add_residual_16x16_neon_8, export=1
++        vld1.16     {q0, q1}, [r1]!     @ 16x16 8-bit add: r0 = dst, r1 = res, r2 = stride, residuals for row 0
++        add         ip, r0, r2          @ ip = read pointer one row ahead of the write pointer r0
++        vld1.8      {q3}, [r0]          @ row 0 pixels
++        mov         r3, #16-1           @ first row is handled in this prologue
++        vmovl.u8    q2, d6              @ widen low and high halves to 16 bits
++        vmovl.u8    q3, d7
++        vqadd.s16   q2, q0
++        vqadd.s16   q3, q1
++1:
++          vld1.16     {q0, q1}, [r1]!   @ extra-indented lines belong to the next row
++        subs        r3, #1              @ flags consumed by bne below
++        vqmovun.s16 d4, q2              @ narrow the current row with unsigned saturation
++        vqmovun.s16 d5, q3
++          vld1.8      {q3}, [ip], r2
++        vst1.8      {q2}, [r0], r2      @ store the current row
++          vmovl.u8    q2, d6
++            pldw        [ip]            @ prefetch-for-write at the advanced read pointer
++          vmovl.u8    q3, d7
++          vqadd.s16   q2, q0
++          vqadd.s16   q3, q1
++        bne         1b
++
++          vqmovun.s16 d4, q2            @ epilogue: narrow and store the final row
++          vqmovun.s16 d5, q3
++          vst1.8      {q2}, [r0]
++          bx          lr
++endfunc
++
++function ff_hevc_rpi_add_residual_32x32_neon_8, export=1
++        vldm        r1!, {q0-q3}        @ 32x32 8-bit add: r0 = dst, r1 = res, r2 = stride, residuals for row 0
++        vld1.8      {q8, q9}, [r0]      @ row 0 pixels (32 bytes)
++        add         ip, r0, r2          @ ip = read pointer one row ahead of the write pointer r0
++        vmovl.u8    q10, d16            @ widen the row to 16 bits in q10..q13
++        mov         r3, #32-1           @ first row is handled in this prologue
++        vmovl.u8    q11, d17
++        vmovl.u8    q12, d18
++        vmovl.u8    q13, d19
++        vqadd.s16   q10, q0
++        vqadd.s16   q11, q1
++        vqadd.s16   q12, q2
++        vqadd.s16   q13, q3
++1:
++          vldm        r1!, {q0-q3}      @ extra-indented lines belong to the next row
++        vqmovun.s16 d20, q10            @ narrow the current row into q10..q11 with unsigned saturation
++        vqmovun.s16 d21, q11
++        vqmovun.s16 d22, q12
++        vqmovun.s16 d23, q13
++          vld1.8      {q8, q9}, [ip], r2
++        subs        r3, #1              @ flags consumed by bne below
++        vst1.8      {q10, q11}, [r0], r2    @ store the current row
++          vmovl.u8    q10, d16
++            pldw        [ip]            @ prefetch-for-write at the advanced read pointer
++          vmovl.u8    q11, d17
++          vmovl.u8    q12, d18
++          vmovl.u8    q13, d19
++          vqadd.s16   q10, q0
++          vqadd.s16   q11, q1
++          vqadd.s16   q12, q2
++          vqadd.s16   q13, q3
++        bne     1b
++
++          vqmovun.s16 d20, q10          @ epilogue: narrow and store the final row
++          vqmovun.s16 d21, q11
++          vqmovun.s16 d22, q12
++          vqmovun.s16 d23, q13
++          vst1.8      {q10, q11}, [r0]
++          bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_add_residual_4x4_dc_neon_8(   dst = saturate_u8(dst + dc) over a 4x4 block
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_4x4_dc_neon_8, export=1
++        add         ip, r0, r1          @ ip walks rows 1 and 3
++        vdup.16     q15, r2             @ q15 = low 16 bits of dc in every lane
++        lsl         r1, #1              @ r1 = 2 * stride
++        vld1.32     d4[0], [r0], r1     @ row 0 (4 pixels in one 32-bit lane)
++        rsb         r3, r1, #0          @ r3 = -2 * stride, used to step the pointers back
++        vld1.32     d4[1], [ip], r1     @ row 1
++        vld1.32     d5[0], [r0], r3     @ row 2, then r0 returns to row 0
++        vld1.32     d5[1], [ip], r3     @ row 3, then ip returns to row 1
++        vaddw.u8    q0, q15, d4         @ dc + widened pixels, non-saturating (see general notes at top of file)
++        vaddw.u8    q1, q15, d5
++        vqmovun.s16 d0, q0              @ narrow with unsigned saturation back to 8 bits
++        vqmovun.s16 d1, q1
++        vst1.32     d0[0], [r0], r1     @ write back rows 0..3 in load order
++        vst1.32     d0[1], [ip], r1
++        vst1.32     d1[0], [r0]
++        vst1.32     d1[1], [ip]
++        bx          lr
++endfunc
++
++@ ============================================================================
++@ DC Y or C add
++
++@ ff_hevc_rpi_add_residual_4x4_dc_c_neon_8(
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_4x4_dc_c_neon_8, export=1  @ 4-row entry into the 8-byte-wide DC loop of the function that follows
++        mov         r3,  #4-2                   @ rows still to load after the two primed by the shared body, which counts down by two
++        vdup.32     q15, r2                     @ replicate all 32 bits of dc: alternate 16-bit lanes get its low and high halves
++        b           1f                          @ lands on the first label 1 of the next function, skipping its own r3 and q15 setup
++endfunc
++
++@ ff_hevc_rpi_add_residual_8x8_dc_neon_8(
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_8x8_dc_neon_8, export=1  @ add dc (r2) to 8 rows of 8 bytes at dst (r0), row pitch r1, saturating to u8
++        vdup.16     q15, r2                     @ dc in every 16-bit lane
++        mov         r3, #8-2                    @ rows still to load after the two primed below
++1:      vld1.8      d16, [r0]                   @ shared entry: the 4x4 chroma DC function branches here with r3 and q15 already set
++        add         ip, r0, r1                  @ ip = odd rows, r0 = even rows
++        push        {r4, lr}                    @ r4 and lr become the read-ahead pointers
++        vld1.8      d17, [ip]                   @ row 1
++        add         r4, r0, r1, lsl #1          @ r4 = row 2
++        vaddw.u8    q0, q15, d16                @ dc + zero-extended bytes
++        lsl         r1, #1                      @ r1 now steps two rows
++        vaddw.u8    q1, q15, d17
++        add         lr, ip, r1                  @ lr = row 3
++1:
++          vld1.8      {d16}, [r4], r1           @ load the next pair of rows (deeper indent = work on the rows ahead)
++          vld1.8      {d17}, [lr], r1
++        subs        r3, #2                      @ flags stay live until the bne below
++        vqmovun.s16 d4, q0                      @ saturate the current pair of rows to u8
++        vqmovun.s16 d5, q1
++          vaddw.u8    q0, q15, d16
++          vaddw.u8    q1, q15, d17
++        vst1.8      {d4}, [r0], r1              @ store the current pair of rows
++        vst1.8      {d5}, [ip], r1
++        bne         1b
++
++          vqmovun.s16 d4, q0                    @ drain the pipeline: final pair of rows
++          vqmovun.s16 d5, q1
++          vst1.8      {d4}, [r0]
++          vst1.8      {d5}, [ip]
++          pop         {r4, pc}
++endfunc
++
++
++@ ff_hevc_rpi_add_residual_8x8_dc_c_neon_8(
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_8x8_dc_c_neon_8, export=1  @ 8-row entry into the 16-byte-wide DC loop of the function that follows
++        mov         r3,  #8-1                   @ rows still to load after the one primed by the shared body
++        vdup.32     q15, r2                     @ replicate all 32 bits of dc: alternate 16-bit lanes get its low and high halves
++        b           1f                          @ lands on the first label 1 of the next function, skipping its own r3 and q15 setup
++endfunc
++
++@ ff_hevc_rpi_add_residual_16x16_dc_neon_8(
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_16x16_dc_neon_8, export=1  @ add dc (r2) to 16 rows of 16 bytes at dst (r0), row pitch r1, saturating to u8
++        vdup.16     q15, r2                     @ dc in every 16-bit lane
++        mov         r3,  #16-1                  @ rows still to load after the one primed below
++1:      vld1.8      {q8}, [r0]                  @ shared entry: the 8x8 chroma DC function branches here with r3 and q15 already set
++        add         ip, r0, r1                  @ ip reads one row ahead of the store pointer r0
++        vaddw.u8    q0, q15, d16                @ dc + zero-extended bytes
++        vaddw.u8    q1, q15, d17
++1:
++          vld1.8      {q8}, [ip], r1            @ next row (deeper indent = work on the row ahead)
++        subs        r3, #1                      @ flags stay live until the bne below
++        vqmovun.s16 d4, q0                      @ saturate the current row to u8
++        vqmovun.s16 d5, q1
++          vaddw.u8    q0, q15, d16
++          vaddw.u8    q1, q15, d17
++        vst1.8      {q2}, [r0], r1              @ store the current row
++        bne         1b
++
++          vqmovun.s16 d4, q0                    @ drain the pipeline: final row
++          vqmovun.s16 d5, q1
++          vst1.8      {q2}, [r0]
++          bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_add_residual_16x16_dc_c_neon_8(
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_16x16_dc_c_neon_8, export=1  @ 16-row entry into the 32-byte-wide DC loop of the function that follows
++        mov         r3,  #16-1                  @ rows still to load after the one primed by the shared body
++        vdup.32     q15, r2                     @ replicate all 32 bits of dc: alternate 16-bit lanes get its low and high halves
++        b           1f                          @ lands on the first label 1 of the next function, skipping its own r3 and q15 setup
++endfunc
++
++@ ff_hevc_rpi_add_residual_32x32_dc_neon_8(
++@   uint8_t * dst,              // [r0]
++@   unsigned int stride,        // [r1]
++@   int dc)                     // [r2]
++
++function ff_hevc_rpi_add_residual_32x32_dc_neon_8, export=1  @ add dc (r2) to 32 rows of 32 bytes at dst (r0), row pitch r1, saturating to u8
++        vdup.16     q15, r2                     @ dc in every 16-bit lane
++        mov         r3, #32-1                   @ rows still to load after the one primed below
++1:      vld1.8      {q8, q9}, [r0]              @ shared entry: the 16x16 chroma DC function branches here with r3 and q15 already set
++        add         ip, r0, r1                  @ ip reads one row ahead of the store pointer r0
++        vaddw.u8    q0, q15, d16                @ dc + zero-extended bytes
++        vaddw.u8    q1, q15, d17
++        vaddw.u8    q2, q15, d18
++        vaddw.u8    q3, q15, d19
++1:
++        vqmovun.s16 d20, q0                     @ saturate the current row to u8
++        vqmovun.s16 d21, q1
++        vqmovun.s16 d22, q2
++        vqmovun.s16 d23, q3
++          vld1.8      {q8, q9}, [ip], r1        @ next row (deeper indent = work on the row ahead)
++        subs        r3, #1                      @ flags stay live until the bne below
++          vaddw.u8    q0, q15, d16
++          vaddw.u8    q1, q15, d17
++          vaddw.u8    q2, q15, d18
++          vaddw.u8    q3, q15, d19
++        vst1.8      {q10, q11}, [r0], r1        @ store the current row
++        bne     1b
++
++          vqmovun.s16 d20, q0                   @ drain the pipeline: final row
++          vqmovun.s16 d21, q1
++          vqmovun.s16 d22, q2
++          vqmovun.s16 d23, q3
++          vst1.8      {q10, q11}, [r0]
++          bx          lr
++endfunc
++
++@ ============================================================================
++@ U add
++
++@ add_residual4x4_u(
++@   uint8_t *_dst,        [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride,     [r2]
++@   int dc_v)             [r3]
++
++function ff_hevc_rpi_add_residual_4x4_u_neon_8, export=1  @ 4 rows of 4 byte pairs: even (U) bytes += residuals at r1, odd (V) bytes += dc in r3
++        add         ip, r0, r2                  @ ip handles rows 1 and 3, r0 handles rows 0 and 2
++        vld1.16     {q0, q1}, [r1]              @ all 16 residuals
++        lsl         r2, #1                      @ r2 now steps two rows
++        vld1.8      {d16}, [r0 :64], r2         @ row 0
++        vld1.8      {d17}, [ip :64], r2         @ row 1
++        vld1.8      {d18}, [r0 :64]             @ row 2
++        sub         r0, r2                      @ rewind r0 to row 0 for the stores
++        vld1.8      {d19}, [ip :64]             @ row 3
++        sub         ip, r2                      @ rewind ip to row 1 for the stores
++        vdup.16     q2, r3                      @ dc copies that will be interleaved with the residuals
++        vdup.16     q3, r3
++        vmovl.u8    q10, d16                    @ zero-extend the four rows to 16 bits
++        vmovl.u8    q11, d17
++        vmovl.u8    q12, d18
++        vmovl.u8    q13, d19
++        vzip.16     q0, q2                      @ q0 = row 0 as residual,dc pairs; q2 = row 1
++        vzip.16     q1, q3                      @ q1 = row 2; q3 = row 3
++        vqadd.s16   q0,  q10                    @ saturating add of pixels, row by row
++        vqadd.s16   q2,  q11
++        vqadd.s16   q1,  q12
++        vqadd.s16   q3,  q13
++        vqmovun.s16 d0,  q0                     @ saturate to u8: d0-d3 = rows 0-3
++        vqmovun.s16 d1,  q2
++        vqmovun.s16 d2,  q1
++        vqmovun.s16 d3,  q3
++        vst1.8      {d0}, [r0 :64], r2
++        vst1.8      {d1}, [ip :64], r2
++        vst1.8      {d2}, [r0 :64]
++        vst1.8      {d3}, [ip :64]
++        bx          lr
++endfunc
++
++@ add_residual8x8_u(
++@   uint8_t *_dst,        [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride,     [r2]
++@   int dc_v)             [r3]
++
++function ff_hevc_rpi_add_residual_8x8_u_neon_8, export=1  @ 8 rows of 8 byte pairs: even (U) bytes += residuals at r1, odd (V) bytes += dc in r3
++        vdup.16     q15, r3                     @ dc for the odd bytes, in every 16-bit lane
++        add         ip, r0, r2                  @ ip = odd rows, r0 = even rows
++        push        {r4, lr}                    @ r4 and lr become the read-ahead pointers
++        vld2.8      {d16, d17}, [r0 :128]       @ row 0 de-interleaved: d16 = even bytes, d17 = odd bytes
++        lsl         r2, #1                      @ r2 now steps two rows
++        vld2.8      {d18, d19}, [ip :128]       @ row 1 de-interleaved the same way
++        mov         r3, #8-2                    @ r3 is free once dc sits in q15; reuse it as the row counter
++        vld1.16     {q0, q1}, [r1 :256]!        @ residuals for two rows
++        add         r4, r0, r2                  @ r4 = row 2
++        vmovl.u8    q10, d16
++        add         lr, ip, r2                  @ lr = row 3
++        vmovl.u8    q11, d18
++        vqadd.s16   q0,  q10                    @ even bytes + residual
++        vaddw.u8    q2,  q15, d17               @ odd bytes + dc
++        vqadd.s16   q1,  q11
++        vaddw.u8    q3,  q15, d19
++1:
++        vqmovun.s16 d20,  q0                    @ saturate the current rows to u8
++        vqmovun.s16 d21,  q2
++          vld2.8      {d16, d17}, [r4 :128], r2 @ next even row (deeper indent = work on the rows ahead)
++        subs        r3, #2                      @ flags stay live until the bne below
++        vqmovun.s16 d22,  q1
++        vqmovun.s16 d23,  q3
++        vst2.8      {d20, d21}, [r0 :128], r2   @ re-interleave and store the current even row
++          vld2.8      {d18, d19}, [lr :128], r2 @ next odd row
++        vst2.8      {d22, d23}, [ip :128], r2   @ re-interleave and store the current odd row
++          vld1.16     {q0, q1}, [r1 :256]!
++          vmovl.u8    q10, d16
++          vmovl.u8    q11, d18
++          vqadd.s16   q0,  q10
++          vaddw.u8    q2,  q15, d17
++          vqadd.s16   q1,  q11
++          vaddw.u8    q3,  q15, d19
++        bne         1b
++
++          vqmovun.s16 d20,  q0                  @ drain the pipeline: final pair of rows
++          vqmovun.s16 d21,  q2
++          vqmovun.s16 d22,  q1
++          vqmovun.s16 d23,  q3
++          vst2.8      {d20, d21}, [r0 :128]
++          vst2.8      {d22, d23}, [ip :128]
++          pop         {r4, pc}
++endfunc
++
++@ add_residual16x16_u(
++@   uint8_t *_dst,        [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride,     [r2]
++@   int dc_v)             [r3]
++
++function ff_hevc_rpi_add_residual_16x16_u_neon_8, export=1  @ 16 rows of 16 byte pairs: even (U) bytes += residuals at r1, odd (V) bytes += dc in r3
++        vdup.16     q15, r3                     @ dc for the odd bytes, in every 16-bit lane
++        add         ip, r0, r2                  @ ip reads one row ahead of the store pointer r0
++        vld2.8      {q8, q9}, [r0 :256]         @ row 0 de-interleaved: q8 = even bytes, q9 = odd bytes
++        mov         r3, #16-1                   @ r3 is free once dc sits in q15; reuse it as the row counter
++        vld1.16     {q0, q1}, [r1 :256]!        @ 16 residuals for this row
++        vmovl.u8    q11, d16
++        vmovl.u8    q12, d17
++        vqadd.s16   q0,  q11                    @ even bytes + residual
++        vaddw.u8    q11, q15, d18               @ odd bytes + dc (q11 and q12 are recycled for the result)
++        vqadd.s16   q1,  q12
++        vaddw.u8    q12, q15, d19
++1:
++          vld2.8      {q8, q9}, [ip :256], r2   @ next row (deeper indent = work on the row ahead)
++        subs        r3, #1                      @ flags stay live until the bne below
++        vqmovun.s16 d20, q0                     @ q10 = even bytes of the result
++        vqmovun.s16 d22, q11                    @ q11 = odd bytes; d22 is written only after q11 has been read
++        vqmovun.s16 d21, q1
++        vqmovun.s16 d23, q12
++          vld1.16     {q0, q1}, [r1 :256]!
++        vst2.8      {q10, q11}, [r0 :256], r2   @ re-interleave and store the current row
++          vmovl.u8    q11, d16
++            pldw        [ip]                    @ preload-for-write hint for the row after the one just read
++          vmovl.u8    q12, d17
++          vqadd.s16   q0,  q11
++          vaddw.u8    q11, q15, d18
++          vqadd.s16   q1,  q12
++          vaddw.u8    q12, q15, d19
++        bne         1b
++
++          vqmovun.s16 d20, q0                   @ drain the pipeline: final row
++          vqmovun.s16 d22, q11
++          vqmovun.s16 d21, q1
++          vqmovun.s16 d23, q12
++          vst2.8      {q10, q11}, [r0 :256]
++          bx          lr
++endfunc
++
++@ ============================================================================
++@ V add
++
++@ add_residual4x4_v(
++@   uint8_t *_dst,        [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride,     [r2]
++@   int dc_u)             [r3]
++function ff_hevc_rpi_add_residual_4x4_v_neon_8, export=1  @ 4 rows of 4 byte pairs: odd (V) bytes += residuals at r1, even (U) bytes += dc in r3
++        add         ip, r0, r2                  @ ip handles rows 1 and 3, r0 handles rows 0 and 2
++        vld1.16     {q2, q3}, [r1]              @ all 16 residuals, loaded as the second zip operand so they land in odd lanes
++        lsl         r2, #1                      @ r2 now steps two rows
++        vld1.8      {d16}, [r0 :64], r2         @ row 0
++        vld1.8      {d17}, [ip :64], r2         @ row 1
++        vld1.8      {d18}, [r0 :64]             @ row 2
++        sub         r0, r2                      @ rewind r0 to row 0 for the stores
++        vld1.8      {d19}, [ip :64]             @ row 3
++        sub         ip, r2                      @ rewind ip to row 1 for the stores
++        vdup.16     q0, r3                      @ dc copies that will be interleaved with the residuals
++        vdup.16     q1, r3
++        vmovl.u8    q10, d16                    @ zero-extend the four rows to 16 bits
++        vmovl.u8    q11, d17
++        vmovl.u8    q12, d18
++        vmovl.u8    q13, d19
++        vzip.16     q0, q2                      @ q0 = row 0 as dc,residual pairs; q2 = row 1
++        vzip.16     q1, q3                      @ q1 = row 2; q3 = row 3
++        vqadd.s16   q0,  q10                    @ saturating add of pixels, row by row
++        vqadd.s16   q2,  q11
++        vqadd.s16   q1,  q12
++        vqadd.s16   q3,  q13
++        vqmovun.s16 d0,  q0                     @ saturate to u8: d0-d3 = rows 0-3
++        vqmovun.s16 d1,  q2
++        vqmovun.s16 d2,  q1
++        vqmovun.s16 d3,  q3
++        vst1.8      {d0}, [r0 :64], r2
++        vst1.8      {d1}, [ip :64], r2
++        vst1.8      {d2}, [r0 :64]
++        vst1.8      {d3}, [ip :64]
++        bx          lr
++endfunc
++
++@ add_residual8x8_v(
++@   uint8_t *_dst,        [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride,     [r2]
++@   int dc_u)             [r3]
++function ff_hevc_rpi_add_residual_8x8_v_neon_8, export=1  @ 8 rows of 8 byte pairs: odd (V) bytes += residuals at r1, even (U) bytes += dc in r3
++        vdup.16     q15, r3                     @ dc for the even bytes, in every 16-bit lane
++        add         ip, r0, r2                  @ ip = odd rows, r0 = even rows
++        push        {r4, lr}                    @ r4 and lr become the read-ahead pointers
++        vld2.8      {d16, d17}, [r0 :128]       @ row 0 de-interleaved: d16 = even bytes, d17 = odd bytes
++        lsl         r2, #1                      @ r2 now steps two rows
++        vld2.8      {d18, d19}, [ip :128]       @ row 1 de-interleaved the same way
++        mov         r3, #8-2                    @ r3 is free once dc sits in q15; reuse it as the row counter
++        vld1.16     {q0, q1}, [r1 :256]!        @ residuals for two rows
++        add         r4, r0, r2                  @ r4 = row 2
++        vmovl.u8    q10, d17
++        add         lr, ip, r2                  @ lr = row 3
++        vmovl.u8    q11, d19
++        vqadd.s16   q0,  q10                    @ odd bytes + residual
++        vaddw.u8    q2,  q15, d16               @ even bytes + dc
++        vqadd.s16   q1,  q11
++        vaddw.u8    q3,  q15, d18
++1:
++        vqmovun.s16 d20,  q2                    @ saturate the current rows to u8, even bytes first
++        vqmovun.s16 d21,  q0
++          vld2.8      {d16, d17}, [r4 :128], r2 @ next even row (deeper indent = work on the rows ahead)
++        subs        r3, #2                      @ flags stay live until the bne below
++        vqmovun.s16 d22,  q3
++        vqmovun.s16 d23,  q1
++        vst2.8      {d20, d21}, [r0 :128], r2   @ re-interleave and store the current even row
++          vld2.8      {d18, d19}, [lr :128], r2 @ next odd row
++        vst2.8      {d22, d23}, [ip :128], r2   @ re-interleave and store the current odd row
++          vld1.16     {q0, q1}, [r1 :256]!
++          vmovl.u8    q10, d17
++          vmovl.u8    q11, d19
++          vqadd.s16   q0,  q10
++          vaddw.u8    q2,  q15, d16
++          vqadd.s16   q1,  q11
++          vaddw.u8    q3,  q15, d18
++        bne         1b
++
++          vqmovun.s16 d20,  q2                  @ drain the pipeline: final pair of rows
++          vqmovun.s16 d21,  q0
++          vqmovun.s16 d22,  q3
++          vqmovun.s16 d23,  q1
++          vst2.8      {d20, d21}, [r0 :128]
++          vst2.8      {d22, d23}, [ip :128]
++          pop         {r4, pc}
++endfunc
++
++@ add_residual16x16_v(
++@   uint8_t *_dst,        [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride,     [r2]
++@   int dc_u)             [r3]
++function ff_hevc_rpi_add_residual_16x16_v_neon_8, export=1  @ 16 rows of 16 byte pairs: odd (V) bytes += residuals at r1, even (U) bytes += dc in r3
++        vdup.16     q15, r3                     @ dc for the even bytes, in every 16-bit lane
++        add         ip, r0, r2                  @ ip reads one row ahead of the store pointer r0
++        vld2.8      {q8, q9}, [r0 :256]         @ row 0 de-interleaved: q8 = even bytes, q9 = odd bytes
++        mov         r3, #16-1                   @ r3 is free once dc sits in q15; reuse it as the row counter
++        vld1.16     {q0, q1}, [r1 :256]!        @ 16 residuals for this row
++        vmovl.u8    q11, d18
++        vmovl.u8    q12, d19
++        vqadd.s16   q0,  q11                    @ odd bytes + residual
++        vaddw.u8    q11, q15, d16               @ even bytes + dc (q11 and q12 are recycled for the result)
++        vqadd.s16   q1,  q12
++        vaddw.u8    q12, q15, d17
++1:
++          vld2.8      {q8, q9}, [ip :256], r2   @ next row (deeper indent = work on the row ahead)
++        subs        r3, #1                      @ flags stay live until the bne below
++        vqmovun.s16 d20, q11                    @ q10 = even bytes of the result; q11 is read here before d22 is overwritten
++        vqmovun.s16 d22, q0                     @ q11 = odd bytes of the result
++        vqmovun.s16 d21, q12
++        vqmovun.s16 d23, q1
++          vld1.16     {q0, q1}, [r1 :256]!
++        vst2.8      {q10, q11}, [r0 :256], r2   @ re-interleave and store the current row
++          vmovl.u8    q11, d18
++            pldw        [ip]                    @ preload-for-write hint for the row after the one just read
++          vmovl.u8    q12, d19
++          vqadd.s16   q0,  q11
++          vaddw.u8    q11, q15, d16
++          vqadd.s16   q1,  q12
++          vaddw.u8    q12, q15, d17
++        bne         1b
++
++          vqmovun.s16 d20, q11                  @ drain the pipeline: final row
++          vqmovun.s16 d22, q0
++          vqmovun.s16 d21, q12
++          vqmovun.s16 d23, q1
++          vst2.8      {q10, q11}, [r0 :256]
++          bx          lr
++endfunc
++
++@ ============================================================================
++@ U & V add
++
++@ add_residual4x4_c(
++@   uint8_t *_dst,        [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride)     [r2]
++
++function ff_hevc_rpi_add_residual_4x4_c_neon_8, export=1  @ 4 rows of 4 byte pairs: even bytes += U residuals, odd bytes += V residuals; V follows U at r1
++        add         ip, r0, r2                  @ ip handles rows 1 and 3, r0 handles rows 0 and 2
++        vld1.16     {q0, q1}, [r1]!       @ all of U
++        lsl         r2, #1                      @ r2 now steps two rows
++        vld1.8      {d16}, [r0 :64], r2         @ row 0
++        rsb         r3, r2, #0                  @ r3 = minus two rows, used to rewind both pointers
++        vld1.8      {d17}, [ip :64], r2         @ row 1
++        vld1.16     {q2, q3}, [r1]        @ all of V
++        vld1.8      {d18}, [r0 :64], r3         @ row 2, r0 back at row 0
++        vld1.8      {d19}, [ip :64], r3         @ row 3, ip back at row 1
++        vmovl.u8    q10, d16                    @ zero-extend the four rows to 16 bits
++        vmovl.u8    q11, d17
++        vmovl.u8    q12, d18
++        vmovl.u8    q13, d19
++        vzip.16     q0, q2                      @ q0 = row 0 as U,V residual pairs; q2 = row 1
++        vzip.16     q1, q3                      @ q1 = row 2; q3 = row 3
++        vqadd.s16   q0,  q10                    @ saturating add of pixels, row by row
++        vqadd.s16   q2,  q11
++        vqadd.s16   q1,  q12
++        vqadd.s16   q3,  q13
++        vqmovun.s16 d0,  q0                     @ saturate to u8: d0-d3 = rows 0-3
++        vqmovun.s16 d1,  q2
++        vqmovun.s16 d2,  q1
++        vqmovun.s16 d3,  q3
++        vst1.8      {d0}, [r0 :64], r2
++        vst1.8      {d1}, [ip :64], r2
++        vst1.8      {d2}, [r0 :64]
++        vst1.8      {d3}, [ip :64]
++        bx          lr
++endfunc
++
++@ add_residual8x8_c(
++@   uint8_t *_dst,        [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride)     [r2]
++
++function ff_hevc_rpi_add_residual_8x8_c_neon_8, export=1  @ 8 rows of 8 byte pairs: even bytes += U residuals at r1, odd bytes += V residuals 8*8*2 bytes further on
++        vld2.8      {d16, d17}, [r0 :128]       @ row 0 de-interleaved: d16 = even bytes, d17 = odd bytes
++        add         r3, r1, #(8*8*2)  @ Offset to V
++        vld1.16     {q0}, [r1 :128]!            @ 8 U residuals for this row
++        add         ip, r0, r2                  @ ip reads one row ahead of the store pointer r0
++        vld1.16     {q1}, [r3 :128]!            @ 8 V residuals for this row
++        vmovl.u8    q10, d16
++        push        {lr}                        @ lr is used as the row counter
++        vmovl.u8    q8,  d17                    @ q8 is overwritten in place by its own widened high half
++        mov         lr, #8-1                    @ rows still to load after the one primed above
++        vqadd.s16   q10, q0                     @ even bytes + U residual
++        vqadd.s16   q1,  q8                     @ odd bytes + V residual
++1:
++          vld2.8      {d16, d17}, [ip :128], r2 @ next row (deeper indent = work on the row ahead)
++        subs        lr, #1                      @ flags stay live until the bne below
++          vld1.16     {q0}, [r1 :128]!
++        vqmovun.s16 d20, q10                    @ saturate the current row to u8
++        vqmovun.s16 d21, q1
++          vld1.16     {q1}, [r3 :128]!
++        vst2.8      {d20, d21}, [r0 :128], r2   @ re-interleave and store the current row
++          vmovl.u8    q10, d16
++            pldw        [ip]                    @ preload-for-write hint for the row after the one just read
++          vmovl.u8    q8,  d17
++          vqadd.s16   q10, q0
++          vqadd.s16   q1,  q8
++        bne         1b
++
++          vqmovun.s16 d20, q10                  @ drain the pipeline: final row
++          vqmovun.s16 d21, q1
++          vst2.8      {d20, d21}, [r0 :128]
++          pop         {pc}
++endfunc
++
++@ add_residual16x16_c(
++@   uint8_t *_dst,        [r0]
++@   const int16_t *res,   [r1]
++@   ptrdiff_t stride)     [r2]
++
++function ff_hevc_rpi_add_residual_16x16_c_neon_8, export=1  @ 16 rows of 16 byte pairs: even bytes += U residuals at r1, odd bytes += V residuals 16*16*2 bytes further on
++        vld2.8      {q8, q9}, [r0 :256]         @ row 0 de-interleaved: q8 = even bytes, q9 = odd bytes
++        add         r3, r1, #(16*16*2)  @ Offset to V
++        vld1.16     {q0, q1}, [r1 :256]!        @ 16 U residuals for this row
++        add         ip, r0, r2                  @ ip reads one row ahead of the store pointer r0
++        vld1.16     {q2, q3}, [r3 :256]!        @ 16 V residuals for this row
++        vmovl.u8    q10, d16
++        push        {lr}                        @ lr is used as the row counter
++        vmovl.u8    q8,  d17                    @ q8 and q9 are overwritten in place by their own widened high halves
++        mov         lr, #16-1                   @ rows still to load after the one primed above
++        vmovl.u8    q11, d18
++        vmovl.u8    q9,  d19
++        vqadd.s16   q0,  q10                    @ even bytes + U residual
++        vqadd.s16   q1,  q8
++        vqadd.s16   q2,  q11                    @ odd bytes + V residual
++        vqadd.s16   q3,  q9
++1:
++          vld2.8      {q8, q9}, [ip :256], r2   @ next row (deeper indent = work on the row ahead)
++        subs        lr, #1                      @ flags stay live until the bne below
++        vqmovun.s16 d20, q0                     @ d20,d21 = even bytes of the result
++        vqmovun.s16 d22, q2                     @ d22,d23 = odd bytes of the result
++        vqmovun.s16 d21, q1
++        vqmovun.s16 d23, q3
++          vld1.16     {q0, q1}, [r1 :256]!
++        vst2.8      {d20-d23}, [r0 :256], r2    @ four-register vst2 pairs d20 with d22 and d21 with d23
++          vld1.16     {q2, q3}, [r3 :256]!
++          vmovl.u8    q10, d16
++            pldw        [ip]                    @ preload-for-write hint for the row after the one just read
++          vmovl.u8    q8,  d17
++          vmovl.u8    q11, d18
++          vmovl.u8    q9,  d19
++          vqadd.s16   q0,  q10
++          vqadd.s16   q1,  q8
++          vqadd.s16   q2,  q11
++          vqadd.s16   q3,  q9
++        bne         1b
++
++          vqmovun.s16 d20, q0                   @ drain the pipeline: final row
++          vqmovun.s16 d22, q2
++          vqmovun.s16 d21, q1
++          vqmovun.s16 d23, q3
++          vst2.8      {d20-d23}, [r0 :256]
++          pop         {pc}
++endfunc
++
++@ 32x32 chroma never occurs so NIF
++
++@ ============================================================================
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcdsp_sao_neon.S
+@@ -0,0 +1,2245 @@
++/*
++ * Copyright (c) 2014 - 2015 Seppo Tomperi <seppo.tomperi@vtt.fi>
++ *               2017 John Cox <jc@kynesim.co.uk> (for Raspberry Pi)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/arm/asm.S"
++#include "neon.S"
++
++.set EDGE_SRC_STRIDE, 160
++
++@ PIC jump tables are fractionally more expensive than absolute in our code
++.set jent_pic, CONFIG_PIC
++
++
++.macro sao_band_64b_8 XLAT0, XLAT1, Q_K128, I1, I2, I3, I4  @ band offset on 64 bytes in q8-q11: q8,q10 look up XLAT0 and q9,q11 look up XLAT1; I1-I4 are optional caller instructions slotted in; clobbers q12,q13
++        vshr.u8   q12, q8, #3                   @ table index = top 5 bits of each byte
++        \I1
++        vadd.i8   q8, \Q_K128                   @ bias by Q_K128 (visible callers pass a vector of 128) so the signed saturating add below clamps at the ends of the byte range
++        \I2
++        vshr.u8   q13, q9, #3
++        \I3
++        vadd.i8   q9, \Q_K128
++        \I4
++        vtbl.8    d24, \XLAT0, d24              @ replace each index with its table entry
++        vtbl.8    d25, \XLAT0, d25
++        vtbl.8    d26, \XLAT1, d26
++        vtbl.8    d27, \XLAT1, d27
++
++        vqadd.s8  q8, q12                       @ biased pixel + offset, signed 8-bit saturation
++        vshr.u8   q12, q10, #3                  @ q12,q13 are reused for the second 32 bytes
++        vadd.i8   q10, \Q_K128
++        vqadd.s8  q9, q13
++        vshr.u8   q13, q11, #3
++        vadd.i8   q11, \Q_K128
++
++        vtbl.8    d24, \XLAT0, d24
++        vtbl.8    d25, \XLAT0, d25
++        vtbl.8    d26, \XLAT1, d26
++        vtbl.8    d27, \XLAT1, d27
++        vqadd.s8  q10, q12
++        vsub.i8   q8, \Q_K128                   @ remove the bias again
++        vqadd.s8  q11, q13
++        vsub.i8   q9, \Q_K128
++        vsub.i8   q10, \Q_K128
++        vsub.i8   q11, \Q_K128
++.endm
++
++.macro sao_band_16b_8 XLAT0, XLAT1, Q_K128, L1, L2, L3, L4, L5, S1, S2, S3, S4  @ complete pipelined band offset loop, 16 bytes per pass: L1-L5 load into q8 and must set the flags, S1-S4 store from q13; clobbers q12
++        \L1
++        \L2
++        \L3
++        \L4
++        \L5
++        vadd.i8   q12, q8, \Q_K128              @ biased copy of the pixels (visible callers pass a vector of 128)
++        vshr.u8   q8, #3                        @ table index = top 5 bits of each byte
++        vtbl.8    d16, \XLAT0, d16              @ low 8 bytes look up XLAT0
++        vtbl.8    d17, \XLAT1, d17              @ high 8 bytes look up XLAT1
++        vqadd.s8  q12, q8                       @ biased pixel + offset, signed 8-bit saturation
++        bmi       2f                            @ flags come from the L1-L5 instructions: negative means this was the last batch
++1:        \L1
++          \L2
++          \L3
++          \L4
++          \L5
++        vsub.i8   q13, q12, \Q_K128             @ previous batch: remove the bias, result in q13
++          vadd.i8   q12, q8, \Q_K128
++          vshr.u8   q8, #3
++        \S1
++        \S2
++        \S3
++        \S4
++          vtbl.8    d16, \XLAT0, d16
++          vtbl.8    d17, \XLAT1, d17
++          vqadd.s8  q12, q8
++          bpl       1b                          @ still using the flags set inside L1-L5 of this pass
++2:        vsub.i8   q13, q12, \Q_K128           @ drain the pipeline: final batch
++          \S1
++          \S2
++          \S3
++          \S4
++.endm
++
++
++.macro clip16_4 Q0, Q1, Q2, Q3, Q_MIN, Q_MAX  @ clamp every signed 16-bit lane of Q0-Q3 to the range Q_MIN..Q_MAX, in place
++        vmax.s16  \Q0, \Q_MIN                   @ raise anything below the lower bound
++        vmax.s16  \Q1, \Q_MIN
++        vmax.s16  \Q2, \Q_MIN
++        vmax.s16  \Q3, \Q_MIN
++        vmin.s16  \Q0, \Q_MAX                   @ lower anything above the upper bound
++        vmin.s16  \Q1, \Q_MAX
++        vmin.s16  \Q2, \Q_MAX
++        vmin.s16  \Q3, \Q_MAX
++.endm
++
++@ Clobbers q12, q13
++.macro sao_band_64b_16  Q0, Q1, Q2, Q3, XLAT0, XLAT1, Q_MIN, Q_MAX, bit_depth, I1, I2  @ band offset on 32 16-bit samples in Q0-Q3, in place: Q0,Q2 look up XLAT0 and Q1,Q3 look up XLAT1; I1,I2 are optional caller instructions
++        vshrn.i16 d24, \Q0, #(\bit_depth - 5)   @ table index = sample shifted right by bit_depth - 5, narrowed to a byte
++        vshrn.i16 d25, \Q1, #(\bit_depth - 5)
++        vshrn.i16 d26, \Q2, #(\bit_depth - 5)
++        \I1
++        vtbl.8    d24, \XLAT0, d24              @ replace each index with its byte-sized table entry
++        vshrn.i16 d27, \Q3, #(\bit_depth - 5)
++        vtbl.8    d25, \XLAT1, d25
++        \I2
++        vtbl.8    d26, \XLAT0, d26
++        vtbl.8    d27, \XLAT1, d27
++        vaddw.s8  \Q0, d24                      @ sample + sign-extended offset
++        vaddw.s8  \Q1, d25
++        vaddw.s8  \Q2, d26
++        vaddw.s8  \Q3, d27
++        clip16_4   \Q0, \Q1, \Q2, \Q3, \Q_MIN, \Q_MAX
++.endm
++
++@ Clobbers q10, q11, q12
++.macro sao_band_32b_16 Q0, Q1, XLAT0, XLAT1, Q_MIN, Q_MAX, bit_depth, L1, L2, L3, L4, L5, S1, S2, S3, S4  @ complete pipelined band offset loop on 16 16-bit samples per pass: L1-L5 load Q0,Q1 and must set the flags, S1-S4 store from q10,q11
++        \L1
++        \L2
++        \L3
++        \L4
++        \L5
++        vshrn.i16 d24, \Q0, #\bit_depth - 5     @ table index = sample shifted right by bit_depth - 5, narrowed to a byte
++        vshrn.i16 d25, \Q1, #\bit_depth - 5
++        vtbl.8    d24, \XLAT0, d24              @ Q0 looks up XLAT0
++        vtbl.8    d25, \XLAT1, d25              @ Q1 looks up XLAT1
++        vaddw.s8  q10, \Q0, d24                 @ sample + sign-extended offset
++        vaddw.s8  q11, \Q1, d25
++        bmi       2f                            @ flags come from the L1-L5 instructions: negative means this was the last batch
++1:        \L1
++          \L2
++          \L3
++          \L4
++          \L5
++        vmax.s16  q10, \Q_MIN                   @ previous batch: clamp to Q_MIN..Q_MAX
++        vmax.s16  q11, \Q_MIN
++          vshrn.i16 d24, \Q0, #\bit_depth - 5
++          vshrn.i16 d25, \Q1, #\bit_depth - 5
++        vmin.s16  q10, \Q_MAX
++        vmin.s16  q11, \Q_MAX
++        \S1
++        \S2
++        \S3
++        \S4
++          vtbl.8    d24, \XLAT0, d24
++          vtbl.8    d25, \XLAT1, d25
++          vaddw.s8  q10, \Q0, d24
++          vaddw.s8  q11, \Q1, d25
++          bpl       1b                          @ still using the flags set inside L1-L5 of this pass
++2:        vmax.s16  q10, \Q_MIN                 @ drain the pipeline: clamp and store the final batch
++          vmax.s16  q11, \Q_MIN
++          vmin.s16  q10, \Q_MAX
++          vmin.s16  q11, \Q_MAX
++          \S1
++          \S2
++          \S3
++          \S4
++.endm
++
++
++@ Standard coding rules for sao_offset_abs limit it to 0-31 (Table 9-38)
++@ so we are quite safe stuffing it into a byte array
++@ There may be a subsequent shl by log2_sao_offset_scale_luma/chroma
++@ (7.4.3.3.2 && 7-70) but we should still be safe to at least 12 bits of
++@ precision
++
++@ This, somewhat nasty, bit of code builds the {d0-d3} translation
++@ array via the stack
++@ Given that sao_left_class > 28 can cause wrap we can't just poke
++@ all 4 bytes in at once
++@
++@ It also loads other common regs
++
++@ Beware that the offset read here over-reads by 6 bytes, so the source must be sized appropriately
++function band_load_y  @ builds the 32-byte offset table in d0-d3; also returns r4 = r1 + r3 and ip = height - 1; stack offsets assume the caller has pushed 16 bytes
++        ldr       ip, [sp, #16]         @ &sao_offset_val[0]
++        ldr       r4, [sp, #20]         @ sao_left_class
++        vmov.i64  d4, #0                @ d0-d4 = 40 zero bytes: the 32 table entries plus 8 spill bytes
++        vmov.i64  q0, #0
++        pld       [r1]
++        vld2.8    {q8}, [ip]            @ de-interleave 16 bytes: d16 = low byte of each 16-bit offset
++        sub       ip, sp, #8*5          @ address the zero array will have once it is pushed
++        vmov.i64  q1, #0
++        add       r4, ip, r4            @ slot of table entry sao_left_class
++        vpush     {d0-d4}               @ Put zero array on stack
++        vshr.u64  d16, d16, #8          @ 1st interesting val is [1]
++        ldr       ip, [ip, #8*5 + 28]   @ height
++        vst1.32   {d16[0]}, [r4]        @ write four consecutive offsets into the array
++        add       r4, r1, r3            @ r4 = src plus one stride
++        vpop      {d0-d4}               @ Pop modified array
++        sub       ip, ip, #1            @ callers loop while this stays non-negative
++        vorr      d0, d0, d4            @ fold the spill bytes back onto entries 0-7 (the wrap case)
++        bx        lr
++endfunc
++
++@ Beware that the offset reads here over-read by 6 bytes, so the sources must be sized appropriately
++function band_load_c  @ builds two 32-byte offset tables, d0-d3 from table 1 and d4-d7 from table 2; also returns r4 = r1 + r3 and ip = height - 1; stack offsets assume the caller has pushed 16 bytes
++        ldr       ip, [sp, #16]         @ &sao_offset_val1[0]
++        ldr       r4, [sp, #20]         @ sao_left_class1
++        vmov.i64  d24, #0               @ d20-d24 = 40 zero bytes: 32 table entries plus 8 spill bytes
++        vmov.i64  q10, #0
++        pld       [r1]
++        vld2.8    {q8}, [ip]            @ de-interleave 16 bytes: d16 = low byte of each 16-bit offset
++        sub       ip, sp, #8*5          @ address the zero array will have once it is pushed
++        vmov.i64  q11, #0
++        add       r4, ip, r4            @ slot of table 1 entry sao_left_class1
++        ldr       ip, [sp, #24]         @ &sao_offset_val2[0]
++        vpush     {d20-d24}             @ Put zero array on stack
++        vld2.8    {q9}, [ip]            @ d18 = low byte of each 16-bit offset of table 2
++        vshr.u64  d16, d16, #8          @ 1st interesting val is [1]
++        ldr       ip, [sp, #8*5 + 28]   @ sao_left_class2
++        vst1.32   {d16[0]}, [r4]        @ write four consecutive offsets of table 1 into the array
++        add       ip, sp, ip            @ slot of table 2 entry sao_left_class2
++        vshr.u64  d18, d18, #8          @ 1st interesting val is [1]
++        vldmia    sp, {d0-d3}           @ Load modified array
++        vldr      d16, [sp, #8*4]       @ spill bytes of table 1
++        add       r4, r1, r3            @ r4 = src plus one stride
++        vstmia    sp, {d20-d24}         @ Put zero array on stack (again)
++        vst1.32   {d18[0]}, [ip]        @ write four consecutive offsets of table 2 into the array
++        vorr      d0, d0, d16           @ fold the spill bytes back onto entries 0-7 (the wrap case)
++        vldmia    sp, {d4-d7}           @ Load modified array
++        vldr      d18, [sp, #8*4]       @ spill bytes of table 2
++        ldr       ip, [sp, #8*5 + 36]   @ height
++        add       sp, sp, #8*5          @ release the scratch array
++        vorr      d4, d4, d18           @ same wrap fold for table 2
++        sub       ip, ip, #1            @ callers loop while this stays non-negative
++        bx        lr
++endfunc
++
++
++@ ff_hevc_rpi_sao_band_64_neon_8 (
++@   uint8_t *_dst,              [r0]
++@   uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,       [r2]
++@   ptrdiff_t stride_src,       [r3]
++@   int16_t *sao_offset_val,    [sp, #0]
++@   int sao_left_class,         [sp, #4]
++@   int width,                  [sp, #8]
++@   int height)                 [sp, #12]
++
++function ff_hevc_rpi_sao_band_64_neon_8, export=1  @ band offset from src (r1) to dst (r0), one row of 64 bytes per pass
++        push      {r4-r6, lr}           @ 16 bytes pushed: band_load_y relies on this when it reads the stack arguments
++        vmov.u8   q15, #128             @ bias constant handed to the band macro
++        bl        band_load_y           @ d0-d3 = offset table, r4 = src plus one stride, ip = height - 1
++
++1:      vldmia    r1, {q8-q11}          @ one row of 64 source bytes
++        sao_band_64b_8 {d0-d3}, {d0-d3}, q15, \
++            "pld       [r4]",                 \
++            "subs      ip, #1",               \
++            "it ne; addne r4, r3",            \
++            "add       r1, r3"
++        vstmia    r0, {q8-q11}          @ write the processed row
++        add       r0, r2                @ plain add, so the flags from the subs inside the macro survive
++        bpl       1b                    @ repeat while the row counter is non-negative
++
++        pop       {r4-r6, pc}
++endfunc
++
++@ ff_hevc_rpi_sao_band_32_neon_8 (
++@   uint8_t *_dst,              [r0]
++@   uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,       [r2]
++@   ptrdiff_t stride_src,       [r3]
++@   int16_t *sao_offset_val,    [sp, #0]
++@   int sao_left_class,         [sp, #4]
++@   int width,                  [sp, #8]
++@   int height)                 [sp, #12]
++
++function ff_hevc_rpi_sao_band_32_neon_8, export=1  @ band offset from src (r1) to dst (r0), two rows of 32 bytes per pass
++        push      {r4-r6, lr}           @ 16 bytes pushed: band_load_y relies on this when it reads the stack arguments
++        add       r5, r0, r2            @ r5 = dst odd rows
++        add       r6, r1, r3            @ r6 = src odd rows
++        lsl       r2, #1                @ both strides now step two rows
++        lsl       r3, #1
++        vmov.u8   q15, #128             @ bias constant handed to the band macro
++        bl        band_load_y           @ d0-d3 = offset table, ip = height - 1
++
++1:      vld1.8    { q8, q9 }, [r1, :128], r3  @ even row
++        subs      ip, #2                @ flags stay live until the bpl below
++        vld1.8    {q10, q11}, [r6, :128], r3  @ odd row
++
++        sao_band_64b_8 {d0-d3}, {d0-d3}, q15
++
++        vst1.8    { q8, q9 }, [r0, :128], r2
++        vst1.8    {q10, q11}, [r5, :128], r2
++        bpl       1b                    @ repeat while the row counter is non-negative
++
++        pop       {r4-r6, pc}
++endfunc
++
++@ ff_hevc_rpi_sao_band_16_neon_8 (
++@   uint8_t *_dst,              [r0]
++@   uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,       [r2]
++@   ptrdiff_t stride_src,       [r3]
++@   int16_t *sao_offset_val,    [sp, #0]
++@   int sao_left_class,         [sp, #4]
++@   int width,                  [sp, #8]
++@   int height)                 [sp, #12]
++
++function ff_hevc_rpi_sao_band_16_neon_8, export=1  @ band offset from src (r1) to dst (r0), four rows of 16 bytes per pass
++        push      {r4-r6, lr}           @ 16 bytes pushed: band_load_y relies on this when it reads the stack arguments
++        add       r5, r0, r2            @ r5 = dst odd rows
++        add       r6, r1, r3            @ r6 = src odd rows
++        lsl       r2, #1                @ both strides now step two rows
++        lsl       r3, #1
++        vmov.u8   q15, #128             @ bias constant handed to the band macro
++        bl        band_load_y           @ d0-d3 = offset table, ip = height - 1
++
++1:      vld1.8    { q8}, [r1, :128], r3 @ rows 0-3 of this batch, alternating between the even and odd pointers
++        subs      ip, #4                @ flags stay live until the bpl below
++        vld1.8    { q9}, [r6, :128], r3
++        vld1.8    {q10}, [r1, :128], r3
++        vld1.8    {q11}, [r6, :128], r3
++
++        sao_band_64b_8 {d0-d3}, {d0-d3}, q15
++
++        vst1.8    { q8}, [r0, :128], r2 @ stores follow the same row order as the loads
++        vst1.8    { q9}, [r5, :128], r2
++        vst1.8    {q10}, [r0, :128], r2
++        vst1.8    {q11}, [r5, :128], r2
++        bpl       1b                    @ repeat while the row counter is non-negative
++
++        pop       {r4-r6, pc}
++endfunc
++
++@ ff_hevc_rpi_sao_band_8_neon_8 (
++@   uint8_t *_dst,              [r0]
++@   uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,       [r2]
++@   ptrdiff_t stride_src,       [r3]
++@   int16_t *sao_offset_val,    [sp, #0]
++@   int sao_left_class,         [sp, #4]
++@   int width,                  [sp, #8]
++@   int height)                 [sp, #12]
++
++function ff_hevc_rpi_sao_band_8_neon_8, export=1  @ band offset for narrow blocks: two rows of 8 bytes per pass, or four rows of 4 bytes when width is below 8
++        ldr       ip, [sp, #8]          @ width
++        push      {r4-r6, lr}           @ 16 bytes pushed: band_load_y relies on this when it reads the stack arguments
++        vmov.u8   q15, #128             @ bias constant handed to the band macro
++        cmp       ip, #8                @ result is consumed by the blt below; nothing in between changes the flags
++        bl        band_load_y           @ d0-d3 = offset table, ip = height - 1
++        add       r5, r0, r2            @ r5 = dst odd rows
++        add       r6, r1, r3            @ r6 = src odd rows
++        lsl       r2, #1                @ both strides now step two rows
++        lsl       r3, #1
++        blt       4f                    @ width below 8: take the 4-byte path
++
++        sao_band_16b_8 {d0-d3}, {d0-d3}, q15, \
++            "vld1.8    {d16}, [r1, :64], r3", \
++            "subs      ip, #2",               \
++            "vld1.8    {d17}, [r6, :64], r3", \
++            "",                               \
++            "",                               \
++            "vst1.8 {d26}, [r0, :64], r2",    \
++            "vst1.8 {d27}, [r5, :64], r2"
++        pop       {r4-r6, pc}
++4:
++        sao_band_16b_8 {d0-d3}, {d0-d3}, q15,    \
++            "vld1.32   {d16[0]}, [r1, :32], r3", \
++            "subs      ip, #4",                  \
++            "vld1.32   {d16[1]}, [r6, :32], r3", \
++            "vld1.32   {d17[0]}, [r1, :32], r3", \
++            "vld1.32   {d17[1]}, [r6, :32], r3", \
++            "vst1.32   {d26[0]}, [r0, :32], r2", \
++            "vst1.32   {d26[1]}, [r5, :32], r2", \
++            "vst1.32   {d27[0]}, [r0, :32], r2", \
++            "vst1.32   {d27[1]}, [r5, :32], r2"
++        pop       {r4-r6, pc}
++endfunc
++
++@ ff_hevc_rpi_sao_band_c_32_neon_8(
++@   uint8_t * dst          [r0]
++@   uint8_t * src          [r1]
++@   uint32_t dst_stride    [r2]
++@   uint32_t src_stride    [r3]
++@   const int16_t * table1 sp[0]
++@   uint32_t offset1       sp[4]
++@   const int16_t * table2 sp[8]
++@   uint32_t offset2       sp[12]
++@   int width              sp[16]
++@   int height             sp[20]
++
++function ff_hevc_rpi_sao_band_c_32_neon_8, export=1  @ two-table band offset, one row of 64 bytes per pass: even bytes use table 1, odd bytes use table 2
++        push      {r4-r6, lr}           @ 16 bytes pushed: band_load_c relies on this when it reads the stack arguments
++        add       r5, r0, #32           @ r5 = second half of each dst row
++        add       r6, r1, #32           @ r6 = second half of each src row
++        vmov.u8   q15, #128             @ bias constant handed to the band macro
++        bl        band_load_c           @ d0-d3 and d4-d7 = the two tables, r4 = src plus one stride, ip = height - 1
++
++1:      vld2.8    { q8, q9 }, [r1, :128], r3  @ de-interleave: q8 = even bytes, q9 = odd bytes
++        subs      ip, #1                @ flags stay live until the bpl below
++        vld2.8    {q10, q11}, [r6, :128], r3  @ second half of the row, split the same way
++
++        sao_band_64b_8 {d0-d3}, {d4-d7}, q15, \
++            "pld       [r4]",                 \
++            "it ne; addne r4, r3"
++
++        vst2.8    { q8, q9 }, [r0, :128], r2  @ re-interleave on store
++        vst2.8    {q10, q11}, [r5, :128], r2
++        bpl       1b                    @ repeat while the row counter is non-negative
++
++        pop     {r4-r6, pc}
++endfunc
++
++@ ff_hevc_rpi_sao_band_c_16_neon_8(
++@   uint8_t * dst          [r0]
++@   uint8_t * src          [r1]
++@   uint32_t dst_stride    [r2]
++@   uint32_t src_stride    [r3]
++@   const int16_t * table1 sp[0]
++@   uint32_t offset1       sp[4]
++@   const int16_t * table2 sp[8]
++@   uint32_t offset2       sp[12]
++@   int width              sp[16]
++@   int height             sp[20]
++
++function ff_hevc_rpi_sao_band_c_16_neon_8, export=1  @ two-table band offset, two rows of 32 bytes per pass: even bytes use table 1, odd bytes use table 2
++        push      {r4-r6, lr}           @ 16 bytes pushed: band_load_c relies on this when it reads the stack arguments
++        add       r5, r0, r2            @ r5 = dst odd rows
++        add       r6, r1, r3            @ r6 = src odd rows
++        lsl       r2, #1                @ both strides now step two rows
++        lsl       r3, #1
++        vmov.u8   q15, #128             @ bias constant handed to the band macro
++        bl        band_load_c           @ d0-d3 and d4-d7 = the two tables, ip = height - 1
++
++1:      vld2.8    { q8, q9 }, [r1, :128], r3  @ even row de-interleaved: q8 = even bytes, q9 = odd bytes
++        subs      ip, #2                @ flags stay live until the bpl below
++        vld2.8    {q10, q11}, [r6, :128], r3  @ odd row, split the same way
++
++        sao_band_64b_8 {d0-d3}, {d4-d7}, q15
++
++        vst2.8    { q8, q9 }, [r0, :128], r2  @ re-interleave on store
++        vst2.8    {q10, q11}, [r5, :128], r2
++        bpl       1b                    @ repeat while the row counter is non-negative
++
++        pop     {r4-r6, pc}
++endfunc
++
++@ ff_hevc_rpi_sao_band_c_8_neon_8(
++@   uint8_t * dst          [r0]
++@   uint8_t * src          [r1]
++@   uint32_t dst_stride    [r2]
++@   uint32_t src_stride    [r3]
++@   const int16_t * table1 sp[0]
++@   uint32_t offset1       sp[4]
++@   const int16_t * table2 sp[8]
++@   uint32_t offset2       sp[12]
++@   int width              sp[16]
++@   int height             sp[20]
++@ 8-bit interleaved band filter for narrow blocks; the width argument selects the path.
++function ff_hevc_rpi_sao_band_c_8_neon_8, export=1
++        ldr       ip, [sp, #16]         @ width
++        push      {r4-r6, lr}
++        vmov.u8   q15, #128
++        cmp       ip, #8                @ result is tested by blt after the call below
++        bl        band_load_c           @ NOTE(review): has to leave the flags intact - confirm
++        blt       4f                    @ width < 8: two 8-byte rows per pass
++
++        sao_band_16b_8 {d0-d3}, {d4-d7}, q15,      \
++            "vld2.8    {d16-d17}, [r1, :128], r3", \
++            "subs      ip, #1",                    \
++            "",                                    \
++            "",                                    \
++            "",                                    \
++            "vst2.8    {d26-d27}, [r0, :128], r2"
++        pop       {r4-r6, pc}           @ no loop is visible here; presumably it lives in the macro (confirm)
++4:
++        add       r5, r0, r2            @ second-row pointers
++        add       r6, r1, r3
++        lsl       r2, #1                @ every pointer now steps two rows
++        lsl       r3, #1
++        sao_band_16b_8 {d0-d3}, {d4-d7}, q15, \
++            "vld1.8    {d16}, [r1, :64], r3", \
++            "subs      ip, #2",               \
++            "vld1.8    {d17}, [r6, :64], r3", \
++            "vuzp.8    d16, d17",             \
++            "",                               \
++            "vzip.8    d26, d27",             \
++            "vst1.8    {d26}, [r0, :64], r2", \
++            "vst1.8    {d27}, [r5, :64], r2"
++        pop       {r4-r6, pc}
++endfunc
++
++
++@ ff_hevc_rpi_sao_band_64_neon_10 (
++@   uint8_t *_dst,              [r0]
++@   uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,       [r2]
++@   ptrdiff_t stride_src,       [r3]
++@   int16_t *sao_offset_val,    [sp, #0]
++@   int sao_left_class,         [sp, #4]
++@   int width,                  [sp, #8]
++@   int height)                 [sp, #12]
++@ Band filter body for 16-bit sample storage: 128 bytes (eight q registers) per row.
++.macro band_64_16 bit_depth
++        push      {r4-r6, lr}
++        vmov.i64  q2, #0                @ q2 = 0 and q3 = largest sample value, both handed
++        vmov.i16  q3, #(1 << \bit_depth) - 1    @ to the band macro (presumably the clip bounds)
++        bl        band_load_y           @ defined elsewhere; presumably sets up d0-d3 and ip (confirm)
++        vpush     {q4-q7}               @ q4-q7 carry row data below, so preserve the caller values
++
++1:      vldm      r1, {q4-q11}          @ whole row, no writeback (r1 is stepped by the macro argument)
++        sao_band_64b_16 q4,  q5,  q6,  q7, {d0-d3}, {d0-d3}, q2, q3, \bit_depth, \
++            "subs      ip, #1",                                                  \
++            "add       r1, r3"
++        sao_band_64b_16 q8,  q9, q10, q11, {d0-d3}, {d0-d3}, q2, q3, \bit_depth
++        vstm      r0, {q4-q11}
++        add       r0, r2                @ next destination row
++        bpl       1b
++
++        vpop      {q4-q7}
++        pop       {r4-r6, pc}
++.endm
++@ 10-bit instance of band_64_16
++function ff_hevc_rpi_sao_band_64_neon_10, export=1
++        band_64_16 10
++endfunc
++
++@ ff_hevc_rpi_sao_band_32_neon_10 (
++@   uint8_t *_dst,              [r0]
++@   uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,       [r2]
++@   ptrdiff_t stride_src,       [r3]
++@   int16_t *sao_offset_val,    [sp, #0]
++@   int sao_left_class,         [sp, #4]
++@   int width,                  [sp, #8]
++@   int height)                 [sp, #12]
++@ Band filter body for 16-bit sample storage: 64 bytes (four q registers) per row.
++.macro band_32_16 bit_depth
++        push      {r4-r6, lr}
++        vmov.i64  q2, #0                @ q2 = 0 and q3 = largest sample value, both handed
++        vmov.i16  q3, #(1 << \bit_depth) - 1    @ to the band macro (presumably the clip bounds)
++        bl        band_load_y           @ defined elsewhere; presumably sets up d0-d3 and ip (confirm)
++
++1:      vldm      r1, {q8-q11}          @ whole row, no writeback (r1 is stepped by the macro argument)
++        sao_band_64b_16 q8,  q9,  q10, q11, {d0-d3}, {d0-d3}, q2, q3, \bit_depth, \
++            "subs      ip, #1",                                                   \
++            "add       r1, r3"
++        vstm      r0, {q8-q11}
++        add       r0, r2                @ next destination row
++        bpl       1b
++
++        pop       {r4-r6, pc}
++.endm
++@ 10-bit instance of band_32_16
++function ff_hevc_rpi_sao_band_32_neon_10, export=1
++        band_32_16 10
++endfunc
++
++@ ff_hevc_rpi_sao_band_16_neon_10 (
++@   uint8_t *_dst,              [r0]
++@   uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,       [r2]
++@   ptrdiff_t stride_src,       [r3]
++@   int16_t *sao_offset_val,    [sp, #0]
++@   int sao_left_class,         [sp, #4]
++@   int width,                  [sp, #8]
++@   int height)                 [sp, #12]
++@ Band filter body for 16-bit sample storage: two 32-byte rows per loop pass.
++.macro band_16_16 bit_depth
++        push      {r4-r6, lr}
++        add       r5, r0, r2            @ r5 = dst + stride: second row of each pair
++        add       r6, r1, r3            @ r6 = src + stride
++        lsl       r2, #1                @ double both strides so that every pointer
++        lsl       r3, #1                @ steps two rows per pass
++        vmov.i64  q14, #0               @ q14 = 0 and q15 = largest sample value, both handed
++        vmov.i16  q15, #(1 << \bit_depth) - 1   @ to the band macro (presumably the clip bounds)
++        bl        band_load_y           @ defined elsewhere; presumably sets up d0-d3 and ip (confirm)
++
++1:      vld1.16   { q8, q9 }, [r1, :128], r3    @ first row
++        subs      r12, #2               @ r12 is ip: two rows consumed per pass
++        vld1.16   {q10, q11}, [r6, :128], r3    @ second row
++        sao_band_64b_16 q8,  q9,  q10, q11, {d0-d3}, {d0-d3}, q14, q15, \bit_depth
++        vst1.16   { q8, q9 }, [r0, :128], r2
++        vst1.16   {q10, q11}, [r5, :128], r2
++        bpl       1b
++
++        pop       {r4-r6, pc}
++.endm
++@ 10-bit instance of band_16_16
++function ff_hevc_rpi_sao_band_16_neon_10, export=1
++        band_16_16 10
++endfunc
++
++@ ff_hevc_rpi_sao_band_8_neon_10 (
++@   uint8_t *_dst,              [r0]
++@   uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,       [r2]
++@   ptrdiff_t stride_src,       [r3]
++@   int16_t *sao_offset_val,    [sp, #0]
++@   int sao_left_class,         [sp, #4]
++@   int width,                  [sp, #8]
++@   int height)                 [sp, #12]
++@ Band filter body for 16-bit sample storage, narrow blocks: 2 rows per pass, or 4 when width < 8.
++.macro band_8_16 bit_depth
++        ldr       ip, [sp, #8]          @ width
++        push      {r4-r6, lr}
++        vmov.i64  q14, #0
++        cmp       ip, #8                @ tested by blt below, after the helper call
++        vmov.i16  q15, #(1 << \bit_depth) - 1
++        bl        band_load_y           @ NOTE(review): has to leave the flags intact - confirm
++        add       r5, r0, r2            @ second-row pointers ...
++        add       r6, r1, r3
++        lsl       r2, #1                @ ... and doubled strides, shared by both paths
++        lsl       r3, #1
++        blt       4f                    @ width < 8: 8-byte rows, four per pass
++
++        sao_band_32b_16 q8, q9, {d0-d3}, {d0-d3}, q14, q15, \bit_depth, \
++            "vld1.16   {q8}, [r1, :128], r3",                           \
++            "subs      ip, #2",                                         \
++            "vld1.16   {q9}, [r6, :128], r3",                           \
++            "",                                                         \
++            "",                                                         \
++            "vst1.16   {q10}, [r0, :128], r2",                          \
++            "vst1.16   {q11}, [r5, :128], r2"
++        pop       {r4-r6, pc}           @ no loop is visible here; presumably it lives in the macro (confirm)
++4:
++        sao_band_32b_16 q8, q9, {d0-d3}, {d0-d3}, q14, q15, \bit_depth, \
++            "vld1.16   {d16}, [r1, :64], r3",                           \
++            "subs      ip, #4",                                         \
++            "vld1.16   {d17}, [r6, :64], r3",                           \
++            "vld1.16   {d18}, [r1, :64], r3",                           \
++            "vld1.16   {d19}, [r6, :64], r3",                           \
++            "vst1.16   {d20}, [r0, :64], r2",                           \
++            "vst1.16   {d21}, [r5, :64], r2",                           \
++            "vst1.16   {d22}, [r0, :64], r2",                           \
++            "vst1.16   {d23}, [r5, :64], r2"
++        pop       {r4-r6, pc}
++.endm
++@ 10-bit instance of band_8_16
++function ff_hevc_rpi_sao_band_8_neon_10, export=1
++        band_8_16 10
++endfunc
++
++
++@ ff_hevc_rpi_sao_band_c_32_neon_10(
++@   uint8_t * dst          [r0]
++@   uint8_t * src          [r1]
++@   uint32_t dst_stride    [r2]
++@   uint32_t src_stride    [r3]
++@   const int16_t * table1 sp[0]
++@   uint32_t offset1       sp[4]
++@   const int16_t * table2 sp[8]
++@   uint32_t offset2       sp[12]
++@   int width              sp[16]
++@   int height             sp[20]
++@ Interleaved (vld2) band filter body, 16-bit sample storage: one 128-byte row per loop pass.
++.macro band_c_32_16 bit_depth
++        push      {r4-r6, lr}
++        add       r5, r0, #32           @ r5/r6 run 32 bytes ahead of r0/r1
++        add       r6, r1, #32
++        sub       r2, #64               @ the first access of each pair steps by 64 (lr below),
++        sub       r3, #64               @ so the second one adds stride - 64 to land on the next row
++        vmov.i64  q14, #0               @ q14 = 0 and q15 = largest sample value, both handed
++        vmov.i16  q15, #(1 << \bit_depth) - 1   @ to the band macro (presumably the clip bounds)
++        bl        band_load_c           @ defined elsewhere; presumably sets up d0-d7, ip and r4 (confirm)
++        mov       lr, #64               @ lr was saved by the push, so it is free as a step register
++        vpush     {q4-q7}               @ q4-q7 carry row data below, so preserve the caller values
++
++1:      vld2.16   { q4, q5 }, [r1, :128], lr    @ bytes 0-31:  even lanes -> q4, odd lanes -> q5
++        subs      ip, #1
++        vld2.16   { q6, q7 }, [r6, :128], lr    @ bytes 32-63
++        vld2.16   { q8, q9 }, [r1, :128], r3    @ bytes 64-95
++        vld2.16   {q10, q11}, [r6, :128], r3    @ bytes 96-127
++
++        sao_band_64b_16 q4,  q5,  q6,  q7, {d0-d3}, {d4-d7}, q14, q15, \bit_depth, \
++            "pld       [r4]",                                                      \
++            "it ne; addne r4, r3"
++        sao_band_64b_16 q8,  q9, q10, q11, {d0-d3}, {d4-d7}, q14, q15, \bit_depth
++
++        vst2.16   { q4, q5 }, [r0, :128], lr
++        vst2.16   { q6, q7 }, [r5, :128], lr
++        vst2.16   { q8, q9 }, [r0, :128], r2
++        vst2.16   {q10, q11}, [r5, :128], r2
++
++        bpl       1b
++
++        vpop      {q4-q7}
++        pop       {r4-r6, pc}
++.endm
++@ 10-bit instance of band_c_32_16
++function ff_hevc_rpi_sao_band_c_32_neon_10, export=1
++        band_c_32_16 10
++endfunc
++
++
++@ ff_hevc_rpi_sao_band_c_16_neon_10(
++@   uint8_t * dst          [r0]
++@   uint8_t * src          [r1]
++@   uint32_t dst_stride    [r2]
++@   uint32_t src_stride    [r3]
++@   const int16_t * table1 sp[0]
++@   uint32_t offset1       sp[4]
++@   const int16_t * table2 sp[8]
++@   uint32_t offset2       sp[12]
++@   int width              sp[16]
++@   int height             sp[20]
++@ Interleaved (vld2) band filter body, 16-bit sample storage: one 64-byte row per loop pass.
++.macro band_c_16_16 bit_depth
++        push      {r4-r6, lr}
++        add       r5, r0, #32           @ r5 = dst + 32: second half of each row
++        add       r6, r1, #32           @ r6 = src + 32
++        vmov.i64  q14, #0               @ q14 = 0 and q15 = largest sample value, both handed
++        vmov.i16  q15, #(1 << \bit_depth) - 1   @ to the band macro (presumably the clip bounds)
++        bl        band_load_c           @ defined elsewhere; presumably sets up d0-d7 and ip (confirm)
++
++1:      vld2.16   { q8, q9 }, [r1, :128], r3    @ bytes 0-31:  even lanes -> q8, odd lanes -> q9
++        subs      ip, #1                @ row counter; flags are consumed by bpl below
++        vld2.16   {q10, q11}, [r6, :128], r3    @ bytes 32-63
++
++        @ Only q8-q11 hold row data. q4-q7 are never loaded, stored or vpushed here, so leave them untouched.
++        sao_band_64b_16 q8,  q9, q10, q11, {d0-d3}, {d4-d7}, q14, q15, \bit_depth
++
++        vst2.16   { q8, q9 }, [r0, :128], r2    @ re-interleave on store
++        vst2.16   {q10, q11}, [r5, :128], r2
++
++        bpl       1b
++        pop       {r4-r6, pc}
++.endm
++@ 10-bit instance of band_c_16_16
++function ff_hevc_rpi_sao_band_c_16_neon_10, export=1
++        band_c_16_16 10
++endfunc
++
++
++@ ff_hevc_rpi_sao_band_c_8_neon_10(
++@   uint8_t * dst          [r0]
++@   uint8_t * src          [r1]
++@   uint32_t dst_stride    [r2]
++@   uint32_t src_stride    [r3]
++@   const int16_t * table1 sp[0]
++@   uint32_t offset1       sp[4]
++@   const int16_t * table2 sp[8]
++@   uint32_t offset2       sp[12]
++@   int width              sp[16]
++@   int height             sp[20]
++@ Interleaved band filter body, 16-bit sample storage, narrow blocks; width selects the path.
++.macro band_c_8_16 bit_depth
++        ldr       ip, [sp, #16]         @ width
++        push      {r4-r6, lr}
++        vmov.i64  q14, #0
++        cmp       ip, #8                @ tested by blt below, after the helper call
++        vmov.i16  q15, #(1 << \bit_depth) - 1
++        bl        band_load_c           @ NOTE(review): has to leave the flags intact - confirm
++        blt       4f                    @ width < 8: two 16-byte rows per pass
++
++        sao_band_32b_16 q8, q9, {d0-d3}, {d4-d7}, q14, q15, \bit_depth, \
++            "vld2.16   {q8,q9}, [r1, :128], r3",                        \
++            "subs      ip, #1",                                         \
++            "",                                                         \
++            "",                                                         \
++            "",                                                         \
++            "vst2.16   {q10,q11}, [r0, :128], r2"
++        pop       {r4-r6, pc}           @ no loop is visible here; presumably it lives in the macro (confirm)
++4:
++        add       r5, r0, r2            @ second-row pointers
++        add       r6, r1, r3
++        lsl       r2, #1                @ every pointer now steps two rows
++        lsl       r3, #1
++        sao_band_32b_16 q8, q9, {d0-d3}, {d4-d7}, q14, q15, \bit_depth, \
++            "vld2.16   {d16,d18}, [r1, :128], r3",                      \
++            "subs      ip, #2",                                         \
++            "vld2.16   {d17,d19}, [r6, :128], r3",                      \
++            "",                                                         \
++            "",                                                         \
++            "vst2.16   {d20,d22}, [r0, :128], r2",                      \
++            "vst2.16   {d21,d23}, [r5, :128], r2"
++        pop       {r4-r6, pc}
++.endm
++@ 10-bit instance of band_c_8_16
++function ff_hevc_rpi_sao_band_c_8_neon_10, export=1
++        band_c_8_16 10
++endfunc
++
++
++@ =============================================================================
++@ SAO EDGE
++
++@ r0    destination address
++@ r2    stride to post-increment r0 with
++@ [r5]  translate values
++@
++@ a <- c <- b
++@ a in q0 - q3
++@ c in q4 - q7
++@ b in q8 - q11
++@
++@ q12-15 used as temp
++@
++@ Can be used for both Y & C as we unzip/zip the deltas and
++@ transform "u/v" separately via d26/d27.  For Y d26=d27
++
++function edge_64b_body_8
++        @ 64 bytes of 8-bit edge filter. Result replaces a in q0-q3; c and b are left as they were.
++        vcgt.u8 q12,  q4,  q0   @ c > a -> -1 , otherwise 0
++        vcgt.u8 q13,  q5,  q1
++        vcgt.u8 q14,  q6,  q2
++        vcgt.u8 q15,  q7,  q3
++
++        vcgt.u8  q0,  q4        @ a > c -> -1 , otherwise 0
++        vcgt.u8  q1,  q5
++        vcgt.u8  q2,  q6
++        vcgt.u8  q3,  q7
++
++        vsub.s8  q0,  q12       @ a = sign(c-a)
++        vsub.s8  q1,  q13
++        vsub.s8  q2,  q14
++        vsub.s8  q3,  q15
++
++        vcgt.u8  q12, q4,  q8   @ c > b -> -1 , otherwise 0
++        vcgt.u8  q13, q5,  q9
++        vcgt.u8  q14, q6,  q10
++        vcgt.u8  q15, q7,  q11
++
++        vsub.s8  q0,  q12       @ subtracting -1 adds one where c > b
++        vsub.s8  q1,  q13
++        vsub.s8  q2,  q14
++        vsub.s8  q3,  q15
++
++        vcgt.u8  q12, q8,  q4   @ c < b -> -1 , otherwise 0
++        vcgt.u8  q13, q9,  q5
++        vcgt.u8  q14, q10, q6
++        vcgt.u8  q15, q11, q7
++
++        vadd.s8  q0,  q12       @ a = sign(c-a) + sign(c-b)
++        vadd.s8  q1,  q13
++        vmov.u8  q12, #2        @ bias that turns the -2..2 sum into a 0..4 table index
++        vadd.s8  q2,  q14
++        vadd.s8  q3,  q15
++
++        vadd.s8  q0,  q12
++        vadd.s8  q1,  q12
++
++        vld1.8   {d26, d27}, [r5]       @ d26 = table for even bytes, d27 = table for odd bytes
++
++        vadd.s8  q2,  q12
++        vuzp.8   q0,  q1        @ even bytes -> q0, odd bytes -> q1
++        vmov.u8  q15, #128
++        vadd.s8  q3,  q12       @ a = 2 + sign(c-a) + sign(c-b)
++
++        vtbl.8   d0,  {d26}, d0         @ index -> offset
++        vadd.s8  q12, q4, q15   @ Add -128 so we can use saturating signed add
++
++        vtbl.8   d1,  {d26}, d1
++        vadd.s8  q14, q5, q15
++
++        vtbl.8   d2,  {d27}, d2
++        vuzp.8   q2,  q3        @ same even/odd split for the second 32 bytes
++
++        vtbl.8   d3,  {d27}, d3
++
++        vtbl.8   d4,  {d26}, d4
++        vzip.8   q0,  q1        @ back to the original byte order
++
++        vtbl.8   d5,  {d26}, d5
++        vqadd.s8 q0,  q12       @ offset + (c - 128), saturating
++        vqadd.s8 q1,  q14
++        vadd.s8  q12, q6, q15   @ Add -128 so we can use saturating signed add
++
++        vtbl.8   d6,  {d27}, d6
++        vtbl.8   d7,  {d27}, d7
++        vadd.s8  q14, q7, q15   @ Add -128 so we can use saturating signed add
++        vzip.8   q2,  q3
++
++        vsub.s8  q0,  q15       @ undo the 128 bias
++        vqadd.s8 q2,  q12
++        vqadd.s8 q3,  q14
++        vsub.s8  q1,  q15
++        vsub.s8  q2,  q15
++        vsub.s8  q3,  q15
++
++        bx      lr
++endfunc
++
++@ r0    destination address
++@ r2    stride to post-increment r0 with
++@ r4    upper clip value
++@ [r5]  translate values
++@
++@ a <- c <- b
++@ a in q0 - q3
++@ c in q4 - q7
++@ b in q8 - q11
++@
++@ q12-15 used as temp
++@
++@ Can be used for both Y & C as we unzip/zip the deltas and
++@ transform "u/v" separately via d26/d27.  For Y d26=d27
++
++function edge_64b_body_16
++        @ 64 bytes (32 lanes of 16 bits) of edge filter. Result replaces a in q0-q3, clipped to 0..r4.
++        vcgt.u16 q12, q4, q0  // c > a -> -1 , otherwise 0
++        vcgt.u16 q13, q5, q1
++        vcgt.u16 q14, q6, q2
++        vcgt.u16 q15, q7, q3
++
++        vcgt.u16 q0, q0, q4  // a > c -> -1 , otherwise 0
++        vcgt.u16 q1, q1, q5
++        vcgt.u16 q2, q2, q6
++        vcgt.u16 q3, q3, q7
++
++        vsub.s16 q0, q0, q12 // a = sign(c-a)
++        vsub.s16 q1, q1, q13
++        vsub.s16 q2, q2, q14
++        vsub.s16 q3, q3, q15
++
++        vcgt.u16 q12, q4, q8  // c > b -> -1 , otherwise 0
++        vcgt.u16 q13, q5, q9
++        vcgt.u16 q14, q6, q10
++        vcgt.u16 q15, q7, q11
++
++        vsub.s16 q0, q0, q12
++        vsub.s16 q1, q1, q13
++        vsub.s16 q2, q2, q14
++        vsub.s16 q3, q3, q15
++
++        vcgt.u16 q12, q8, q4  // c < b -> -1 , otherwise 0
++        vcgt.u16 q13, q9, q5
++        vcgt.u16 q14, q10, q6
++        vcgt.u16 q15, q11, q7
++
++        vadd.s16 q0, q0, q12  // a = sign(c-a) + sign(c-b)
++        vadd.s16 q1, q1, q13
++        vadd.s16 q2, q2, q14
++        vadd.s16 q3, q3, q15
++
++        vmov.u8  q12, #2        @ bias that turns the -2..2 sum into a 0..4 table index
++
++        vmovn.s16 d0, q0        @ narrow the 32 sums to bytes in d0-d3
++        vmovn.s16 d1, q1
++        vmovn.s16 d2, q2
++        vmovn.s16 d3, q3
++
++        vldr     d26, [r5]      @ table for even lanes
++
++        vuzp.8   q0, q1         @ even lanes -> q0, odd lanes -> q1
++
++        vldr     d27, [r5, #8]  @ table for odd lanes
++
++        vadd.s8  q0, q0, q12
++        vadd.s8  q1, q1, q12
++
++        vmov.i64 q12, #0        @ lower clip bound
++
++        vtbl.8   d0, {d26}, d0  @ index -> offset
++        vtbl.8   d1, {d26}, d1
++        vtbl.8   d2, {d27}, d2
++        vtbl.8   d3, {d27}, d3
++
++        vdup.i16 q13, r4        @ upper clip bound
++
++        vzip.8   q0, q1         @ back to the original lane order
++
++        @ Avoid overwrite whilst widening
++        vaddw.s8 q2, q6, d2     @ c + sign-extended offset; d2/d3 are read before q1 is rewritten
++        vaddw.s8 q3, q7, d3
++        vaddw.s8 q1, q5, d1
++        vaddw.s8 q0, q4, d0
++
++        @ now clip
++        clip16_4 q2, q3, q1, q0, q12, q13
++
++        bx       lr
++endfunc
++
++@ 16 bytes of 8-bit edge filter. Result replaces a in q0.
++@ a <- c <- b
++@ a in q0
++@ c in q1
++@ b in q2
++@ Temp q3, q9
++@
++@ d16, d17 (q8) xlat U, V
++@ q14.u8 #2
++@ q15.u8 #128
++
++function edge_16b_body_8
++        vcgt.u8  q9,  q0,  q1   @ a > c -> -1 , otherwise 0
++        vadd.u8  q9,  q14, q9   @ start from the bias: 2 - (a > c)
++        vcgt.u8  q0,  q1,  q0   @ c > a -> -1 , otherwise 0
++        vsub.u8  q9,  q9,  q0
++        vcgt.u8  q0,  q2,  q1   @ c < b -> -1 , otherwise 0
++        vadd.u8  q9,  q9,  q0
++        vcgt.u8  q0,  q1,  q2   @ c > b -> -1 , otherwise 0
++        vsub.u8  q0,  q9,  q0   @ q0 = 2 + sign(c-a) + sign(c-b), a 0..4 table index
++
++        vadd.s8  q3,  q1, q15   @ Add -128 so we can use saturating signed add
++
++        vuzp.8   d0,  d1        @ even bytes -> d0, odd bytes -> d1
++
++        vtbl.8   d0,  {d16}, d0 @ index -> offset
++        vtbl.8   d1,  {d17}, d1
++
++        vzip.8   d0,  d1        @ back to the original byte order
++        vqadd.s8 q0,  q3        @ offset + (c - 128), saturating
++        vsub.s8  q0,  q15       @ undo the 128 bias
++
++        bx      lr
++endfunc
++@ 16 bytes (8 lanes of 16 bits) of edge filter. Result replaces a in q0, clipped to q12..q15.
++@ a <- c <- b
++@ a in q0
++@ c in q1
++@ b in q2
++@ Temp q9
++@
++@ q12, #0
++@ d16, d17 xlat U, V
++@ q14.u16 #2
++@ q15.u16 max
++function edge_16b_body_16
++        vcgt.u16 q9, q0, q1     @ a > c -> -1 , otherwise 0
++        vadd.u16 q9, q14, q9    @ start from the bias: 2 - (a > c)
++        vcgt.u16 q0, q1, q0     @ c > a -> -1 , otherwise 0
++        vsub.u16 q9, q9, q0
++        vcgt.u16 q0, q2, q1     @ c < b -> -1 , otherwise 0
++        vadd.u16 q9, q9, q0
++        vcgt.u16 q0, q1, q2     @ c > b -> -1 , otherwise 0
++        vsub.u16 q0, q9, q0     @ q0 = 2 + sign(c-a) + sign(c-b), a 0..4 table index
++
++        vmovn.s16 d0, q0        @ narrow the 8 indices to bytes in d0
++        @ d1 will have random contents that we transform but
++        @ that doesn't matter as we then discard them
++        vuzp.8   d0, d1         @ even lanes -> d0, odd lanes -> d1
++
++        vtbl.8   d0, {d16}, d0  @ index -> offset
++        vtbl.8   d1, {d17}, d1
++
++        vzip.8   d0, d1         @ back to the original lane order
++
++        vaddw.s8 q0, q1, d0     @ c + sign-extended offset
++
++        @ now clip
++        vmax.s16 q0, q12
++        vmin.s16 q0, q15
++        bx       lr
++endfunc
++
++
++@ ff_hevc_rpi_sao_edge_[c_]xx_neon(
++@   uint8_t *_dst,                    [r0]
++@   const uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,             [r2]
++@   const int16_t *_sao_offset_val_u, [r3]
++@   const int16_t *_sao_offset_val_v, [sp, #0]   // Chroma only
++@   int eo,                           [sp, #sp_base + 0]
++@   int width,                        [sp, #sp_base + 4]
++@   int height)                       [sp, #sp_base + 8]
++
++@ Jumps via jump_tab with
++@   uint8_t *_dst,                    [r0]
++@   const uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,             [r2]
++@   EDGE_SRC_STRIDE                   [r3]
++@   (1 << \bit_depth) - 1             [r4]
++@   * xlat_table                      [r5]  // setup_64b only
++@   int height                        [r12]
++@
++@   0                                 [q12] // > 8 bit
++@   2                                 [q14]
++@   128                               [q15] // = 8 bit
++@   r4                                [q15] // > 8 bit
++
++.macro  edge_xxb_init, bit_depth, is_chroma, jump_tab, setup_64b = 0, setup_16b = 0, check_w4 = 0, do2 = 0, xjump = 0
++        @ Shared entry code: build the xlat table in q8, fetch the stack args, call jump_tab[eo], return.
++@ Build translate registers
++@ As translate values can only be 0-4 we don't care about junk in the rest
++@ of the register
++.if \is_chroma
++        ldr      ip, [sp, #0]           @ ip = _sao_offset_val_v, read before the push moves sp
++        push     {r4-r6, lr}    @ 16 bytes (plus the 4-byte V pointer arg gives sp_base 20)
++        vld1.8   {d16[2]}, [r3]         @ table lane 2 <- first byte of offset_val[0]
++        add      r3, r3, #2
++        vld1.8   {d17[2]}, [ip]
++        add      ip, ip, #2
++        vld1.8   {d16[0]}, [r3]         @ lane 0 <- offset_val[1]
++        add      r3, r3, #2
++        vld1.8   {d17[0]}, [ip]
++        add      ip, ip, #2
++        vld1.8   {d16[1]}, [r3]         @ lane 1 <- offset_val[2]
++        add      r3, r3, #2
++        vld1.8   {d17[1]}, [ip]
++        add      ip, ip, #2
++        vld1.8   {d16[3]}, [r3]         @ lane 3 <- offset_val[3]
++        add      r3, r3, #2
++        vld1.8   {d17[3]}, [ip]
++        add      ip, ip, #2
++        vld1.8   {d16[4]}, [r3]         @ lane 4 <- offset_val[4]
++        vld1.8   {d17[4]}, [ip]
++        movw     r3, EDGE_SRC_STRIDE    @ the offsets pointer is done with: r3 becomes the source stride
++.set sp_base, 20
++.else
++        add      ip, r3, #4             @ ip -> offset_val[2]
++        vld1.8   {d16[1]}, [r3]         @ offset_val[0]
++        add      r3, r3, #2
++        vld1.8   {d17[0]}, [ip]         @ offset_val[2]
++        add      ip, ip, #2
++        vld1.8   {d16[0]}, [r3]         @ offset_val[1]
++        add      r3, r3, #6
++        vld1.8   {d17[1]}, [ip]         @ offset_val[3]
++        vld1.8   {d16[2]}, [r3]         @ offset_val[4]
++        movw     r3, EDGE_SRC_STRIDE    @ the offsets pointer is done with: r3 becomes the source stride
++        push     {r4-r6, lr}    @ 16 bytes (sp_base 16)
++        vzip.8   d16, d17               @ d16 lanes 0-4 = offset_val[1], [2], [0], [3], [4] - same order as chroma
++        vmov     d17, d16               @ one table serves both halves
++.set sp_base, 16
++.endif
++
++@ If setup_64b we need the xlat table on the stack
++.if \setup_64b
++        sub      r5, sp, #16            @ r5 = slot that q8 will occupy once vpush {q4-q8} below has run
++.endif
++
++@ Get jump address
++@ We have a special case for width 4 as the calling code doesn't detect it
++@ If we may have w4 then we add a 2nd jump table after the 1st
++.if \check_w4
++        ldr      r12, [sp, #sp_base + 4]        @ width
++        adr      r6, \jump_tab
++        ldr      lr, [sp, #sp_base + 0]        @ eo
++        cmp      r12, #8
++        it lt
++        addlt    r6, #16                @ width < 8: step over the first table (16 bytes) to the 2nd
++.else
++        ldr      lr, [sp, #sp_base + 0]        @ eo
++        adr      r6, \jump_tab
++.endif
++
++        ldr      r12, [sp, #sp_base + 8]        @ height
++
++.if \bit_depth > 8
++        movw     r4, (1 << \bit_depth) - 1      @ largest sample value, used as the upper clip
++.endif
++.if \setup_16b
++.if \bit_depth > 8
++        vmov.i64 q12, #0
++        vdup.16  q15, r4
++        vmov.u16 q14, #2
++.else
++        vmov.u8  q15, #128
++        vmov.u8  q14, #2
++.endif
++.endif
++
++@ If setup_64b we need q4-q7 saved.
++.if \setup_64b
++        vpush    {q4-q8}        @ 80 bytes, q8 pushed first
++.set sp_base, sp_base + 80
++.endif
++
++        ldr      r6, [r6, lr, lsl #2]   @ r6 = jump_tab[eo] (4-byte entries)
++
++@ For 16 bit width 64 (or chroma 32) we need to do this in 2 passes
++.if \do2
++        push     {r0, r1, r6, r12}      @ the first pass consumes these; keep copies for the second
++.if jent_pic
++        bl       98f
++.else
++        blx      r6
++.endif
++        pop      {r0, r1, r6, r12}
++
++        add      r0, #64                @ second pass starts 64 bytes further along each row
++        add      r1, #64
++.endif
++
++.if jent_pic
++        bl       98f
++.else
++        blx      r6
++.endif
++
++@ Tidy up & return
++.if \setup_64b
++        vpop     {q4-q8}        @ spurious but harmless load of q8
++.endif
++        pop      {r4-r6, pc}
++
++.if jent_pic && !\xjump
++@ Magic label - used as 98b in jent macro
++98:
++        add      pc, r6                 @ with jent_pic r6 is added to pc rather than branched to
++.endif
++.endm
++
++@ Entry code for the 16-byte bodies: edge_xxb_init with setup_16b=1 (constants in q12/q14/q15).
++.macro  edge_16b_init, bit_depth, is_chroma, check_w4, jump_tab
++        edge_xxb_init \bit_depth, \is_chroma, \jump_tab, check_w4=\check_w4, setup_16b=1
++.endm
++@ Entry code for the 64-byte bodies: edge_xxb_init with setup_64b=1 (q4-q7 saved, table at [r5]).
++.macro  edge_64b_init, bit_depth, is_chroma, do2, jump_tab, xjump=0
++        edge_xxb_init \bit_depth, \is_chroma, \jump_tab, do2=\do2, setup_64b=1, xjump=\xjump
++.endm
++
++@ Same-row neighbours, 64 bytes per row: a = c read pb bytes earlier, b = c read pb bytes later.
++.macro  edge_64b_e0, body_fn, pb
++        sub      r1, #8                 @ start 8 bytes early so the load also covers the bytes before the row
++        mov      r6, lr                 @ bl below overwrites lr: keep the return address in r6
++1:      vldm     r1, {d7-d16}           @ d7 = 8 bytes before, q4-q7 = c, d16 = 8 bytes after
++        // load a
++        vext.8   q0,  q3,  q4, #(16 - \pb)
++        add      r1, r3
++        vext.8   q1,  q4,  q5, #(16 - \pb)
++        subs     r12, #1                @ row counter; flags are consumed by bgt below
++        vext.8   q2,  q5,  q6, #(16 - \pb)
++        vext.8   q3,  q6,  q7, #(16 - \pb)
++        pld      [r1]
++        // load b
++        vext.8   q11, q7,  q8, #\pb     @ Avoid overwrite
++        pld      [r1, #64]
++        vext.8   q8,  q4,  q5, #\pb
++        vext.8   q9,  q5,  q6, #\pb
++        vext.8   q10, q6,  q7, #\pb
++        bl       \body_fn
++        vstm     r0, {q0-q3}
++        add      r0, r0, r2
++        bgt      1b
++        bx       r6
++.endm
++@ Same-row neighbours, two 32-byte rows per pass: first row in q0/q1,q4/q5,q8/q9, second in q2/q3,q6/q7,q10/q11.
++.macro  edge_32bx2_e0, body_fn, pb
++        add      r6, r1, r3             @ r6 = second source row
++        push     {r7,lr}
++        sub      r1, #8                 @ first row is loaded starting 8 bytes early
++        add      r7, r0, r2             @ r7 = second destination row
++        lsl      r2, #1                 @ each destination pointer steps two rows
++1:      vldmia   r1, {d7-d12}           @ d7 = 8 bytes before, q4-q5 = c, d12 = 8 bytes after
++        // load a
++        vext.8   q0, q3, q4, #16 - \pb
++        add      r1, r1, r3, lsl #1
++        vext.8   q1, q4, q5, #16 - \pb
++        subs     r12, #2                @ two rows per pass; flags are consumed by bgt below
++        // load b
++        vext.8   q8, q4, q5, #\pb
++        vext.8   q9, q5, q6, #\pb
++        vldr     d25, [r6, #-8]         @ second row: 8 bytes before ...
++        vldmia   r6, {d12-d15}          @ ... c in q6-q7 ...
++        vldr     d26, [r6, #32]         @ ... and 8 bytes after
++        // load a
++        vext.8   q2, q12, q6, #16 - \pb
++        add      r6, r6, r3, lsl #1
++        vext.8   q3, q6, q7, #16 - \pb
++        // load b
++        vext.8   q10, q6, q7, #\pb
++        vext.8   q11, q7, q13, #\pb
++        bl       \body_fn
++        vst1.8   {q0-q1}, [r0, :256], r2
++        vst1.8   {q2-q3}, [r7, :256], r2
++        bgt      1b
++        pop      {r7,pc}
++.endm
++@ Same-row neighbours, one 16-byte row per pass: a in q0, c in q1, b in q2.
++.macro  edge_16b_e0, body_fn, pb
++        sub      r1, #8                 @ start 8 bytes early so the load also covers the bytes before the row
++        mov      r6, lr                 @ bl below overwrites lr: keep the return address in r6
++1:      vldmia   r1, {d1-d4}            @ d1 = 8 bytes before, q1 = c, d4 = 8 bytes after
++        add      r1, r3
++        subs     r12, #1                @ row counter; flags are consumed by bgt below
++        vext.8   q0, q0, q1, #16 - \pb
++        vext.8   q2, q1, q2, #\pb
++
++        bl       \body_fn
++        vst1.8   {q0}, [r0, :128], r2
++        bgt      1b
++        bx       r6
++.endm
++@ Same-row neighbours, two 8-byte rows per pass: low d register of q0/q1/q2 is row 0, high is row 1.
++.macro  edge_8bx2_e0, body_fn, pb
++        add      r6, r1, r3             @ r6 = second source row
++        push     {r7,lr}
++        sub      r1, #8                 @ first row is loaded starting 8 bytes early
++        add      r7, r0, r2             @ r7 = second destination row
++        lsl      r2, #1                 @ each destination pointer steps two rows
++1:      vldmia   r1, {d1-d2}            @ row 0: d1 = 8 bytes before, d2 = c
++        vldmia   r6, {d3-d4}            @ row 1: d3 = c, d4 = 8 bytes after
++        vldr     d6, [r1, #16]          @ row 0: 8 bytes after
++        subs     r12, #2                @ two rows per pass; flags are consumed by bgt below
++        vldr     d7, [r6, #-8]          @ row 1: 8 bytes before
++        add      r1, r1, r3, lsl #1
++        vext.8   d0, d1, d2, #8 - \pb
++        add      r6, r6, r3, lsl #1
++        vext.8   d5, d3, d4, #\pb
++        vext.8   d4, d2, d6, #\pb
++        vext.8   d1, d7, d3, #8 - \pb
++
++        bl       \body_fn
++        vst1.8   {d0}, [r0, :64], r2
++        vst1.8   {d1}, [r7, :64], r2
++        bgt      1b
++        pop      {r7,pc}
++.endm
++@ Same-row neighbours, four 4-byte rows per pass, packed as rows 0|1 in d0/d2/d4 and rows 2|3 in d1/d3/d5.
++.macro  edge_4bx4_e0, body_fn, pb
++        add      r6, r1, r3             @ r1 walks even rows, r6 odd rows
++        push     {r7,lr}
++        add      r7, r0, r2             @ r0 writes even rows, r7 odd rows
++        lsl      r2, #1
++
++        tst      r1, #4                 @ pick the loop that matches the source alignment
++        bne      2f
++1:      // r1 (and assumed r6) are 64-bit aligned
++        vldr     d2, [r1]               @ c sits in the low half of each load
++        vldr     d0, [r1, #-8]
++        add      r1, r1, r3, lsl #1
++        vldr     d20, [r6]
++        subs     r12, #4                @ four rows per pass; flags are consumed by bgt below
++        vldr     d18, [r6, #-8]
++        add      r6, r6, r3, lsl #1
++        vldr     d3, [r1]
++        vshr.u64 d4, d2, #\pb * 8       @ b = c read pb bytes later
++        vldr     d1, [r1, #-8]
++        add      r1, r1, r3, lsl #1
++        vldr     d21, [r6]
++        vext.8   d0, d0, d2, #8 - \pb   @ a = c read pb bytes earlier
++        vldr     d19, [r6,#-8]
++        add      r6, r6, r3, lsl #1
++        vshr.u64 d22, d20, #\pb * 8
++        vext.8   d18, d18, d20, #8 - \pb
++        vshr.u64 d5, d3, #\pb * 8
++        vext.8   d1, d1, d3, #8 - \pb
++        vshr.u64 d23, d21, #\pb * 8
++        vext.8   d19, d19, d21, #8 - \pb
++        vsli.64  q1, q10, #32           @ merge: odd-row data goes into the high 32 bits
++        vsli.64  q2, q11, #32
++        vsli.64  q0, q9, #32
++
++        bl       \body_fn
++        vst1.32  {d0[0]}, [r0, :32], r2
++        vst1.32  {d0[1]}, [r7, :32], r2
++        vst1.32  {d1[0]}, [r0, :32], r2
++        vst1.32  {d1[1]}, [r7, :32], r2
++        bgt      1b
++        pop      {r7,pc}
++
++2:      // r1 (and assumed r6) are 32-bit but not 64-bit aligned
++        vldr     d20, [r1, #-4]         @ loads start 4 bytes early: c sits in the high half
++        vldr     d22, [r1, #4]
++        add      r1, r1, r3, lsl #1
++        vldr     d2, [r6, #-4]
++        subs     r12, #4                @ four rows per pass; flags are consumed by bgt below
++        vldr     d4, [r6, #4]
++        add      r6, r6, r3, lsl #1
++        vldr     d21, [r1, #-4]
++        vshl.i64 d18, d20, #\pb * 8     @ a = c read pb bytes earlier, left in the high half
++        vldr     d23, [r1, #4]
++        add      r1, r1, r3, lsl #1
++        vldr     d3, [r6, #-4]
++        vext.8   d22, d20, d22, #\pb    @ b = c read pb bytes later, left in the high half
++        vldr     d5, [r6, #4]
++        add      r6, r6, r3, lsl #1
++        vshl.i64 d0, d2, #\pb * 8
++        vext.8   d4, d2, d4, #\pb
++        vshl.i64 d19, d21, #\pb * 8
++        vext.8   d23, d21, d23, #\pb
++        vshl.i64 d1, d3, #\pb * 8
++        vext.8   d5, d3, d5, #\pb
++        vsri.64  q1, q10, #32           @ merge: even-row data goes into the low 32 bits
++        vsri.64  q0, q9, #32
++        vsri.64  q2, q11, #32
++
++        bl       \body_fn
++        vst1.32  {d0[0]}, [r0, :32], r2
++        vst1.32  {d0[1]}, [r7, :32], r2
++        vst1.32  {d1[0]}, [r0, :32], r2
++        vst1.32  {d1[1]}, [r7, :32], r2
++        bgt      2b
++        pop      {r7,pc}
++.endm
++
++@ Neighbours one row before and one row after, 64 bytes per row; rows are carried over between passes.
++.macro  edge_64b_e1, body_fn
++        sub      r1, r3                 @ start one row before the first row to filter
++        push     {lr}
++        add      r6, r1, #32            @ r6 reads the second 32 bytes of each row
++        // load a
++        vld1.8   {q0-q1}, [r1, :256], r3
++        vld1.8   {q2-q3}, [r6, :256], r3
++        // load c
++        vld1.8   {q4-q5}, [r1, :256], r3
++        vld1.8   {q6-q7}, [r6, :256], r3
++1:      // load b
++        vld1.8   {q8-q9}, [r1, :256], r3
++        subs     r12, #1                @ row counter; flags are consumed by the le-conditional exit below
++        vld1.8   {q10-q11}, [r6, :256], r3
++        bl       \body_fn
++        vstm     r0, {q0-q3}
++        // copy c to a
++        vmov.64  q0, q4
++        pld      [r1, r3]
++        vmov.64  q1, q5
++        it       le
++        pople    {lr}                   @ last row done: restore the return address ...
++        vmov.64  q2, q6
++        it       le
++        bxle     lr                     @ ... and leave
++        vmov.64  q3, q7
++        add      r0, r0, r2
++        // copy b to c
++        vmov.64  q4, q8
++        vmov.64  q5, q9
++        vmov.64  q6, q10
++        vmov.64  q7, q11
++        b        1b
++.endm
++@ Neighbours one row before and one row after, two 32-byte rows per pass.
++.macro  edge_32bx2_e1, body_fn
++        sub      r6, r1, r3             @ r6 = row before the first row to filter
++        vld1.8   {q2-q3}, [r1, :256], r3        @ first row: c for output row 0 and a for output row 1
++        vld1.8   {q0-q1}, [r6, :256]    @ row before: a for output row 0
++        mov      r6, lr                 @ bl below overwrites lr: keep the return address in r6
++
++1:      @ Given the data duplication here we could obviously do better than
++        @ using the generic body_fn but it almost certainly isn't worth it
++        vld1.8   {q8-q9}, [r1, :256], r3        @ next row: b for output row 0, c for output row 1
++        subs     r12, #2                @ two rows per pass; flags are consumed by bxle below
++        vmov     q4, q2
++        vmov     q5, q3
++        vld1.8   {q10-q11}, [r1, :256], r3      @ row after that: b for output row 1
++        vmov     q6, q8
++        vmov     q7, q9
++
++        bl       \body_fn
++
++        vst1.8   {q0-q1}, [r0, :256], r2
++        // copy b to a
++        vmov     q0, q8
++        vmov     q1, q9
++        vst1.8   {q2-q3}, [r0, :256], r2
++        vmov     q2, q10
++        it       le
++        bxle     r6
++        vmov     q3, q11
++        b        1b
++.endm
++@ Neighbours one row before and one row after, one 16-byte row per pass: a in q0, c in q1, b in q2.
++.macro  edge_16b_e1, body_fn
++        sub      r6, r1, r3             @ r6 = row before the first row to filter
++        // load c
++        vld1.8   {q1}, [r1, :128], r3
++        // load a
++        vld1.8   {q0}, [r6, :128]
++        mov      r6, lr                 @ bl below overwrites lr: keep the return address in r6
++1:      // load b
++        vld1.8   {q2}, [r1, :128], r3
++        bl       \body_fn
++        vst1.8   {q0}, [r0, :128], r2
++        subs     r12, #1                @ row counter; flags are consumed by bxle below
++        // copy c to a
++        vmov.64  q0, q1
++        it       le
++        bxle     r6
++        // copy b to c
++        vmov.64  q1, q2
++        b        1b
++.endm
++@ Neighbours one row before and one row after, two 8-byte rows per pass (low d = first row, high d = second).
++.macro  edge_8bx2_e1, body_fn
++        sub      r6, r1, r3             @ r6 = row before the first row to filter
++        lsl      r3, #1                 @ r1 and r6 each step two rows per load
++        push     {r7, lr}
++        vld1.8   {d1}, [r1, :64], r3    @ first row: a for the second output row
++        vld1.8   {d0}, [r6, :64], r3    @ row before: a for the first output row
++        add      r7, r0, r2             @ r7 = second destination row
++        lsl      r2, #1
++1:      @ Given the data duplication here we could obviously do better than
++        @ using the generic body_fn but it almost certainly isn't worth it
++        vld1.8   {d4}, [r6, :64], r3    @ b for the first output row
++        vmov     d2, d1                 @ c for the first output row
++        vld1.8   {d5}, [r1, :64], r3    @ b for the second output row
++        subs     r12, #2                @ two rows per pass; flags are consumed by bgt below
++        vmov     d3, d4                 @ c for the second output row
++
++        bl       \body_fn
++
++        vst1.8   {d0}, [r0, :64], r2
++        vst1.8   {d1}, [r7, :64], r2
++
++        // copy b to a
++        vmov     q0, q2
++        bgt      1b
++        pop      {r7, pc}
++.endm
++@ Neighbours one row before and one row after, four 4-byte rows per pass, two rows packed per d register.
++.macro  edge_4bx4_e1, body_fn
++        sub      r6, r1, r3             @ r6 = row before the first row to filter
++        lsl      r3, #1                 @ r1 and r6 each step two rows per load
++        push     {r7, lr}
++        vld1.32  {d0[1]}, [r1, :32], r3
++        add      r7, r0, r2             @ r0 writes even rows, r7 odd rows
++        vld1.32  {d0[0]}, [r6, :32], r3 @ d0 = row before | row 0
++        lsl      r2, #1
++        vld1.32  {d4[1]}, [r1, :32], r3
++        vld1.32  {d4[0]}, [r6, :32], r3 @ d4 = row 1 | row 2
++        vld1.32  {d5[1]}, [r1, :32], r3
++        vld1.32  {d5[0]}, [r6, :32], r3 @ d5 = row 3 | row 4
++        vmov     d1, d4                 @ a = q0, one row behind c
++        vext.32  d2, d0, d4, #1         @ c = q1 = rows 0-3; b = q2, one row ahead of c
++        subs     r12, #4                @ four rows per pass; flags are consumed by ble below
++        vmov     d22, d5                @ keep the last two rows for the next pass
++        vext.32  d3, d4, d5, #1
++        b        2f
++
++1:      vst1.32  {d0[0]}, [r0, :32], r2 @ stores of the previous result interleaved with the next set-up
++        vext.32  d2, d22, d4, #1
++        vst1.32  {d0[1]}, [r7, :32], r2
++        vmov     d0, d22
++        vst1.32  {d1[0]}, [r0, :32], r2
++        vext.32  d3, d4, d5, #1
++        vst1.32  {d1[1]}, [r7, :32], r2
++        vmov     d1, d4
++        vmov     d22, d5
++2:      @ Given the data duplication here we could probably do better than
++        @ using the generic body_fn but it almost certainly isn't worth it
++        bl       \body_fn
++        ble      3f                     @ last group: store without loading further rows
++        vld1.32  {d4[0]}, [r6, :32], r3
++        subs     r12, #4
++        vld1.32  {d4[1]}, [r1, :32], r3
++        vld1.32  {d5[0]}, [r6, :32], r3
++        vld1.32  {d5[1]}, [r1, :32], r3
++        b        1b
++
++3:      vst1.32  {d0[0]}, [r0, :32], r2
++        vst1.32  {d0[1]}, [r7, :32], r2
++        vst1.32  {d1[0]}, [r0, :32]
++        vst1.32  {d1[1]}, [r7, :32]
++        pop      {r7, pc}
++.endm
++
++.macro  edge_64b_e2, body_fn, pb  @ e2: 64 bytes per row, a[x] = above[x - pb], b[x] = below[x + pb]
++        push     {lr}
++        sub      r6, r1, r3  @ r6 -> row above the first source row
++        // load c (first 32 bytes) and a
++        vld1.8   {q4-q5}, [r1, :128]
++        vldr     d25, [r6, #-8]  @ 8 bytes to the left of the row above, placed in the high half of q12
++        vldmia   r6, {d16-d23}  @ 64 bytes of the row above
++        vext.8   q0, q12, q8, #16 - \pb  @ a = row above starting pb bytes to the left
++        add      r6, r1, #32
++        vext.8   q1, q8, q9, #16 - \pb
++        add      r1, r1, r3  @ r1 -> row below
++        vext.8   q2, q9, q10, #16 - \pb
++        vld1.8   {q6-q7}, [r6, :128]  @ remaining 32 bytes of c
++        sub      r6, r1, r3  @ r6 -> current row again
++        vext.8   q3, q10, q11, #16 - \pb
++
++1:      // load b: 72 bytes of the row below, then shift by pb
++        vldmia   r1, {d16-d24}
++        vext.8   q8, q8, q9, #\pb
++        pld      [r1, r3]
++        vext.8   q9, q9, q10, #\pb
++        subs     r12, #1  @ flags are consumed after the call, so body_fn must not alter them
++        vext.8   q10, q10, q11, #\pb
++        vext.8   q11, q11, q12, #\pb
++        bl       \body_fn
++        // next a is mostly available in c
++        vldr     d25, [r6, #-8]  @ only the 8 bytes left of the current row need loading
++        vstmia   r0, {q0-q3}  @ store the 64-byte result row
++        vext.8   q3, q6, q7, #16 - \pb
++        it       le
++        pople    {lr}  @ last row: restore lr and return two instructions later
++        vext.8   q2, q5, q6, #16 - \pb
++        it       le
++        bxle     lr
++        vext.8   q1, q4, q5, #16 - \pb
++        add      r6, r6, r3
++        vext.8   q0, q12, q4, #16 - \pb
++        add      r0, r0, r2
++        // next c is mostly available in b (relies on body_fn leaving q8-q11 intact)
++        vldr     d8, [r1]  @ first 8 bytes of the row below are reloaded unshifted
++        vext.8   d9, d16, d17, #8 - \pb
++        vext.8   q5, q8, q9, #16 - \pb
++        add      r1, r1, r3
++        vext.8   q6, q9, q10, #16 - \pb
++        pld      [r6, #-8]
++        vext.8   q7, q10, q11, #16 - \pb
++        b        1b
++.endm
++
++.macro  edge_32bx2_e2, body_fn, pb  @ e2: two 32-byte rows per iteration, a[x] = above[x - pb], b[x] = below[x + pb]
++        sub      r6, r1, r3  @ r6 -> row above the first source row
++        push     {r7, lr}
++        add      r7, r0, r2  @ r7 -> second output row
++        lsl      r2, #1  @ each output pointer steps two rows per store
++        // load a and first 32b of c
++        vld1.8   {q4-q5}, [r1, :256]
++        vldr     d25, [r6, #-8]  @ 8 bytes left of row -1
++        vld1.8   {q13-q14}, [r6, :256]
++        vldr     d31, [r1, #-8]  @ 8 bytes left of row 0
++        add      r6, r6, r3, lsl #1  @ r6 -> row 1
++        vext.8   q0, q12, q13, #16 - \pb
++        add      r1, r1, r3, lsl #1  @ r1 -> row 2
++        vext.8   q1, q13, q14, #16 - \pb
++        vext.8   q2, q15, q4, #16 - \pb
++        vext.8   q3, q4, q5, #16 - \pb
++1:
++        // load second 32b of c and second 32b of b (40 bytes each, 8 extra on the right)
++        vldmia   r6, {d12-d16}
++        vldmia   r1, {d20-d24}
++        // first 32b of b is mostly available in second 32b of c
++        vext.8   q9, q7, q8, #\pb
++        subs     r12, #2  @ flags are tested by the ble after the call, so body_fn must not alter them
++        vext.8   q8, q6, q7, #\pb
++        vext.8   q10, q10, q11, #\pb
++        vext.8   q11, q11, q12, #\pb
++
++        bl       \body_fn
++
++        vst1.8   {q0-q1}, [r0, :256], r2
++        vst1.8   {q2-q3}, [r7, :256], r2
++        ble      2f
++
++        vldr     d25, [r6, #-8]  @ 8 bytes left of the second row just processed
++        add      r6, r6, r3, lsl #1
++        vldr     d8, [r1]  @ first 8 bytes of the next c row, reloaded unshifted
++        vext.8   d9, d20, d21, #8 - \pb
++        vldr     d31, [r1, #-8]
++        add      r1, r1, r3, lsl #1
++        // first 32b of a is mostly available in second 32b of c
++        vext.8   q1, q6, q7, #16 - \pb
++        vext.8   q0, q12, q6, #16 - \pb
++        // first 32b of c is mostly available in second 32b of b
++        vext.8   q5, q10, q11, #16 - \pb
++        // second 32b of a is mostly available in first 32b of c
++        vext.8   q2, q15, q4, #16 - \pb
++        vext.8   q3, q4, q5, #16 - \pb
++        b        1b
++
++2:      pop      {r7, pc}
++.endm
++
++.macro  edge_16b_e2, body_fn, pb  @ e2: 16 bytes per row, a[x] = above[x - pb], b[x] = below[x + pb]
++        push     {lr}
++        sub      r6, r1, r3  @ r6 -> row above the first source row
++        vld1.8   {q1}, [r1, :128], r3  @ c = current row, r1 -> row below
++        vldr     d19, [r6, #-8]  @ 8 bytes left of the row above (high half of q9)
++        vld1.8   {q10}, [r6, :128], r3  @ row above, r6 -> current row
++
++1:      vldmia   r1, {d4-d6}  @ 24 bytes of the row below
++        vext.8   q0, q9, q10, #16 - \pb  @ a = row above starting pb bytes to the left
++        subs     r12, #1  @ flags are tested by the ble after the call, so body_fn must not alter them
++        vext.8   q2, q2, q3, #\pb  @ b = row below starting pb bytes to the right
++        bl       \body_fn
++        vst1.8   {q0}, [r0, :128], r2
++        ble      2f
++        vmov     q10, q1  @ current row becomes the next row above
++        vldr     d2, [r1]  @ next c: first 8 bytes reloaded, rest recovered from b below
++        add      r1, r1, r3
++        vldr     d19, [r6, #-8]
++        add      r6, r6, r3
++        vext.8   d3, d4, d5, #8 - \pb  @ relies on body_fn leaving q2 intact
++        b        1b
++
++2:      pop      {pc}
++.endm
++
++.macro  edge_8bx2_e2, body_fn, pb  @ e2: two 8-byte rows per iteration, a[x] = above[x - pb], b[x] = below[x + pb]
++        sub      r6, r1, r3  @ r6 -> row above the first source row
++        push     {r7, lr}
++        add      r7, r0, r2  @ r7 -> second output row
++        lsl      r2, #1
++        vldr     d18, [r6, #-8]  @ row -1, 8 bytes to the left
++        vldr     d19, [r6]  @ row -1
++        add      r6, r6, r3, lsl #1  @ r6 -> row 1
++        vldr     d20, [r1, #-8]  @ row 0, 8 bytes to the left
++        vldr     d2, [r1]  @ row 0 (c low half)
++        add      r1, r1, r3, lsl #1  @ r1 -> row 2
++        vldmia   r6, {d3-d4}  @ row 1: d3 is the c high half, d4 the 8 bytes to its right
++        vld1.8   {d21-d22}, [r1, :128]  @ row 2 and the 8 bytes to its right
++
++1:      vext.8   d0, d18, d19, #8 - \pb  @ a low = row -1 shifted
++        vext.8   d4, d3, d4, #\pb  @ b low = row 1 shifted
++        vext.8   d1, d20, d2, #8 - \pb  @ a high = row 0 shifted
++        subs     r12, #2  @ flags are tested by the ble after the call, so body_fn must not alter them
++        vext.8   d5, d21, d22, #\pb  @ b high = row 2 shifted
++
++        bl       \body_fn
++
++        vst1.8   {d0}, [r0, :64], r2
++        vst1.8   {d1}, [r7, :64], r2
++        ble      2f
++
++        vldr     d18, [r6, #-8]  @ reload the neighbours for the next pair of rows
++        add      r6, r6, r3, lsl #1
++        vldr     d20, [r1, #-8]
++        vmov     d19, d3  @ second row of this pair is the row above the next pair
++        vldr     d2, [r1]
++        add      r1, r1, r3, lsl #1
++        vldmia   r6, {d3-d4}
++        vld1.8   {d21-d22}, [r1, :128]
++        b        1b
++
++2:      pop      {r7, pc}
++.endm
++
++.macro  edge_4bx4_e2, body_fn, pb  @ e2: four 4-byte rows per iteration, everything reloaded each pass
++        sub      r6, r1, r3  @ r6 -> row above
++        push     {r7-r9, lr}  @ NOTE(review): r9 is not used in this macro - confirm why it is saved
++        add      r8, r1, r3  @ r8 -> row below
++        sub      r6, r6, #\pb  @ a is read pb bytes to the left of the row above
++        add      r8, r8, #\pb  @ b is read pb bytes to the right of the row below
++        add      r7, r0, r2  @ r7 -> second output row
++        lsl      r2, #1
++
++1:      vld1.32  {d0[0]}, [r6], r3  @ loads carry no alignment hint because of the pb offsets
++        subs     r12, #4  @ flags are tested by the bgt after the call, so body_fn must not alter them
++        vld1.32  {d2[0]}, [r1], r3
++        vld1.32  {d4[0]}, [r8], r3
++        vld1.32  {d0[1]}, [r6], r3
++        vld1.32  {d2[1]}, [r1], r3
++        vld1.32  {d4[1]}, [r8], r3
++        vld1.32  {d1[0]}, [r6], r3
++        vld1.32  {d3[0]}, [r1], r3
++        vld1.32  {d5[0]}, [r8], r3
++        vld1.32  {d1[1]}, [r6], r3
++        vld1.32  {d3[1]}, [r1], r3
++        vld1.32  {d5[1]}, [r8], r3  @ now q0 = a, q1 = c, q2 = b for four rows
++
++        bl       \body_fn
++
++        vst1.32  {d0[0]}, [r0, :32], r2  @ r0 writes output rows 0 and 2, r7 rows 1 and 3
++        vst1.32  {d0[1]}, [r7, :32], r2
++        vst1.32  {d1[0]}, [r0, :32], r2
++        vst1.32  {d1[1]}, [r7, :32], r2
++        bgt      1b
++
++        pop      {r7-r9,pc}
++.endm
++
++.macro  edge_64b_e3, body_fn, pb  @ e3: 64 bytes per row, a[x] = above[x + pb], b[x] = below[x - pb]
++        push     {lr}
++        sub      r6, r1, r3  @ r6 -> row above the first source row
++        // load c (first 32 bytes) and a
++        vld1.8   {q4-q5}, [r1, :128]
++        vldmia   r6, {d16-d24}  @ 72 bytes of the row above (64 + 8 extra on the right)
++        vext.8   q0, q8, q9, #\pb  @ a = row above starting pb bytes to the right
++        add      r6, r1, #32
++        vext.8   q1, q9, q10, #\pb
++        add      r1, r1, r3  @ r1 -> row below
++        vext.8   q2, q10, q11, #\pb
++        vld1.8   {q6-q7}, [r6, :128]  @ remaining 32 bytes of c
++        sub      r6, r1, r3  @ r6 -> current row again
++        vext.8   q3, q11, q12, #\pb
++
++1:      // load b: 8 bytes left of the row below plus the row itself, then shift
++        vldr     d17, [r1, #-8]
++        vldmia   r1, {d18-d25}
++        vext.8   q8, q8, q9, #16 - \pb
++        pld      [r1, r3]
++        vext.8   q9, q9, q10, #16 - \pb
++        subs     r12, #1  @ flags are consumed after the call, so body_fn must not alter them
++        vext.8   q10, q10, q11, #16 - \pb
++        vext.8   q11, q11, q12, #16 - \pb
++        bl       \body_fn
++        // next a is mostly available in c
++        vldr     d24, [r6, #64]  @ only the 8 bytes right of the current row need loading
++        vstmia   r0, {q0-q3}  @ store the 64-byte result row
++        vext.8   q0, q4, q5, #\pb
++        it       le
++        pople    {lr}  @ last row: restore lr and return two instructions later
++        vext.8   q1, q5, q6, #\pb
++        it       le
++        bxle     lr
++        vext.8   q2, q6, q7, #\pb
++        add      r6, r6, r3
++        vext.8   q3, q7, q12, #\pb
++        add      r0, r0, r2
++        // next c is mostly available in b (relies on body_fn leaving q8-q11 intact)
++        vext.8   d14, d22, d23, #\pb
++        vldr     d15, [r1, #56]  @ last 8 bytes of the row below are reloaded unshifted
++        vext.8   q4, q8, q9, #\pb
++        add      r1, r1, r3
++        vext.8   q5, q9, q10, #\pb
++        vext.8   q6, q10, q11, #\pb
++        b        1b
++.endm
++
++.macro  edge_32bx2_e3, body_fn, pb  @ e3: two 32-byte rows per iteration, a[x] = above[x + pb], b[x] = below[x - pb]
++        sub      r6, r1, r3  @ r6 -> row above the first source row
++        push     {r7, lr}
++        add      r7, r0, r2  @ r7 -> second output row
++        lsl      r2, #1  @ each output pointer steps two rows per store
++        // load a and first 32b of c (40 bytes each, 8 extra on the right)
++        vldmia   r1, {d8-d12}
++        vldmia   r6, {d24-d28}
++        vext.8   q2, q4, q5, #\pb  @ second row of a = row 0 shifted
++        add      r6, r6, r3, lsl #1  @ r6 -> row 1
++        vext.8   q3, q5, q6, #\pb
++        add      r1, r1, r3, lsl #1  @ r1 -> row 2
++        vext.8   q0, q12, q13, #\pb  @ first row of a = row -1 shifted
++        vext.8   q1, q13, q14, #\pb
++1:
++        // load second 32b of c and second 32b of b
++        vldr     d25, [r6, #-8]  @ 8 bytes left of row 1
++        subs     r12, #2  @ flags are tested by the ble after the call, so body_fn must not alter them
++        vldmia   r6, {d12-d15}
++        vldr     d27, [r1, #-8]  @ 8 bytes left of row 2
++        vldmia   r1, {d20-d23}
++        // first 32b of b is mostly available in second 32b of c
++        vext.8   q8, q12, q6, #16 - \pb
++        vext.8   q9, q6, q7, #16 - \pb
++        vext.8   q11, q10, q11, #16 - \pb  @ q11 is built before q10 because q10 is still an input here
++        vext.8   q10, q13, q10, #16 - \pb
++
++        bl       \body_fn
++
++        vst1.8   {q0-q1}, [r0, :256], r2
++        vst1.8   {q2-q3}, [r7, :256], r2
++        ble      2f
++
++        vldr     d24, [r6, #32]  @ 8 bytes right of the second row just processed
++        add      r6, r6, r3, lsl #1
++        vldr     d11, [r1, #24]  @ last 8 bytes of the next c row, reloaded unshifted
++        vext.8   d10, d22, d23, #\pb
++        vldr     d30, [r1, #32]
++        add      r1, r1, r3, lsl #1
++        // first 32b of a is mostly available in second 32b of c
++        vext.8   q0, q6, q7, #\pb
++        vext.8   q1, q7, q12, #\pb
++        // first 32b of c is mostly available in second 32b of b
++        vext.8   q4, q10, q11, #\pb
++        // second 32b of a is mostly available in first 32b of c
++        vext.8   q3, q5, q15, #\pb
++        vext.8   q2, q4, q5, #\pb
++        b        1b
++
++2:      pop      {r7, pc}
++.endm
++
++.macro  edge_16b_e3, body_fn, pb  @ e3: 16 bytes per row, a[x] = above[x + pb], b[x] = below[x - pb]
++        push     {lr}
++        sub      r6, r1, r3  @ r6 -> row above the first source row
++        vld1.8   {q1}, [r1, :128], r3  @ c = current row, r1 -> row below
++        vldmia   r6, {d18-d20}  @ 24 bytes of the row above
++        add      r6, r6, r3  @ r6 -> current row
++
++1:      vldr     d5, [r1, #-8]  @ 8 bytes left of the row below (high half of q2)
++        vld1.8   {q3}, [r1, :128]  @ row below
++        subs     r12, #1  @ flags are tested by the ble after the call, so body_fn must not alter them
++        vext.8   q0, q9, q10, #\pb  @ a = row above starting pb bytes to the right
++        vext.8   q2, q2, q3, #16 - \pb  @ b = row below starting pb bytes to the left
++        bl       \body_fn
++        vst1.8   {q0}, [r0, :128], r2
++        ble      2f
++        vmov     q9, q1  @ current row becomes the next row above
++        vldr     d3, [r1, #8]  @ next c: last 8 bytes reloaded, rest recovered from b below
++        add      r1, r1, r3
++        vldr     d20, [r6, #16]  @ 8 bytes right of the new row above
++        add      r6, r6, r3
++        vext.8   d2, d4, d5, #\pb  @ relies on body_fn leaving q2 intact
++        b        1b
++
++2:      pop      {pc}
++.endm
++
++.macro  edge_8bx2_e3, body_fn, pb  @ e3: two 8-byte rows per iteration, a[x] = above[x + pb], b[x] = below[x - pb]
++        sub      r6, r1, r3  @ r6 -> row above the first source row
++        push     {r7, lr}
++        add      r7, r0, r2  @ r7 -> second output row
++        lsl      r2, #1
++        vld1.8   {d18-d19}, [r6]  @ row -1 and the 8 bytes to its right
++        add      r6, r6, r3, lsl #1  @ r6 -> row 1
++        vldr     d20, [r1, #8]  @ row 0, 8 bytes to the right
++        vldr     d2, [r1]  @ row 0 (c low half)
++        add      r1, r1, r3, lsl #1  @ r1 -> row 2
++        vldr     d4, [r6, #-8]  @ row 1, 8 bytes to the left
++        vldr     d3, [r6]  @ row 1 (c high half)
++        vldr     d21, [r1, #-8]  @ row 2, 8 bytes to the left
++        vldr     d22, [r1]  @ row 2
++
++1:      vext.8   d0, d18, d19, #\pb  @ a low = row -1 shifted
++        vext.8   d4, d4, d3, #8 - \pb  @ b low = row 1 shifted
++        vext.8   d1, d2, d20, #\pb  @ a high = row 0 shifted
++        subs     r12, #2  @ flags are tested by the ble after the call, so body_fn must not alter them
++        vext.8   d5, d21, d22, #8 - \pb  @ b high = row 2 shifted
++
++        bl       \body_fn
++
++        vst1.8   {d0}, [r0, :64], r2
++        vst1.8   {d1}, [r7, :64], r2
++        ble      2f
++
++        vldr     d19, [r6, #8]  @ reload the neighbours for the next pair of rows
++        add      r6, r6, r3, lsl #1
++        vldr     d20, [r1, #8]
++        vmov     d18, d3  @ second row of this pair is the row above the next pair
++        vldr     d2, [r1]
++        add      r1, r1, r3, lsl #1
++        vldr     d4, [r6, #-8]
++        vldr     d3, [r6]
++        vldr     d21, [r1, #-8]
++        vldr     d22, [r1]
++        b        1b
++
++2:      pop      {r7, pc}
++.endm
++
++.macro  edge_4bx4_e3, body_fn, pb  @ e3 for four 4-byte rows per iteration
++        @ e3 is the same as e2 but with the X offset reversed (a read pb to the right, b read pb to the left)
++        edge_4bx4_e2 \body_fn, (-\pb)
++.endm
++
++@ Jump table entry - if in Thumb mode the bottom bit must be set
++@ ? There is probably a real asm instruction to do this but I haven't found it
++.macro jent lab
++.if jent_pic  @ jent_pic is set outside this macro
++@ Could use .short here but due to A32 not supporting ldrh [lsl#1] it is
++@ simpler and clearer in the code to stick with .word
++T       .word  (0 + \lab) - (4 + 98b)  @ offset from label 98 (defined by the caller); NOTE(review): 4 is presumably the Thumb pc read-ahead
++A       .word  (0 + \lab) - (8 + 98b)  @ same with 8, presumably the Arm pc read-ahead
++.else
++T       .word   1 + \lab  @ absolute address with bit 0 set
++A       .word   \lab  @ plain absolute address
++.endif
++.endm
++
++.macro edge_64b_bodies, body_fn, pb  @ 4-entry jump table followed by the 64-byte-row loop bodies
++        jent    0f  @ entry 0 -> e0 body
++        jent    10f  @ entry 1 -> e1 body
++        jent    20f  @ entry 2 -> e2 body
++        jent    30f  @ entry 3 -> e3 body
++
++0:      edge_64b_e0     \body_fn, \pb
++10:     edge_64b_e1     \body_fn  @ the e1 variant takes no pb argument
++20:     edge_64b_e2     \body_fn, \pb
++30:     edge_64b_e3     \body_fn, \pb
++.endm
++
++.macro edge_32bx2_bodies, body_fn, pb  @ 4-entry jump table followed by the two-rows-of-32-bytes loop bodies
++        jent    0f  @ entry 0 -> e0 body
++        jent    10f  @ entry 1 -> e1 body
++        jent    20f  @ entry 2 -> e2 body
++        jent    30f  @ entry 3 -> e3 body
++
++0:      edge_32bx2_e0   \body_fn, \pb
++10:     edge_32bx2_e1   \body_fn  @ the e1 variant takes no pb argument
++20:     edge_32bx2_e2   \body_fn, \pb
++30:     edge_32bx2_e3   \body_fn, \pb
++.endm
++
++.macro edge_16b_bodies, body_fn, pb  @ 4-entry jump table followed by the 16-byte-row loop bodies
++        jent    0f  @ entry 0 -> e0 body
++        jent    10f  @ entry 1 -> e1 body
++        jent    20f  @ entry 2 -> e2 body
++        jent    30f  @ entry 3 -> e3 body
++
++0:      edge_16b_e0     \body_fn, \pb
++10:     edge_16b_e1     \body_fn  @ the e1 variant takes no pb argument
++20:     edge_16b_e2     \body_fn, \pb
++30:     edge_16b_e3     \body_fn, \pb
++.endm
++
++.macro edge_32bx2_16b_bodies, body_fn_64b, body_fn_16b, pb  @ 8-entry jump table: 32-byte x2 bodies, then 16-byte bodies
++        jent    0f  @ entries 0-3 -> 32-byte x2 bodies e0..e3
++        jent    10f
++        jent    20f
++        jent    30f
++        jent    5f  @ entries 4-7 -> 16-byte bodies e0..e3
++        jent    15f
++        jent    25f
++        jent    35f
++
++0:      edge_32bx2_e0   \body_fn_64b, \pb
++10:     edge_32bx2_e1   \body_fn_64b
++20:     edge_32bx2_e2   \body_fn_64b, \pb
++30:     edge_32bx2_e3   \body_fn_64b, \pb
++5:      edge_16b_e0     \body_fn_16b, \pb  @ the narrower bodies use the second body function
++15:     edge_16b_e1     \body_fn_16b
++25:     edge_16b_e2     \body_fn_16b, \pb
++35:     edge_16b_e3     \body_fn_16b, \pb
++.endm
++
++.macro edge_16b_8bx2_bodies, body_fn, pb  @ 8-entry jump table: 16-byte bodies, then 8-byte x2 bodies
++        jent    0f  @ entries 0-3 -> 16-byte bodies e0..e3
++        jent    10f
++        jent    20f
++        jent    30f
++        jent    5f  @ entries 4-7 -> 8-byte x2 bodies e0..e3
++        jent    15f
++        jent    25f
++        jent    35f
++
++0:      edge_16b_e0     \body_fn, \pb
++10:     edge_16b_e1     \body_fn
++20:     edge_16b_e2     \body_fn, \pb
++30:     edge_16b_e3     \body_fn, \pb
++5:      edge_8bx2_e0    \body_fn, \pb  @ both widths share the same body function
++15:     edge_8bx2_e1    \body_fn
++25:     edge_8bx2_e2    \body_fn, \pb
++35:     edge_8bx2_e3    \body_fn, \pb
++.endm
++
++.macro edge_8bx2_4bx4_bodies, body_fn, pb  @ 8-entry jump table: 8-byte x2 bodies, then 4-byte x4 bodies
++        jent    0f  @ entries 0-3 -> 8-byte x2 bodies e0..e3
++        jent    10f
++        jent    20f
++        jent    30f
++        jent    5f  @ entries 4-7 -> 4-byte x4 bodies e0..e3
++        jent    15f
++        jent    25f
++        jent    35f
++
++0:      edge_8bx2_e0    \body_fn, \pb
++10:     edge_8bx2_e1    \body_fn
++20:     edge_8bx2_e2    \body_fn, \pb
++30:     edge_8bx2_e3    \body_fn, \pb
++5:      edge_4bx4_e0    \body_fn, \pb  @ both widths share the same body function
++15:     edge_4bx4_e1    \body_fn
++25:     edge_4bx4_e2    \body_fn, \pb
++35:     edge_4bx4_e3    \body_fn, \pb
++.endm
++
++@ void ff_hevc_rpi_sao_edge_8_neon_8(
++@   uint8_t *_dst,            [r0]
++@   uint8_t *_src,            [r1]
++@   int  stride_dst,          [r2]
++@   int16_t *_sao_offset_val, [r3]
++@   int eo,                   [sp, #0]
++@   int width,                [sp, #4]
++@   int height)               [sp, #8]
++
++function ff_hevc_rpi_sao_edge_8_neon_8, export=1
++        edge_16b_init   8, 0, 1, 99f  @ setup macro defined outside this chunk; it is handed the table label below
++99:  @ start of the jump table emitted by the bodies macro
++        edge_8bx2_4bx4_bodies edge_16b_body_8, 1  @ 8-byte x2 and 4-byte x4 bodies; pb = 1 byte to the horizontal neighbour
++endfunc
++
++@ void ff_hevc_rpi_sao_edge_16_neon_8(
++@   uint8_t *_dst,            [r0]
++@   uint8_t *_src,            [r1]
++@   int  stride_dst,          [r2]
++@   int16_t *_sao_offset_val, [r3]
++@   int eo,                   [sp, #0]
++@   int width,                [sp, #4]
++@   int height)               [sp, #8]
++
++function ff_hevc_rpi_sao_edge_16_neon_8, export=1
++        edge_16b_init   8, 0, 0, 99f  @ setup macro defined outside this chunk; it is handed the table label below
++99:  @ start of the jump table emitted by the bodies macro
++        edge_16b_bodies edge_16b_body_8, 1  @ 16-byte row bodies; pb = 1 byte to the horizontal neighbour
++endfunc
++
++@ void ff_hevc_rpi_sao_edge_32_neon_8(
++@   uint8_t *_dst,            [r0]
++@   uint8_t *_src,            [r1]
++@   int  stride_dst,          [r2]
++@   int16_t *_sao_offset_val, [r3]
++@   int eo,                   [sp, #0]
++@   int width,                [sp, #4]
++@   int height)               [sp, #8]
++
++function ff_hevc_rpi_sao_edge_32_neon_8, export=1
++        edge_64b_init   8, 0, 0, 99f  @ setup macro defined outside this chunk; it is handed the table label below
++99:  @ start of the jump table emitted by the bodies macro
++        edge_32bx2_bodies edge_64b_body_8, 1  @ two 32-byte rows per pass; pb = 1 byte to the horizontal neighbour
++endfunc
++
++@ void ff_hevc_rpi_sao_edge_64_neon_8(
++@   uint8_t *_dst,            [r0]
++@   uint8_t *_src,            [r1]
++@   int  stride_dst,          [r2]
++@   int16_t *_sao_offset_val, [r3]
++@   int eo,                   [sp, #0]
++@   int width,                [sp, #4]
++@   int height)               [sp, #8]
++
++function ff_hevc_rpi_sao_edge_64_neon_8, export=1
++        edge_64b_init   8, 0, 0, 99f  @ setup macro defined outside this chunk; it is handed the table label below
++99:  @ start of the jump table emitted by the bodies macro
++        edge_64b_bodies edge_64b_body_8, 1  @ 64-byte row bodies; pb = 1 byte to the horizontal neighbour
++endfunc
++
++@ ff_hevc_rpi_sao_edge_c_8_neon_8(
++@   uint8_t *_dst,                    [r0]
++@   const uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,             [r2]
++@   const int16_t *_sao_offset_val_u, [r3]
++@   const int16_t *_sao_offset_val_v, [sp, #0]
++@   int eo,                           [sp, #4]
++@   int width,                        [sp, #8]
++@   int height)                       [sp, #12]
++
++function ff_hevc_rpi_sao_edge_c_8_neon_8, export=1
++        edge_16b_init   8, 1, 1, 99f  @ setup macro defined outside this chunk; it is handed the table label below
++99:  @ start of the jump table emitted by the bodies macro
++        edge_16b_8bx2_bodies edge_16b_body_8, 2  @ pb = 2; NOTE(review): presumably interleaved u/v samples - confirm
++endfunc
++
++@ ff_hevc_rpi_sao_edge_c_16_neon_8(
++@   uint8_t *_dst,                    [r0]
++@   const uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,             [r2]
++@   const int16_t *_sao_offset_val_u, [r3]
++@   const int16_t *_sao_offset_val_v, [sp, #0]
++@   int eo,                           [sp, #4]
++@   int width,                        [sp, #8]
++@   int height)                       [sp, #12]
++
++function ff_hevc_rpi_sao_edge_c_16_neon_8, export=1
++        edge_64b_init   8, 1, 0, 99f  @ setup macro defined outside this chunk; it is handed the table label below
++99:  @ start of the jump table emitted by the bodies macro
++        edge_32bx2_bodies edge_64b_body_8, 2  @ pb = 2; NOTE(review): presumably interleaved u/v samples - confirm
++endfunc
++
++@ ff_hevc_rpi_sao_edge_c_32_neon_8(
++@   uint8_t *_dst,                    [r0]
++@   const uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,             [r2]
++@   const int16_t *_sao_offset_val_u, [r3]
++@   const int16_t *_sao_offset_val_v, [sp, #0]
++@   int eo,                           [sp, #4]
++@   int width,                        [sp, #8]
++@   int height)                       [sp, #12]
++
++function ff_hevc_rpi_sao_edge_c_32_neon_8, export=1
++        edge_64b_init   8, 1, 0, 99f  @ setup macro defined outside this chunk; it is handed the table label below
++99:  @ start of the jump table emitted by the bodies macro
++        edge_64b_bodies edge_64b_body_8, 2  @ pb = 2; NOTE(review): presumably interleaved u/v samples - confirm
++endfunc
++
++@ void ff_hevc_rpi_sao_edge_8_neon_10(
++@   uint8_t *_dst,            [r0]
++@   uint8_t *_src,            [r1]
++@   int  stride_dst,          [r2]
++@   int16_t *_sao_offset_val, [r3]
++@   int eo,                   [sp, #0]
++@   int width,                [sp, #4]
++@   int height)               [sp, #8]
++
++function ff_hevc_rpi_sao_edge_8_neon_10, export=1
++        edge_16b_init   10, 0, 1, 99f  @ setup macro defined outside this chunk; it is handed the table label below
++99:  @ start of the jump table emitted by the bodies macro
++        edge_16b_8bx2_bodies edge_16b_body_16, 2  @ pb = 2: the horizontal neighbour is one 16-bit sample away
++endfunc
++
++@ void ff_hevc_rpi_sao_edge_16_neon_10(
++@   uint8_t *_dst,            [r0]
++@   uint8_t *_src,            [r1]
++@   int  stride_dst,          [r2]
++@   int16_t *_sao_offset_val, [r3]
++@   int eo,                   [sp, #0]
++@   int width,                [sp, #4]
++@   int height)               [sp, #8]
++
++function ff_hevc_rpi_sao_edge_16_neon_10, export=1
++        edge_64b_init   10, 0, 0, 99f  @ setup macro defined outside this chunk; it is handed the table label below
++99:  @ start of the jump table emitted by the bodies macro
++        edge_32bx2_bodies edge_64b_body_16, 2  @ pb = 2: the horizontal neighbour is one 16-bit sample away
++endfunc
++
++@ void ff_hevc_rpi_sao_edge_64_neon_10(
++@   uint8_t *_dst,            [r0]
++@   uint8_t *_src,            [r1]
++@   int  stride_dst,          [r2]
++@   int16_t *_sao_offset_val, [r3]
++@   int eo,                   [sp, #0]
++@   int width,                [sp, #4]
++@   int height)               [sp, #8]
++
++@ We simply split the 64 case into 2 vertical stripes
++@ and call the fns for w32
++@
++@ Calling code will always have src != dst so we don't have to worry
++@ about edge effects
++
++function ff_hevc_rpi_sao_edge_64_neon_10, export=1
++        edge_64b_init   10, 0, 1, 99f, xjump=1  @ no table here: the next 99 label in the file is the one in ff_hevc_rpi_sao_edge_32_neon_10
++endfunc
++
++@ void ff_hevc_rpi_sao_edge_32_neon_10(
++@   uint8_t *_dst,            [r0]
++@   uint8_t *_src,            [r1]
++@   int  stride_dst,          [r2]
++@   int16_t *_sao_offset_val, [r3]
++@   int eo,                   [sp, #0]
++@   int width,                [sp, #4]
++@   int height)               [sp, #8]
++
++function ff_hevc_rpi_sao_edge_32_neon_10, export=1
++        edge_64b_init   10, 0, 0, 99f  @ setup macro defined outside this chunk; it is handed the table label below
++99:  @ jump table; also the nearest 99 label after ff_hevc_rpi_sao_edge_64_neon_10, which has none of its own
++        edge_64b_bodies edge_64b_body_16, 2  @ pb = 2: the horizontal neighbour is one 16-bit sample away
++endfunc
++
++@ ff_hevc_rpi_sao_edge_c_8_neon_10(
++@   uint8_t *_dst,                    [r0]
++@   const uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,             [r2]
++@   const int16_t *_sao_offset_val_u, [r3]
++@   const int16_t *_sao_offset_val_v, [sp, #0]
++@   int eo,                           [sp, #4]
++@   int width,                        [sp, #8]
++@   int height)                       [sp, #12]
++
++function ff_hevc_rpi_sao_edge_c_8_neon_10, export=1
++        edge_xxb_init   10, 1, 99f, check_w4=1, setup_16b=1, setup_64b=1  @ setup macro defined outside this chunk
++99:  @ start of the 8-entry jump table emitted by the bodies macro
++        edge_32bx2_16b_bodies edge_64b_body_16, edge_16b_body_16, 4  @ pb = 4; NOTE(review): presumably interleaved 16-bit u/v - confirm
++endfunc
++
++@ ff_hevc_rpi_sao_edge_c_32_neon_10(
++@   uint8_t *_dst,                    [r0]
++@   const uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,             [r2]
++@   const int16_t *_sao_offset_val_u, [r3]
++@   const int16_t *_sao_offset_val_v, [sp, #0]
++@   int eo,                           [sp, #4]
++@   int width,                        [sp, #8]
++@   int height)                       [sp, #12]
++
++function ff_hevc_rpi_sao_edge_c_32_neon_10, export=1
++        edge_64b_init   10, 1, 1, 99f, xjump=1  @ no table here: the next 99 label in the file is the one in ff_hevc_rpi_sao_edge_c_16_neon_10
++endfunc
++
++
++@ ff_hevc_rpi_sao_edge_c_16_neon_10(
++@   uint8_t *_dst,                    [r0]
++@   const uint8_t *_src,              [r1]
++@   ptrdiff_t stride_dst,             [r2]
++@   const int16_t *_sao_offset_val_u, [r3]
++@   const int16_t *_sao_offset_val_v, [sp, #0]
++@   int eo,                           [sp, #4]
++@   int width,                        [sp, #8]
++@   int height)                       [sp, #12]
++
++function ff_hevc_rpi_sao_edge_c_16_neon_10, export=1
++        edge_64b_init   10, 1, 0, 99f  @ setup macro defined outside this chunk; it is handed the table label below
++99:  @ jump table; also the nearest 99 label after ff_hevc_rpi_sao_edge_c_32_neon_10, which has none of its own
++        edge_64b_bodies edge_64b_body_16, 4  @ pb = 4; NOTE(review): presumably interleaved 16-bit u/v - confirm
++endfunc
++
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcpred_arm.h
+@@ -0,0 +1,28 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVCODEC_ARM_HEVCPRED_ARM_H  /* NOTE(review): guard name lacks the rpi_ prefix of this file name */
++#define AVCODEC_ARM_HEVCPRED_ARM_H
++
++#include "libavcodec/rpi_hevcpred.h"
++
++void ff_hevc_rpi_pred_init_arm(HEVCRpiPredContext * const c, const int bit_depth);  /* calls the NEON init when the CPU reports NEON */
++void ff_hevc_rpi_pred_init_neon(HEVCRpiPredContext * const c, const int bit_depth);  /* NEON-specific init; receives the same c and bit_depth */
++
++#endif /* AVCODEC_ARM_HEVCPRED_ARM_H */
++
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcpred_init_arm.c
+@@ -0,0 +1,35 @@
++/*
++ * Copyright (c) 2018 John Cox (for Raspberry Pi)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/attributes.h"
++#include "libavutil/cpu.h"
++#include "libavutil/arm/cpu.h"
++
++#include "libavcodec/rpi_hevcpred.h"
++#include "rpi_hevcpred_arm.h"
++
++av_cold void ff_hevc_rpi_pred_init_arm(HEVCRpiPredContext * const c, const int bit_depth)
++{
++    const int cpu_flags = av_get_cpu_flags();  /* feature bits of the running CPU */
++    if (!have_neon(cpu_flags))
++        return;  /* no NEON: leave the context as the caller set it up */
++    ff_hevc_rpi_pred_init_neon(c, bit_depth);  /* hand over to the NEON-specific init */
++}
++
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcpred_init_neon.c
+@@ -0,0 +1,210 @@
++/*
++ * Copyright (c) 2018 John Cox (for Raspberry Pi)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "rpi_hevcpred_arm.h"
++
++intra_filter_fn_t ff_hevc_rpi_intra_filter_4_neon_8;
++intra_filter_fn_t ff_hevc_rpi_intra_filter_8_neon_8;
++intra_filter_fn_t ff_hevc_rpi_intra_filter_4_neon_16;
++intra_filter_fn_t ff_hevc_rpi_intra_filter_8_neon_16;
++intra_filter_fn_t ff_hevc_rpi_intra_filter_16_neon_16;
++intra_filter_fn_t ff_hevc_rpi_intra_filter_4_neon_32;
++intra_filter_fn_t ff_hevc_rpi_intra_filter_8_neon_32;
++intra_filter_fn_t ff_hevc_rpi_intra_filter_16_neon_32;
++
++void ff_hevc_rpi_pred_angular_4_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_angular_8_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_angular_16_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_angular_32_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_angular_c_4_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_angular_c_8_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_angular_c_16_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_angular_4_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_angular_8_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_angular_16_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_angular_32_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_angular_c_4_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_angular_c_8_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_angular_c_16_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++
++void ff_hevc_rpi_pred_vertical_4_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_vertical_8_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_vertical_16_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_vertical_32_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_vertical_c_4_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_vertical_c_8_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_vertical_c_16_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_vertical_4_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_vertical_8_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_vertical_16_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_vertical_32_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_vertical_c_4_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_vertical_c_8_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_vertical_c_16_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++
++void ff_hevc_rpi_pred_horizontal_4_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_horizontal_8_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_horizontal_16_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_horizontal_32_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_horizontal_c_4_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_horizontal_c_8_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_horizontal_c_16_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_horizontal_4_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_horizontal_8_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_horizontal_16_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_horizontal_32_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_horizontal_c_4_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_horizontal_c_8_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++void ff_hevc_rpi_pred_horizontal_c_16_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride, int mode);
++
++void ff_hevc_rpi_pred_planar_4_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_planar_8_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_planar_16_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_planar_32_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_planar_c_4_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_planar_c_8_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_planar_c_16_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_planar_4_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_planar_8_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_planar_16_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_planar_32_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_planar_c_4_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_planar_c_8_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_planar_c_16_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++
++void ff_hevc_rpi_pred_dc_4_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_dc_8_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_dc_16_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_dc_32_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_dc_c_4_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_dc_c_8_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_dc_c_16_neon_8(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_dc_4_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_dc_8_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_dc_16_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_dc_32_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_dc_c_4_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_dc_c_8_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++void ff_hevc_rpi_pred_dc_c_16_neon_10(uint8_t *src, const uint8_t *top, const uint8_t *left, ptrdiff_t stride);
++
++void ff_hevc_rpi_pred_init_neon(HEVCRpiPredContext * const c, const int bit_depth)
++{   // Installs the NEON routines into the function-pointer tables of c for the given bit_depth.
++    switch (bit_depth)
++    {
++    case 8:   // NOTE(review): intra_filter[2] is assigned for 10-bit but not here - confirm this is intended
++        c->intra_filter[0] = ff_hevc_rpi_intra_filter_4_neon_8;
++        c->intra_filter[1] = ff_hevc_rpi_intra_filter_8_neon_8;
++        c->intra_filter_c[0] = ff_hevc_rpi_intra_filter_4_neon_16;  // Equivalent to c_4_neon_8
++        c->intra_filter_c[1] = ff_hevc_rpi_intra_filter_8_neon_16;
++        c->intra_filter_c[2] = ff_hevc_rpi_intra_filter_16_neon_16;
++
++        c->pred_angular[0] = ff_hevc_rpi_pred_angular_4_neon_8;  // in each table, slot n takes the routine named for size 4 << n
++        c->pred_angular[1] = ff_hevc_rpi_pred_angular_8_neon_8;
++        c->pred_angular[2] = ff_hevc_rpi_pred_angular_16_neon_8;
++        c->pred_angular[3] = ff_hevc_rpi_pred_angular_32_neon_8;
++        c->pred_angular_c[0] = ff_hevc_rpi_pred_angular_c_4_neon_8;  // the _c tables only have sizes 4, 8 and 16
++        c->pred_angular_c[1] = ff_hevc_rpi_pred_angular_c_8_neon_8;
++        c->pred_angular_c[2] = ff_hevc_rpi_pred_angular_c_16_neon_8;
++
++        c->pred_horizontal[0] = ff_hevc_rpi_pred_horizontal_4_neon_8;
++        c->pred_horizontal[1] = ff_hevc_rpi_pred_horizontal_8_neon_8;
++        c->pred_horizontal[2] = ff_hevc_rpi_pred_horizontal_16_neon_8;
++        c->pred_horizontal[3] = ff_hevc_rpi_pred_horizontal_32_neon_8;
++        c->pred_horizontal_c[0] = ff_hevc_rpi_pred_horizontal_c_4_neon_8;
++        c->pred_horizontal_c[1] = ff_hevc_rpi_pred_horizontal_c_8_neon_8;
++        c->pred_horizontal_c[2] = ff_hevc_rpi_pred_horizontal_c_16_neon_8;
++
++        c->pred_vertical[0] = ff_hevc_rpi_pred_vertical_4_neon_8;
++        c->pred_vertical[1] = ff_hevc_rpi_pred_vertical_8_neon_8;
++        c->pred_vertical[2] = ff_hevc_rpi_pred_vertical_16_neon_8;
++        c->pred_vertical[3] = ff_hevc_rpi_pred_vertical_32_neon_8;
++        c->pred_vertical_c[0] = ff_hevc_rpi_pred_vertical_c_4_neon_8;
++        c->pred_vertical_c[1] = ff_hevc_rpi_pred_vertical_c_8_neon_8;
++        c->pred_vertical_c[2] = ff_hevc_rpi_pred_vertical_c_16_neon_8;
++
++        c->pred_planar[0] = ff_hevc_rpi_pred_planar_4_neon_8;
++        c->pred_planar[1] = ff_hevc_rpi_pred_planar_8_neon_8;
++        c->pred_planar[2] = ff_hevc_rpi_pred_planar_16_neon_8;
++        c->pred_planar[3] = ff_hevc_rpi_pred_planar_32_neon_8;
++        c->pred_planar_c[0] = ff_hevc_rpi_pred_planar_c_4_neon_8;
++        c->pred_planar_c[1] = ff_hevc_rpi_pred_planar_c_8_neon_8;
++        c->pred_planar_c[2] = ff_hevc_rpi_pred_planar_c_16_neon_8;
++
++        c->pred_dc[0]   = ff_hevc_rpi_pred_dc_4_neon_8;
++        c->pred_dc[1]   = ff_hevc_rpi_pred_dc_8_neon_8;
++        c->pred_dc[2]   = ff_hevc_rpi_pred_dc_16_neon_8;
++        c->pred_dc[3]   = ff_hevc_rpi_pred_dc_32_neon_8;
++        c->pred_dc_c[0] = ff_hevc_rpi_pred_dc_c_4_neon_8;
++        c->pred_dc_c[1] = ff_hevc_rpi_pred_dc_c_8_neon_8;
++        c->pred_dc_c[2] = ff_hevc_rpi_pred_dc_c_16_neon_8;
++        break;
++    case 10:  // same tables as case 8, filled with the _neon_10 routines; filters use the _16 / _32 variants
++        c->intra_filter[0] = ff_hevc_rpi_intra_filter_4_neon_16;
++        c->intra_filter[1] = ff_hevc_rpi_intra_filter_8_neon_16;
++        c->intra_filter[2] = ff_hevc_rpi_intra_filter_16_neon_16;
++        c->intra_filter_c[0] = ff_hevc_rpi_intra_filter_4_neon_32;
++        c->intra_filter_c[1] = ff_hevc_rpi_intra_filter_8_neon_32;
++        c->intra_filter_c[2] = ff_hevc_rpi_intra_filter_16_neon_32;
++
++        c->pred_angular[0] = ff_hevc_rpi_pred_angular_4_neon_10;
++        c->pred_angular[1] = ff_hevc_rpi_pred_angular_8_neon_10;
++        c->pred_angular[2] = ff_hevc_rpi_pred_angular_16_neon_10;
++        c->pred_angular[3] = ff_hevc_rpi_pred_angular_32_neon_10;
++        c->pred_angular_c[0] = ff_hevc_rpi_pred_angular_c_4_neon_10;
++        c->pred_angular_c[1] = ff_hevc_rpi_pred_angular_c_8_neon_10;
++        c->pred_angular_c[2] = ff_hevc_rpi_pred_angular_c_16_neon_10;
++
++        c->pred_horizontal[0] = ff_hevc_rpi_pred_horizontal_4_neon_10;
++        c->pred_horizontal[1] = ff_hevc_rpi_pred_horizontal_8_neon_10;
++        c->pred_horizontal[2] = ff_hevc_rpi_pred_horizontal_16_neon_10;
++        c->pred_horizontal[3] = ff_hevc_rpi_pred_horizontal_32_neon_10;
++        c->pred_horizontal_c[0] = ff_hevc_rpi_pred_horizontal_c_4_neon_10;
++        c->pred_horizontal_c[1] = ff_hevc_rpi_pred_horizontal_c_8_neon_10;
++        c->pred_horizontal_c[2] = ff_hevc_rpi_pred_horizontal_c_16_neon_10;
++
++        c->pred_vertical[0] = ff_hevc_rpi_pred_vertical_4_neon_10;
++        c->pred_vertical[1] = ff_hevc_rpi_pred_vertical_8_neon_10;
++        c->pred_vertical[2] = ff_hevc_rpi_pred_vertical_16_neon_10;
++        c->pred_vertical[3] = ff_hevc_rpi_pred_vertical_32_neon_10;
++        c->pred_vertical_c[0] = ff_hevc_rpi_pred_vertical_c_4_neon_10;
++        c->pred_vertical_c[1] = ff_hevc_rpi_pred_vertical_c_8_neon_10;
++        c->pred_vertical_c[2] = ff_hevc_rpi_pred_vertical_c_16_neon_10;
++
++        c->pred_planar[0] = ff_hevc_rpi_pred_planar_4_neon_10;
++        c->pred_planar[1] = ff_hevc_rpi_pred_planar_8_neon_10;
++        c->pred_planar[2] = ff_hevc_rpi_pred_planar_16_neon_10;
++        c->pred_planar[3] = ff_hevc_rpi_pred_planar_32_neon_10;
++        c->pred_planar_c[0] = ff_hevc_rpi_pred_planar_c_4_neon_10;
++        c->pred_planar_c[1] = ff_hevc_rpi_pred_planar_c_8_neon_10;
++        c->pred_planar_c[2] = ff_hevc_rpi_pred_planar_c_16_neon_10;
++
++        c->pred_dc[0]   = ff_hevc_rpi_pred_dc_4_neon_10;
++        c->pred_dc[1]   = ff_hevc_rpi_pred_dc_8_neon_10;
++        c->pred_dc[2]   = ff_hevc_rpi_pred_dc_16_neon_10;
++        c->pred_dc[3]   = ff_hevc_rpi_pred_dc_32_neon_10;
++        c->pred_dc_c[0] = ff_hevc_rpi_pred_dc_c_4_neon_10;
++        c->pred_dc_c[1] = ff_hevc_rpi_pred_dc_c_8_neon_10;
++        c->pred_dc_c[2] = ff_hevc_rpi_pred_dc_c_16_neon_10;
++        break;
++    default:  // any other bit depth: c is left exactly as the caller passed it
++        break;
++    }
++}
++
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcpred_intra_angular_neon.S
+@@ -0,0 +1,2984 @@
++/*
++Copyright (c) 2017 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox, Ben Avison
++*/
++
++/*
++ * General angular pred
++ *
++ * Horizontal (10) & Vertical (26) cases have their own file
++ * and are not dealt with properly here (luma filtering is missing)
++ *
++ * The inv_angle calculations are annoying - if it wasn't for the +128
++ * rounding step then the result would simply be the loop counter :-(
++ */
++
++
++#include "libavutil/arm/asm.S"
++#include "neon.S"
++
++.text
++
++@ Horizontal Patch functions
++@ These need a transpose before store so exist as smaller patches
++@ Patches can be called repeatedly without any intermediate setup
++@ to generate a horizontal block
++@
++@ It is almost certainly the case that larger patch fns can be built
++@ and they would be a little faster, but we would still need the small
++@ fns and code size (or at least instruction cache size) is an issue
++@ given how much code we already have here
++
++@ Generate 8x8 luma 8 patch
++@
++@ r3   Out stride
++@ r4   Angle add
++@ r7   Inv angle (_up only)
++@
++@ In/Out (updated)
++@ r0   Out pointer - on exit point to start of next patch horizontally (i.e. r0 + patch width)
++@ r2   Left ptr - updated
++@ r10  Inv angle accumulator (_up only)
++@ r12  32 - angle frac (_down) or angle frac (_up)
++@ d0   Older reference samples
++@ d1=r8+r9  Newer reference samples
++@ d2   32 - angle frac
++@ d3   Angle frac
++@ q2   Partially computed next result (_up only)
++@
++@ Temps
++@ r5   Loop counter
++@ r6   Initial angle frac on entry to patch_h_down_8x8_8 (not via _continue), then temp
++@ r7   (_down only)
++@ r11  (_up only)
++@ q2, q8-q11
++
++patch_h_down_8x8_8:
++        ldrd        r8, r9, [r2]        @ Left
++        rsb         r12, r6, #32        @ r12 = 32 - r6 (r6 is read here before being written, so the caller supplies it)
++        vmov        d0, r8, r9          @ d0 = the 8 bytes at [r2]
++        vdup.8      d3, r6
++        lsr         r8, #8
++        vdup.8      d2, r12
++        orr         r8, r8, r9, lsl #24 @ r8 = old r8:r9 shifted right by one byte (low word)
++        ldr         r9, [r2, #5]!       @ pre-indexed with writeback: r2 += 5, r9 = 4 bytes from there
++        vmov        d1, r8, r9          @ d1 = the 8 bytes at original r2 + 1 (assuming little-endian loads)
++        // drop through...
++patch_h_down_8x8_8_continue:
++        mov         r5, #8              @ 8 passes, each producing one d register of output
++1:
++          subs        r12, r4           @ step r12 by the angle add, the mi-conditional ops below act when it goes negative
++        vmull.u8    q2, d0, d2          @ q2 = d0 * d2 ...
++          it          mi
++          addmi       r12, #32
++        vmlal.u8    q2, d1, d3          @ ... + d1 * d3
++          rsb         r6, r12, #32
++        vext.8      q8, q8, q9, #8      @ q8-q11 form a queue of 8 d registers, moved along by one d register per pass
++          itt         mi
++          lsrmi       r7, r8, #8
++          vmovmi      d0, r8, r9        @ on wrap: previous d1 contents become d0
++          vdup.8      d2, r12
++        vext.8      q9, q9, q10, #8
++          it          mi
++          orrmi       r8, r7, r9, lsl #24
++        vext.8      q10, q10, q11, #8
++          it          mi
++          ldrmi       r9, [r2, #1]!     @ on wrap: r2 += 1 and the next 4 bytes are fetched
++        vmov        d22, d23
++        vrshrn.u16  d23, q2, #5         @ rounding shift right by 5, narrowed to bytes, newest result enters at d23
++          it          mi
++          vmovmi      d1, r8, r9
++        subs        r5, #1              @ flags from the first subs are dead from here on
++          vdup.8      d3, r6
++        bne         1b
++        // drop through...
++store_tran_8x8_8:
++        vzip.8      d16, d17            @ vzip steps at 8, 16 and 32 bit width: the transpose the section comment calls for
++        add         r6, r0, r3          @ r6 = r0 + stride
++        vzip.8      d18, d19
++        lsl         r3, #1              @ stride temporarily doubled (undone by the asr below)
++        vzip.8      d20, d21
++        add         r5, r0, r3          @ r5 = r0 + 2 * stride
++        vzip.8      d22, d23
++        vzip.16     q8, q9
++        vzip.16     q10, q11
++        vzip.32     q8, q10
++        vzip.32     q9, q11
++        vst1.8      {d16}, [r0]!        @ writeback advances r0 by the 8 bytes stored
++        vst1.8      {d17}, [r6], r3     @ r6 and r5 each step by the doubled stride
++        vst1.8      {d20}, [r5], r3
++        vst1.8      {d21}, [r6], r3
++        vst1.8      {d18}, [r5], r3
++        vst1.8      {d19}, [r6], r3
++        vst1.8      {d22}, [r5]
++        asr         r3, #1              @ restore the stride the caller passed in
++        vst1.8      {d23}, [r6]
++
++        bx          lr
++
++patch_h_up_8x8_8:
++        ldrd        r8, r9, [r2]        @ r8:r9 = the 8 bytes at [r2]
++        rsb         r6, r4, #32         @ r6 = 32 - r4
++        vmov        d0, r8, r9
++        vdup.8      d3, r4
++        lsr         r11, r8, #24
++        vdup.8      d2, r6
++        ldr         r8, [r2, #-1]!      @ pre-indexed with writeback: r2 -= 1, r8 = 4 bytes from there
++        orr         r9, r11, r9, lsl #8
++        vmov        d1, r8, r9          @ d1 = the 8 bytes starting one byte before d0 (assuming little-endian loads)
++        mov         r12, r4
++        vmull.u8    q2, d0, d2          @ first result is started here, each loop pass starts the next one
++        vmlal.u8    q2, d1, d3
++patch_h_up_8x8_8_continue:
++        mov         r5, #8              @ 8 passes, each producing one d register of output
++1:
++          add         r12, r4
++          mov         r11, #0
++          cmp         r12, #33          @ cs when r12 >= 33
++          it          cs
++          addcs       r10, r7           @ advance the inv angle accumulator
++        vext.8      q8, q8, q9, #8      @ q8-q11 form a queue of 8 d registers, moved along by one d register per pass
++          itt         cs
++          subcs       r12, #32
++          tstcs       r10, #1<<31       @ NOTE(review): the eq and hi conditions below appear to depend on the flags this leaves - confirm before reordering
++          rsb         r6, r12, #32
++          it          eq
++          asreq       r11, r10, #8
++          it          cs
++          vmovcs      d0, r8, r9
++          vdup.8      d2, r6
++          it          cs
++          lsrcs       r6, r8, #24
++        vext.8      q9, q9, q10, #8
++          itt         cs
++          orrcs       r9, r6, r9, lsl #8
++          ldrbcs      r11, [r1, r11]    @ byte fetched relative to r1
++          vdup.8      d3, r12
++        vext.8      q10, q10, q11, #8
++          it          hi
++          ldrbhi      r11, [r2, #-1]!   @ alternative source: r2 -= 1 and the byte there replaces r11
++        vmov        d22, d23
++        vrshrn.u16  d23, q2, #5         @ rounding shift right by 5, narrowed to bytes, newest result enters at d23
++          itt         cs
++          orrcs       r8, r11, r8, lsl #8
++          vmovcs      d1, r8, r9
++          vmull.u8    q2, d0, d2
++        subs        r5, #1
++          vmlal.u8    q2, d1, d3
++        bne         1b
++
++        b           store_tran_8x8_8    @ tail branch: the shared store code ends with bx lr
++
++
++.macro ADRT reg, val
++@ Put the address \val in \reg: adr in T32 has enough range but not in A32, so the A line uses adrl (A/T prefixes are defined outside this chunk)
++A       adrl        \reg, \val
++T       adr         \reg, \val
++.endm
++
++@ ff_hevc_rpi_pred_angular_4_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_4_neon_8, export=1
++        ldr         r12, [sp]           @ r12 = mode, read before the push below moves sp
++        push        {r4-r8, lr}
++        ADRT        r4, angle_2 - 2     @ base biased by 2 so that mode indexes the table directly
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1 @ r7 = inv_angle + (mode - 11) * 2, only dereferenced at 10 and 18 below
++        ldrsb       r6, [r4, r12]       @ r6 = signed byte at angle_2 + mode - 2
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]       @ r4 = the same table entry, kept as the per-step add
++        bge         26f                 @ mode >= 26
++        cmp         r12, #18
++        bge         18f                 @ mode 18..25
++        cmp         r12, #10
++        bge         10f                 @ mode 10..17, anything lower falls through
++
++@ Down of Horizontal - works down left
++        ldr         lr, [r2], #1        @ Left
++        rsb         r12, r6, #32
++        vmov        s0, lr
++        vdup.8      d3, r6
++        ldr         lr, [r2], #1        @ 4 bytes starting one byte further on
++        vdup.8      d2, r12
++        vmov        s2, lr
++          subs        r12, r4
++        vmull.u8    q2, d0, d2
++          it          mi
++          addmi       r12, #32
++        vmlal.u8    q2, d1, d3
++          rsb         r6, r12, #32
++          itt         mi
++          vmovmi      s0, lr
++          ldrmi       lr, [r2], #1
++          vdup.8      d2, r12
++          it          mi
++          vmovmi      s2, lr
++          vdup.8      d3, r6
++        mov         r5, #2
++1:
++        vrshrn.u16  d20, q2, #5
++            subs        r12, r4
++          vmull.u8    q2, d0, d2
++            it          mi
++            addmi       r12, #32
++          vmlal.u8    q2, d1, d3
++            rsb         r6, r12, #32
++        vext.64     q8, q8, q9, #1
++            it          mi
++            vmovmi      s0, lr
++        vext.64     q9, q9, q10, #1
++            it          mi
++            ldrmi       lr, [r2], #1
++            vdup.8      d2, r12
++            it          mi
++            vmovmi      s2, lr
++        subs        r5, #1
++            vdup.8      d3, r6
++        bne         1b
++
++          vrshrn.u16  d20, q2, #5
++            vmull.u8    q2, d0, d2
++        add         r12, r0,  r3        @ r12 = r0 + stride
++            vmlal.u8    q2, d1, d3
++        lsl         r3,  #1             @ stride doubled for the stores at 98
++          vext.64     q8, q8, q9, #1
++          vext.64     q9, q9, q10, #1
++            vrshrn.u16  d20, q2, #5
++
++98:     @ shared store: each vst4.8 writes one byte lane from each of d17-d20, r0 and r12 alternate rows
++        vst4.8      {d17[0], d18[0], d19[0], d20[0]}, [r0], r3
++        vst4.8      {d17[1], d18[1], d19[1], d20[1]}, [r12], r3
++        vst4.8      {d17[2], d18[2], d19[2], d20[2]}, [r0]
++        vst4.8      {d17[3], d18[3], d19[3], d20[3]}, [r12]
++        pop        {r4-r8, pc}
++
++@ Up of Horizontal - works down up
++10:
++        ldrh        r7, [r7]            @ r7 = halfword entry from the inv_angle table
++        rsb         r12, r6, #32
++        ldr         lr, [r2]            @ Left
++        ldrb        r2, [r2, #-1]       @ Top-left
++        vmov        s0, lr
++        vdup.8      d2, r12
++        vdup.8      d3, r6
++        orr         lr, r2, lr, lsl #8  @ shift in the new byte at the low end
++        vmov        s2, lr
++        sub         r8, r7, #128        @ r8 = inv angle accumulator, starts at r7 - 128
++        mov         r5, #3
++2:
++        vmull.u8    q2, d0, d2
++          subs        r12, r4
++        vmlal.u8    q2, d1, d3
++T         it          mi
++          addmi       r12, #32
++T         asr         r6, r8, #8
++T         it          mi
++T         ldrbmi      r2, [r1, r6]
++A         ldrbmi      r2, [r1, r8, asr #8]
++          rsb         r6, r12, #32
++          vdup.8      d2, r12
++          ittt        mi
++          vmovmi      s0, lr
++          orrmi       lr, r2, lr, lsl #8
++          vmovmi      s2, lr
++        vrshrn.u16  d20, q2, #5
++          vdup.8      d3, r6
++          it          mi
++          addmi       r8, r7
++        subs        r5, #1
++        vext.64     q8, q8, q9, #1
++        vext.64     q9, q9, q10, #1
++        bne         2b
++
++          vmull.u8    q2, d0, d2
++        add         r12, r0,  r3        @ r12 = r0 + stride
++          vmlal.u8    q2, d1, d3
++        lsl         r3,  #1             @ stride doubled for the stores at 98
++          vrshrn.u16  d20, q2, #5
++        b           98b
++
++@ Left of vertical - works down left
++18:
++        ldrh        r7, [r7]            @ r7 = halfword entry from the inv_angle table
++        rsb         r12, r6, #32
++        ldr         lr, [r1]            @ Top
++        ldrb        r1, [r2, #-1]       @ Top-left
++        vmov        s0, lr
++        vdup.8      d2, r12
++        vdup.8      d3, r6
++        orr         lr, r1, lr, lsl #8  @ shift in the new byte at the low end
++        vmov        s2, lr
++        sub         r8, r7, #128        @ r8 = inv angle accumulator, starts at r7 - 128
++        mov         r5, #3
++2:
++        vmull.u8    q2, d0, d2
++          subs        r12, r4
++        vmlal.u8    q2, d1, d3
++T         it          mi
++          addmi       r12, #32
++T         asr         r6, r8, #8
++T         it          mi
++T         ldrbmi      r1, [r2, r6]
++A         ldrbmi      r1, [r2, r8, asr #8]
++          rsb         r6, r12, #32
++          vdup.8      d2, r12
++          ittt        mi
++          vmovmi      s0, lr
++          orrmi       lr, r1, lr, lsl #8
++          vmovmi      s2, lr
++        vrshrn.u16  d4, q2, #5
++          vdup.8      d3, r6
++          it          mi
++          addmi       r8, r7
++        subs        r5, #1
++        vst1.32     {d4[0]}, [r0], r3   @ one 4-byte row stored per pass
++        bne         2b
++
++          vmull.u8    q2, d0, d2
++          vmlal.u8    q2, d1, d3
++          vrshrn.u16  d4, q2, #5
++          vst1.32     {d4[0]}, [r0]
++
++        pop         {r4-r8, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        ldr         lr, [r1], #1        @ Top
++        rsb         r12, r6, #32
++        vmov        s0, lr
++        vdup.8      d3, r6
++        ldr         lr, [r1], #1        @ 4 bytes starting one byte further on
++        vdup.8      d2, r12
++        vmov        s2, lr
++          subs        r12, r4
++        vmull.u8    q2, d0, d2
++          it          mi
++          addmi       r12, #32
++        vmlal.u8    q2, d1, d3
++          rsb         r6, r12, #32
++          itt         mi
++          vmovmi      s0, lr
++          ldrmi       lr, [r1], #1
++          vdup.8      d2, r12
++          it          mi
++          vmovmi      s2, lr
++          vdup.8      d3, r6
++        mov         r5, #2
++1:
++        vrshrn.u16  d6, q2, #5
++            subs        r12, r4
++          vmull.u8    q2, d0, d2
++            it          mi
++            addmi       r12, #32
++          vmlal.u8    q2, d1, d3
++            rsb         r6, r12, #32
++        vst1.32     {d6[0]}, [r0], r3   @ one 4-byte row stored per pass
++            itt         mi
++            vmovmi      s0, lr
++            ldrmi       lr, [r1], #1
++            vdup.8      d2, r12
++            it          mi
++            vmovmi      s2, lr
++        subs        r5, #1
++            vdup.8      d3, r6
++        bne         1b
++
++          vrshrn.u16  d6, q2, #5
++            vmull.u8    q2, d0, d2
++            vmlal.u8    q2, d1, d3
++          vst1.32     {d6[0]}, [r0], r3
++            vrshrn.u16  d6, q2, #5
++            vst1.32     {d6[0]}, [r0]
++
++        pop         {r4-r8, pc}
++
++endfunc
++
++
++
++@ ff_hevc_rpi_pred_angular_8_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_8_neon_8, export=1
++        ldr         r12, [sp]           @ r12 = mode, read before the push below moves sp
++        push        {r4-r11, lr}
++        ADRT        r4, angle_2 - 2     @ base biased by 2 so that mode indexes the table directly
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1 @ r7 = inv_angle + (mode - 11) * 2, only dereferenced at 10 and 18 below
++        ldrsb       r6, [r4, r12]       @ r6 = signed byte at angle_2 + mode - 2
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]       @ r4 = the same table entry, kept as the per-step add
++        bge         26f                 @ mode >= 26
++        cmp         r12, #18
++        bge         18f                 @ mode 18..25
++        cmp         r12, #10
++        bge         10f                 @ mode 10..17, anything lower falls through
++
++@ Down of Horizontal - works down left
++        bl          patch_h_down_8x8_8  @ a single 8x8 patch covers the whole block
++        pop         {r4-r11, pc}
++
++@ Up of Horizontal - works down up
++10:
++        ldrh        r7, [r7]            @ r7 = halfword entry from the inv_angle table
++        mov         r10, #-128          @ starting value of the inv angle accumulator used by patch_h_up_8x8_8
++        bl          patch_h_up_8x8_8
++        pop         {r4-r11, pc}
++
++@ Left of vertical - works down left
++18:
++        ldrd        r8, r9, [r1]        @ Top
++        rsb         r12, r6, #32
++        ldrb        lr, [r2, #-1]       @ Top-left
++        ldrh        r7, [r7]            @ r7 = halfword entry from the inv_angle table
++        vmov        d0, r8, r9
++        lsl         r9, r9, #8
++        vdup.8      d2, r12
++        orr         r9, r9, r8, lsr #24
++        orr         r8, lr, r8, lsl #8  @ r8:r9 moved up one byte with the new byte at the low end
++        vmov        d1, r8, r9
++        sub         r1, r7, #128        @ r1 is reused as the inv angle accumulator, starting at r7 - 128
++        mov         r5, #7
++1:
++        vdup.8      d3, r6
++        vmull.u8    q2, d0, d2
++          subs        r12, r12, r4
++        vmlal.u8    q2, d1, d3
++          ittt        mi
++          addmi       lr, r2, r1, asr #8 @ address of the next byte: r2 + (accumulator >> 8)
++          addmi       r12, r12, #32
++          vmovmi      d0, r8, r9
++          rsb         r6, r12, #32
++          itt         mi
++          lslmi       r9, r9, #8
++          ldrbmi      lr, [lr]
++          vdup.8      d2, r12
++        vrshrn.u16  d4, q2, #5
++          itttt       mi
++          orrmi       r9, r9, r8, lsr #24
++          orrmi       r8, lr, r8, lsl #8
++          vmovmi      d1, r8, r9
++          addmi       r1, r1, r7
++        subs        r5, r5, #1
++        vst1.8      {d4}, [r0], r3      @ one 8-byte row stored per pass
++        bne         1b
++
++          vdup.8      d3, r6
++          vmull.u8    q2, d0, d2
++          vmlal.u8    q2, d1, d3
++          vrshrn.u16  d4, q2, #5
++          vst1.8      {d4}, [r0]
++
++        pop         {r4-r11, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        ldrd        r8, r9, [r1]        @ Top
++        rsb         r12, r6, #32
++        vmov        d0, r8, r9
++        vdup.8      d3, r6
++        mov         r5, #7
++        lsr         r8, #8
++        vdup.8      d2, r12
++        orr         r8, r8, r9, lsl #24
++        ldr         r9, [r1, #5]!       @ pre-indexed with writeback: r1 += 5, r9 = 4 bytes from there
++        vmov        d1, r8, r9          @ d1 = the 8 bytes at original r1 + 1 (assuming little-endian loads)
++1:
++        vmull.u8    q2, d0, d2
++          subs        r12, r4
++        vmlal.u8    q2, d1, d3
++          it          mi
++          addmi       r12, #32
++          rsb         r6, r12, #32
++          itt         mi
++          vmovmi      d0, r8, r9
++          lsrmi       r8, #8
++          vdup.8      d2, r12
++          itt         mi
++          orrmi       r8, r8, r9, lsl #24
++          ldrmi       r9, [r1, #1]!
++        vrshrn.u16  d6, q2, #5
++          it          mi
++          vmovmi      d1, r8, r9
++          vdup.8      d3, r6
++        subs        r5, #1
++        vst1.8      {d6}, [r0], r3      @ one 8-byte row stored per pass
++        bne         1b
++
++          vmull.u8    q2, d0, d2
++          vmlal.u8    q2, d1, d3
++          vrshrn.u16  d6, q2, #5
++          vst1.8      {d6}, [r0]
++
++        pop         {r4-r11, pc}
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_angular_16_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_16_neon_8, export=1
++        ldr         r12, [sp]           @ r12 = mode, read before the push below moves sp
++        push        {r4-r11, lr}
++        ADRT        r4, angle_2 - 2     @ base biased by 2 so that mode indexes the table directly
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1 @ r7 = inv_angle + (mode - 11) * 2, only dereferenced at 10 and 18 below
++        ldrsb       r6, [r4, r12]       @ r6 = signed byte at angle_2 + mode - 2
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]       @ r4 = the same table entry, kept as the per-step add
++        bge         26f                 @ mode >= 26
++        cmp         r12, #18
++        bge         18f                 @ mode 18..25
++        cmp         r12, #10
++        bge         10f                 @ mode 10..17, anything lower falls through
++
++@ Down of Horizontal - works down left
++        mov         r1,  r2             @ save r2 - r1 unused by patch_down
++
++        bl          patch_h_down_8x8_8  @ upper half: two 8x8 patches side by side
++        bl          patch_h_down_8x8_8_continue
++
++        add         r2, r1, #8          @ restore r2, but 8 rows further down left
++        sub         r0, #16             @ undo the two 8-byte advances the patches made to r0
++        mov         r6, r4              @ r6 is read on entry by patch_h_down_8x8_8, so reload it
++        add         r0, r0, r3, lsl #3  @ move 8 rows down
++
++        bl          patch_h_down_8x8_8
++        bl          patch_h_down_8x8_8_continue
++
++        pop         {r4-r11, pc}
++
++@ Up of Horizontal - works down up
++10:
++        ldrh        r7, [r7]            @ r7 = halfword entry from the inv_angle table
++        mov         r10, #-128          @ starting value of the inv angle accumulator used by patch_h_up_8x8_8
++
++        push        {r2}                @ the patch routines modify r2, keep the original
++        bl          patch_h_up_8x8_8
++        bl          patch_h_up_8x8_8_continue
++        pop         {r2}
++
++        sub         r0, #16             @ undo the two 8-byte advances the patches made to r0
++        mov         r10, #-128
++        add         r2, #8
++        add         r0, r0, r3, lsl #3  @ move 8 rows down
++        sub         r10, r10, r7, lsl #3 @ r10 = -128 - 8 * r7 for the lower half
++
++        bl          patch_h_up_8x8_8
++        bl          patch_h_up_8x8_8_continue
++
++        pop         {r4-r11, pc}
++
++@ Left of vertical - works down left
++18:
++        vld1.8      {q9}, [r1]          @ q9 = 16 bytes from the top pointer
++        sub         r1, r2, #1          @ r1 = address of the byte just before left
++        rsb         r12, r6, #32
++        ldrh        r7, [r7]            @ r7 = halfword entry from the inv_angle table
++        vdup.8      d6, r6
++        vext.8      q8, q9, q9, #15     @ q8 = q9 rotated up by one byte
++        sub         r8, r7, #128        @ r8 = inv angle accumulator, starts at r7 - 128
++        vld1.8      {d16[0]}, [r1]      @ lowest byte of q8 replaced by the byte at r1
++        vdup.8      d7, r12
++        mov         r5, #15
++1:      @ loop 1 computes from q8/q9 and stages the next reference pair in q10/q11
++        vmull.u8    q0, d18, d7
++        subs        r12, r4
++        vmlal.u8    q0, d16, d6
++        it          cc
++        addcc       r12, #32
++        vmull.u8    q1, d19, d7
++        it          cc
++        addcc       r1, r2, r8, asr #8
++        vmlal.u8    q1, d17, d6
++        rsb         r6, r12, #32
++        vext.8      q10, q8, q8, #15
++        sub         r5, #1
++        vld1.8      {d20[0]}, [r1]
++        it          cc
++        addcc       r8, r7
++        vmov        q11, q8
++        teq         r5, #0              @ NOTE(review): bhi/bcc below appear to rely on C still being as the subs above left it - confirm
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vdup.8      d6, r6
++        vdup.8      d7, r12
++        vst1.8      {q0}, [r0], r3      @ one 16-byte row stored per pass
++        bhi         1b
++        beq         4f
++2:      @ loop 2 mirrors loop 1: computes from q10/q11 and stages the next pair in q8/q9
++        vmull.u8    q0, d22, d7
++        subs        r12, r4
++        vmlal.u8    q0, d20, d6
++        it          cc
++        addcc       r12, #32
++        vmull.u8    q1, d23, d7
++        it          cc
++        addcc       r1, r2, r8, asr #8
++        vmlal.u8    q1, d21, d6
++        rsb         r6, r12, #32
++        vext.8      q8, q10, q10, #15
++        sub         r5, #1
++        vld1.8      {d16[0]}, [r1]
++        it          cc
++        addcc       r8, r7
++        vmov        q9, q10
++        teq         r5, #0
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vdup.8      d6, r6
++        vdup.8      d7, r12
++        vst1.8      {q0}, [r0], r3
++        bhi         2b
++        bne         1b
++        bcc         5f
++3:      @ final row computed from q10/q11
++        vmull.u8    q0, d22, d7
++        vmlal.u8    q0, d20, d6
++        vmull.u8    q1, d23, d7
++        vmlal.u8    q1, d21, d6
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vst1.8      {q0}, [r0]
++
++        pop         {r4-r11, pc}
++4:
++        bcc         3b
++5:      @ final row computed from q8/q9
++        vmull.u8    q0, d18, d7
++        vmlal.u8    q0, d16, d6
++        vmull.u8    q1, d19, d7
++        vmlal.u8    q1, d17, d6
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vst1.8      {q0}, [r0]
++
++        pop         {r4-r11, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        vld1.8      {q9}, [r1]!         @ q9 = 16 bytes from the top pointer, r1 += 16
++        rsb         r12, r6, #32
++        vdup.8      d6, r6
++        vdup.8      d7, r12
++        vext.8      q8, q9, q9, #1      @ q8 = q9 rotated down by one byte
++        vld1.8      {d17[7]}, [r1]!     @ highest byte of q8 replaced by the next byte, r1 += 1
++        mov         r5, #15
++1:      @ loop 1 computes from q8/q9 and stages the next reference pair in q10/q11
++        vmull.u8    q0, d16, d6
++        subs        r12, r4
++        vmlal.u8    q0, d18, d7
++        it          cc
++        addcc       r12, #32
++        vmull.u8    q1, d17, d6
++        rsb         r6, r12, #32
++        vmlal.u8    q1, d19, d7
++        sub         r5, #1
++        vext.8      q10, q8, q8, #1
++        teq         r5, #0              @ NOTE(review): it cc and bhi/bcc below appear to rely on C still being as the subs above left it - confirm
++        vld1.8      {d21[7]}, [r1]
++        it          cc
++        addcc       r1, #1
++        vmov        q11, q8
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vdup.8      d6, r6
++        vdup.8      d7, r12
++        vst1.8      {q0}, [r0], r3      @ one 16-byte row stored per pass
++        bhi         1b
++        beq         4f
++2:      @ loop 2 mirrors loop 1: computes from q10/q11 and stages the next pair in q8/q9
++        vmull.u8    q0, d20, d6
++        subs        r12, r4
++        vmlal.u8    q0, d22, d7
++        it          cc
++        addcc       r12, #32
++        vmull.u8    q1, d21, d6
++        rsb         r6, r12, #32
++        vmlal.u8    q1, d23, d7
++        sub         r5, #1
++        vext.8      q8, q10, q10, #1
++        teq         r5, #0
++        vld1.8      {d17[7]}, [r1]
++        it          cc
++        addcc       r1, #1
++        vmov        q9, q10
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vdup.8      d6, r6
++        vdup.8      d7, r12
++        vst1.8      {q0}, [r0], r3
++        bhi         2b
++        bne         1b
++        bcc         5f
++3:      @ final row computed from q10/q11
++        vmull.u8    q0, d20, d6
++        vmlal.u8    q0, d22, d7
++        vmull.u8    q1, d21, d6
++        vmlal.u8    q1, d23, d7
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vst1.8      {q0}, [r0]
++
++        pop         {r4-r11, pc}
++4:
++        bcc         3b
++5:      @ final row computed from q8/q9
++        vmull.u8    q0, d16, d6
++        vmlal.u8    q0, d18, d7
++        vmull.u8    q1, d17, d6
++        vmlal.u8    q1, d19, d7
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vst1.8      {q0}, [r0]
++
++        pop         {r4-r11, pc}
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_angular_32_neon_8 - angular prediction, 8-bit samples, 32x32 block
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_32_neon_8, export=1
++        ldr         r12, [sp]           @ r12 = mode, fetched before the push moves sp
++        push        {r4-r11, lr}
++        ADRT        r4, angle_2 - 2
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1 @ r7 = &inv_angle[mode - 11], given ADRT loaded the biased table address
++        ldrsb       r6, [r4, r12]       @ r6 = angle_2[mode - 2]; rewritten as the rows advance
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]       @ r4 = the same table entry, kept as the constant per-row step
++        bge         26f                 @ mode 26..34
++        cmp         r12, #18
++        bge         18f                 @ mode 18..25
++        cmp         r12, #10
++        bge         10f                 @ mode 10..17
++
++@ Down of Horizontal - works down left
++        mov         r10, #4             @ four passes of 8 rows each
++        mov         r1, r2              @ r1 holds the left pointer for the restore below
++1:
++        bl          patch_h_down_8x8_8  @ helper defined outside this section
++        bl          patch_h_down_8x8_8_continue
++        bl          patch_h_down_8x8_8_continue
++        bl          patch_h_down_8x8_8_continue
++
++        add         r2, r1, #8          @ restore r2, but 8 rows further down left
++        add         r1, r1, #8
++        mov         r6, r4              @ reload r6 from the unchanged angle in r4
++        sub         r0, #32             @ presumably rewinds the 32 bytes the helpers advanced r0 - confirm against the helper
++        subs        r10, #1
++        add         r0, r0, r3, lsl #3  @ r0 += 8 * stride; flags from the subs are untouched
++        bne         1b
++
++        pop        {r4-r11, pc}
++
++@ Up of Horizontal - works down up
++10:
++        ldrh        r7, [r7]            @ r7 = inv_angle entry for this mode
++        mov         r10, #-128
++        vmov.i8     d6, #1<<2           @ d6 doubles as the pass counter: read through s12, halved every pass
++1:
++        push        {r2,r10}            @ the helpers modify r2 and r10
++        bl          patch_h_up_8x8_8    @ helper defined outside this section
++        bl          patch_h_up_8x8_8_continue
++        bl          patch_h_up_8x8_8_continue
++        bl          patch_h_up_8x8_8_continue
++        pop         {r2,r10}
++
++        vmov        r8, s12             @ r8 = low word of d6, sampled before the shift below
++        sub         r0, #32             @ presumably rewinds the 32 bytes the helpers advanced r0 - confirm against the helper
++        add         r2, #8
++        add         r0, r0, r3, lsl #3  @ r0 += 8 * stride
++        sub         r10, r10, r7, lsl #3 @ r10 -= 8 * r7
++        vshr.u8     d6, #1              @ counter bytes go 4 -> 2 -> 1 -> 0
++        teq         r8, #0              @ stop once the sampled counter was zero (4 passes if the helpers leave d6 alone)
++        bne         1b
++
++        pop        {r4-r11, pc}
++
++@ Left of vertical - works down left
++18:
++        vld1.8      {q0-q1}, [r1]       @ q0:q1 = 32 bytes from top
++        sub         r9, r2, #1          @ r9 = left - 1
++        rsb         r12, r6, #32        @ r12 = 32 - r6
++        ldrh        r7, [r7]            @ r7 = inv_angle entry for this mode
++        mov         r8, #-128           @ r8 = left index accumulator, consumed as r8 >> 8
++        vdup.8      d18, r6             @ d18 = weight for the shifted reference (q0:q1)
++        vdup.8      d19, r12            @ d19 = weight for the unshifted copy (q2:q3)
++        mov         r5, #32             @ rows left to write
++1:
++        vld1.8      {d17[7]}, [r9]      @ byte that enters the reference at lane 0
++        add         r8, r7
++        vmov        q2, q0              @ q2:q3 = reference before the shift
++        vmov        q3, q1
++        add         r9, r2, r8, asr #8  @ r9 = left + (r8 >> 8), source of the next new byte
++        vext.8      q1, q0, q1, #15     @ move q0:q1 up by one byte lane...
++        vext.8      q0, q8, q0, #15     @ ...with d17[7] filling lane 0
++2:
++        vmull.u8    q10, d4, d19        @ row = (q2:q3 * d19 + q0:q1 * d18 + 16) >> 5
++        subs        r12, r4             @ carry is clear when this borrows
++        vmlal.u8    q10, d0, d18
++        it          cc
++        addcc       r12, #32            @ on borrow wrap r12 back up by 32
++        vmull.u8    q11, d5, d19
++        rsb         r6, r12, #32        @ r6 = 32 - r12
++        vmlal.u8    q11, d1, d18
++        sub         r5, #1
++        vmull.u8    q12, d6, d19
++        teq         r5, #0              @ sets the zero flag when no rows are left; the branches below still use the carry from the subs
++        vmlal.u8    q12, d2, d18
++        vmull.u8    q13, d7, d19
++        vmlal.u8    q13, d3, d18
++        vdup.8      d18, r6             @ weights for the next row
++        vdup.8      d19, r12
++        vrshrn.u16  d20, q10, #5        @ round, shift right by 5 and narrow to bytes
++        vrshrn.u16  d21, q11, #5
++        vrshrn.u16  d22, q12, #5
++        vrshrn.u16  d23, q13, #5
++        vst1.8      {q10-q11}, [r0], r3 @ write 32 bytes, r0 += stride
++        bhi         2b                  @ no borrow and rows left: same reference again
++        bne         1b                  @ borrow and rows left: shift the reference first
++
++        pop         {r4-r11, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        add         r5, r1, #32         @ r5 = top + 32
++        vld1.8      {q0-q1}, [r1]!      @ q0:q1 = top[0..31], r1 += 32
++        rsb         r12, r6, #32        @ r12 = 32 - r6
++        vld1.8      {d16[0]}, [r5]      @ d16[0] = top[32]
++        mov         r5, #32             @ rows left to write
++        vdup.8      d18, r6             @ d18 = weight for the shifted reference (q0:q1)
++        vdup.8      d19, r12            @ d19 = weight for the unshifted copy (q2:q3)
++1:
++        vmov        q2, q0              @ q2:q3 = reference before the shift
++        add         r1, #1              @ r1 -> byte after the one shifted in below
++        vmov        q3, q1
++        vext.8      q0, q0, q1, #1      @ move q0:q1 down by one byte lane...
++        vext.8      q1, q1, q8, #1      @ ...with d16[0] filling lane 31
++2:
++        vmull.u8    q10, d0, d18        @ row = (q0:q1 * d18 + q2:q3 * d19 + 16) >> 5
++        subs        r12, r4             @ carry is clear when this borrows
++        vmlal.u8    q10, d4, d19
++        it          cc
++        addcc       r12, #32            @ on borrow wrap r12 back up by 32
++        vmull.u8    q11, d1, d18
++        rsb         r6, r12, #32        @ r6 = 32 - r12
++        vmlal.u8    q11, d5, d19
++        sub         r5, #1
++        vmull.u8    q12, d2, d18
++        teq         r5, #0              @ sets the zero flag when no rows are left; the branches below still use the carry from the subs
++        vmlal.u8    q12, d6, d19
++        vmull.u8    q13, d3, d18
++        vmlal.u8    q13, d7, d19
++        vld1.8      {d16[0]}, [r1]      @ d16[0] = byte for the next shift
++        vdup.8      d18, r6             @ weights for the next row
++        vdup.8      d19, r12
++        vrshrn.u16  d20, q10, #5        @ round, shift right by 5 and narrow to bytes
++        vrshrn.u16  d21, q11, #5
++        vrshrn.u16  d22, q12, #5
++        vrshrn.u16  d23, q13, #5
++        vst1.8      {q10-q11}, [r0], r3 @ write 32 bytes, r0 += stride
++        bhi         2b                  @ no borrow and rows left: same reference again
++        bne         1b                  @ borrow and rows left: shift the reference first
++
++        pop         {r4-r11, pc}
++
++endfunc
++
++
++@ Chroma 8 bit 4x4 patch fns
++        .text
++
++patch_h_down_c_4x4_8:                   @ 4x4 patch of 16-bit elements; in: r0 dst, r2 left, r3 stride, r4 step, r6 first weight
++        ldrd        r8, r9, [r2]        @ Left
++        rsb         r12, r6, #32        @ r12 = 32 - r6
++        vmov        d0, r8, r9          @ d0 = the 8 bytes at left
++        vdup.8      d3, r6              @ d3 = weight applied to d1
++        lsr         r8, #16
++        vdup.8      d2, r12             @ d2 = weight applied to d0
++        orr         r8, r8, r9, lsl #16 @ r8 = left bytes 2..5 (little-endian view)
++        ldr         r9, [r2, #6]!       @ r2 += 6, r9 = the 4 bytes at the new r2
++        vmov        d1, r8, r9          @ d1 = d0 advanced by one 16-bit element
++        // drop through...
++patch_h_down_c_4x4_8_continue:          @ entry that carries on from the register state of a previous call
++        mov         r5, #4              @ four result vectors per call
++1:
++          subs        r12, r4           @ negative result means the weight wrapped
++        vmull.u8    q2, d0, d2          @ q2 = d0 * d2
++          it          mi
++          addmi       r12, #32
++        vmlal.u8    q2, d1, d3          @ q2 += d1 * d3
++          rsb         r6, r12, #32      @ r6 = 32 - r12
++        vext.8      q8, q8, q9, #8      @ result history: d16 <- d17, d17 <- d18
++          it          mi
++          lsrmi       r7, r8, #16       @ r7 is only scratch here
++        vmov        d18, d19            @ d18 <- previous result
++          it          mi
++          vmovmi      d0, r8, r9        @ on wrap d0 takes over the old d1 contents
++          vdup.8      d2, r12
++          it          mi
++          orrmi       r8, r7, r9, lsl #16
++        vrshrn.u16  d19, q2, #5         @ d19 = newest result, (q2 + 16) >> 5 narrowed to bytes
++          itt         mi
++          ldrmi       r9, [r2, #2]!     @ on wrap step r2 on by one 16-bit element
++          vmovmi      d1, r8, r9
++        subs        r5, #1
++          vdup.8      d3, r6
++        bne         1b
++        // drop through...
++store_tran_c_4x4_8:                     @ transpose d16-d19 as a 4x4 of 16-bit elements and store it as 4 rows
++        vzip.16     d16, d17
++        add         r6, r0, r3          @ r6 = address of row 1
++        vzip.16     d18, d19
++        lsl         r3, #1              @ stride temporarily doubled
++        vzip.32     q8, q9              @ d16..d19 now hold rows 0..3
++        add         r5, r0, r3          @ r5 = address of row 2
++        vst1.16     {d16}, [r0]!        @ row 0; r0 += 8 ready for the next patch to the right
++        vst1.16     {d17}, [r6], r3     @ row 1; r6 moves on to row 3
++        vst1.16     {d18}, [r5]         @ row 2
++        asr         r3, #1              @ stride restored
++        vst1.16     {d19}, [r6]         @ row 3
++
++        bx          lr
++
++patch_h_up_c_4x4_8:                     @ 4x4 patch of 16-bit elements; in: r0 dst, r1 top, r2 left, r3 stride, r4 angle, r7 inv_angle, r10 top index
++        ldrd        r8, r9, [r2]        @ r8:r9 = the 8 bytes at left
++        rsb         r6, r4, #32         @ r6 = 32 - r4
++        vmov        d0, r8, r9
++        vdup.8      d3, r4              @ d3 = weight applied to d1
++        lsr         r11, r8, #16
++        vdup.8      d2, r6              @ d2 = weight applied to d0
++        ldr         r8, [r2, #-2]!      @ r2 -= 2, r8 = the 4 bytes at the new r2
++        orr         r9, r11, r9, lsl #16 @ r9 = bytes 2..5 of the original 8 (little-endian view)
++        vmov        d1, r8, r9          @ d1 = d0 moved back by one 16-bit element
++        mov         r12, r4             @ r12 = running weight
++        vmull.u8    q2, d0, d2          @ first result is started ahead of the loop
++        vmlal.u8    q2, d1, d3
++patch_h_up_c_4x4_8_continue:            @ entry that carries on from the register state of a previous call
++        mov         r5, #4              @ four result vectors per call
++1:
++          add         r12, r4
++          cmp         r12, #33          @ carry set when r12 is above 32: the reference must step
++          it          cs
++          addcs       r10, r7
++          mov         r11, #0
++          itt         cs
++          subcs       r12, #32
++          tstcs       r10, #1<<31       @ when stepping: zero flag set if r10 is not negative
++          rsb         r6, r12, #32      @ r6 = 32 - r12
++          it          eq
++          asreq       r11, r10, #7      @ eq only holds if the tst above ran and r10 was not negative
++          it          cs
++          vmovcs      d0, r8, r9        @ on a step d0 takes over the old d1 contents
++          it          eq
++          biceq       r11, #1           @ r11 = 2 * (r10 >> 8), byte offset into top
++          vdup.8      d2, r6
++          it          cs
++          lsrcs       r6, r8, #16       @ r6 is free again after the vdup above
++          vdup.8      d3, r12
++        vext.8      q8, q8, q9, #8      @ result history: d16 <- d17, d17 <- d18
++          itt         cs
++          orrcs       r9, r6, r9, lsl #16
++          ldrhcs      r11, [r1, r11]    @ new element from top at offset r11 (offset 0 when r10 is negative)
++        vmov        d18, d19            @ d18 <- previous result
++          it          hi
++          ldrhhi      r11, [r2, #-2]!   @ stepping with r10 negative: take the element from left instead, r2 -= 2
++        vrshrn.u16  d19, q2, #5         @ d19 = newest result, (q2 + 16) >> 5 narrowed to bytes
++          itt         cs
++          orrcs       r8, r11, r8, lsl #16 @ new element enters at the bottom of r8
++          vmovcs      d1, r8, r9
++          vmull.u8    q2, d0, d2        @ start the next result
++        subs        r5, #1
++          vmlal.u8    q2, d1, d3
++        bne         1b
++
++        b           store_tran_c_4x4_8  @ tail call: its bx lr returns to the caller of this helper
++
++
++@ ff_hevc_rpi_pred_angular_c_4_neon_8 - angular prediction, chroma 8-bit, 4 rows of 4 16-bit elements
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_c_4_neon_8, export=1
++        ldr         r12, [sp]           @ r12 = mode, fetched before the push moves sp
++        push        {r4-r11, lr}
++        ADRT        r4, angle_2 - 2
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1 @ r7 = &inv_angle[mode - 11], given ADRT loaded the biased table address
++        lsl         r3, #1              @ r3 *= 2 - presumably turns a stride in 16-bit elements into bytes; confirm with the caller
++        ldrsb       r6, [r4, r12]       @ r6 = angle_2[mode - 2]; rewritten as the rows advance
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]       @ r4 = the same table entry, kept as the constant per-row step
++        bge         26f                 @ mode 26..34
++        cmp         r12, #18
++        bge         18f                 @ mode 18..25
++        cmp         r12, #10
++        bge         10f                 @ mode 10..17
++
++@ Down of Horizontal - works down left
++        bl          patch_h_down_c_4x4_8 @ one helper call covers the whole block
++        pop         {r4-r11, pc}
++
++@ Up of Horizontal - works down up
++10:
++        ldrh        r7, [r7]            @ r7 = inv_angle entry for this mode
++        mov         r10, #-128          @ initial top index accumulator for the helper
++        bl          patch_h_up_c_4x4_8
++        pop         {r4-r11, pc}
++
++@ Left of vertical - works down left
++18:
++        ldrd        r8, r9, [r1]        @ Top
++        rsb         r12, r6, #32        @ r12 = 32 - r6
++        ldrh        lr, [r2, #-2]       @ Top-left
++        ldrh        r7, [r7]            @ r7 = inv_angle entry for this mode
++        vmov        d0, r8, r9          @ d0 = top elements 0..3
++        lsl         r9, r9, #16
++        vdup.8      d2, r12             @ d2 = weight applied to d0
++        orr         r9, r9, r8, lsr #16 @ r9 = top elements 1..2
++        orr         r8, lr, r8, lsl #16 @ r8 = top-left, top element 0
++        vmov        d1, r8, r9          @ d1 = d0 moved up one element with top-left entering
++        sub         r1, r7, #128        @ r1 becomes the left index accumulator (top pointer no longer needed)
++        mov         r5, #3              @ three rows in the loop, the fourth after it
++1:
++        vdup.8      d3, r6              @ d3 = weight applied to d1
++        vmull.u8    q2, d0, d2
++          subs        r12, r12, r4      @ negative result means the weight wrapped
++        vmlal.u8    q2, d1, d3
++          itttt       mi
++          addmi       lr, r2, r1, asr #7
++          bicmi       lr, #1            @ lr = (left + (r1 >> 7)) with bit 0 cleared: address of the next left element
++          addmi       r12, r12, #32
++          vmovmi      d0, r8, r9        @ on wrap d0 takes over the old d1 contents
++          rsb         r6, r12, #32      @ r6 = 32 - r12
++          itt         mi
++          lslmi       r9, r9, #16
++          ldrhmi      lr, [lr]          @ lr = the new element
++          vdup.8      d2, r12
++        vrshrn.u16  d4, q2, #5          @ (q2 + 16) >> 5 narrowed to bytes
++          itttt       mi
++          orrmi       r9, r9, r8, lsr #16
++          orrmi       r8, lr, r8, lsl #16 @ new element enters at the bottom of r8
++          vmovmi      d1, r8, r9
++          addmi       r1, r1, r7        @ advance the left index accumulator
++        subs        r5, r5, #1
++        vst1.16     {d4}, [r0], r3      @ write 8 bytes, r0 += r3
++        bne         1b
++
++          vdup.8      d3, r6            @ last row: no further reference update needed
++          vmull.u8    q2, d0, d2
++          vmlal.u8    q2, d1, d3
++          vrshrn.u16  d4, q2, #5
++          vst1.16     {d4}, [r0]
++
++        pop         {r4-r11, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        ldrd        r8, r9, [r1]        @ Top
++        rsb         r12, r6, #32        @ r12 = 32 - r6
++        vmov        d0, r8, r9          @ d0 = top elements 0..3
++        vdup.8      d3, r6              @ d3 = weight applied to d1
++        mov         r5, #3              @ three rows in the loop, the fourth after it
++        lsr         r8, #16
++        vdup.8      d2, r12             @ d2 = weight applied to d0
++        orr         r8, r8, r9, lsl #16 @ r8 = top elements 1..2
++        ldr         r9, [r1, #6]!       @ r1 += 6, r9 = top elements 3..4
++        vmov        d1, r8, r9          @ d1 = d0 advanced by one element
++1:
++        vmull.u8    q2, d0, d2
++          subs        r12, r4           @ negative result means the weight wrapped
++        vmlal.u8    q2, d1, d3
++          it          mi
++          addmi       r12, #32
++          rsb         r6, r12, #32      @ r6 = 32 - r12
++          itt         mi
++          vmovmi      d0, r8, r9        @ on wrap d0 takes over the old d1 contents
++          lsrmi       r8, #16
++          vdup.8      d2, r12
++          itt         mi
++          orrmi       r8, r8, r9, lsl #16
++          ldrmi       r9, [r1, #2]!     @ on wrap step r1 on by one element
++        vrshrn.u16  d6, q2, #5          @ (q2 + 16) >> 5 narrowed to bytes
++          it          mi
++          vmovmi      d1, r8, r9
++          vdup.8      d3, r6
++        subs        r5, #1
++        vst1.16     {d6}, [r0], r3      @ write 8 bytes, r0 += r3
++        bne         1b
++
++          vmull.u8    q2, d0, d2        @ last row: no further reference update needed
++          vmlal.u8    q2, d1, d3
++          vrshrn.u16  d6, q2, #5
++          vst1.16     {d6}, [r0]
++
++        pop         {r4-r11, pc}
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_angular_c_8_neon_8 - angular prediction, chroma 8-bit, 8 rows of 8 16-bit elements
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_c_8_neon_8, export=1
++        ldr         r12, [sp]           @ r12 = mode, fetched before the push moves sp
++        push        {r4-r11, lr}
++        ADRT        r4, angle_2 - 2
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1 @ r7 = &inv_angle[mode - 11], given ADRT loaded the biased table address
++        lsl         r3, #1              @ r3 *= 2 - presumably turns a stride in 16-bit elements into bytes; confirm with the caller
++        ldrsb       r6, [r4, r12]       @ r6 = angle_2[mode - 2]; rewritten as the rows advance
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]       @ r4 = the same table entry, kept as the constant per-row step
++        bge         26f                 @ mode 26..34
++        cmp         r12, #18
++        bge         18f                 @ mode 18..25
++        cmp         r12, #10
++        bge         10f                 @ mode 10..17
++
++@ Down of Horizontal - works down left
++        mov         r1,  r2             @ save r2 - r1 unused by patch_down
++
++        bl          patch_h_down_c_4x4_8 @ top-left 4x4 patch
++        bl          patch_h_down_c_4x4_8_continue @ patch to its right
++
++        add         r2, r1, #4*2        @ restore r2, but 4 rows further down left
++        sub         r0, #16             @ undo the two 8-byte advances of r0 made by the patch stores
++        mov         r6, r4              @ reload r6 from the unchanged angle in r4
++        add         r0, r0, r3, lsl #2  @ r0 += 4 * r3
++
++        bl          patch_h_down_c_4x4_8 @ lower two patches
++        bl          patch_h_down_c_4x4_8_continue
++
++        pop         {r4-r11, pc}
++
++@ Up of Horizontal - works down up
++10:
++        ldrh        r7, [r7]            @ r7 = inv_angle entry for this mode
++        mov         r10, #-128          @ initial top index accumulator for the helper
++
++        push        {r2}                @ the helper moves r2
++        bl          patch_h_up_c_4x4_8
++        bl          patch_h_up_c_4x4_8_continue
++        pop         {r2}
++
++        sub         r0, #16             @ undo the two 8-byte advances of r0 made by the patch stores
++        mov         r10, #-128
++        add         r2, #8              @ left pointer moves on by 4 elements
++        add         r0, r0, r3, lsl #2  @ r0 += 4 * r3
++        sub         r10, r10, r7, lsl #2 @ r10 = -128 - 4 * r7
++
++        bl          patch_h_up_c_4x4_8
++        bl          patch_h_up_c_4x4_8_continue
++
++        pop         {r4-r11, pc}
++
++@ Left of vertical - works down left
++18:
++        vld1.8      {q9}, [r1]          @ q9 = 16 bytes from top
++        sub         r1, r2, #2          @ r1 = left - 2
++        rsb         r12, r6, #32        @ r12 = 32 - r6
++        ldrh        r7, [r7]            @ r7 = inv_angle entry for this mode
++        vdup.8      d6, r6              @ d6 = weight for the shifted reference
++        vext.8      q8, q9, q9, #14     @ q8 = q9 moved up by one 16-bit element
++        sub         r8, r7, #128        @ r8 = left index accumulator, consumed as r8 >> 8
++        vld1.16     {d16[0]}, [r1]      @ element at left - 2 fills the vacated lane 0
++        vdup.8      d7, r12             @ d7 = weight for the unshifted reference
++        mov         r5, #7              @ seven rows in the loops, the eighth at 3: or 5:
++1:                                      @ loop using q9/q8 as references while preparing q11/q10
++        subs        r12, r4             @ carry is clear when this borrows
++        vmull.u8    q0, d18, d7         @ row = (q9 * d7 + q8 * d6 + 16) >> 5
++        it          cc
++        asrcc       r1, r8, #8
++        vmlal.u8    q0, d16, d6
++        it          cc
++        addcc       r12, #32            @ on borrow wrap r12 back up by 32
++        vmull.u8    q1, d19, d7
++        it          cc
++        addcc       r1, r2, r1, lsl #1  @ on borrow r1 = left + 2 * (r8 >> 8)
++        vmlal.u8    q1, d17, d6
++        rsb         r6, r12, #32        @ r6 = 32 - r12
++        vext.8      q10, q8, q8, #14    @ q10 = q8 moved up by one element
++        sub         r5, #1
++        vld1.16     {d20[0]}, [r1]      @ element at r1 fills lane 0 of q10
++        it          cc
++        addcc       r8, r7
++        vmov        q11, q8
++        teq         r5, #0              @ sets the zero flag when only the last row is left; carry from the subs is still used below
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vdup.8      d6, r6              @ weights for the next row
++        vdup.8      d7, r12
++        vst1.8      {q0}, [r0], r3      @ write 16 bytes, r0 += r3
++        bhi         1b                  @ no borrow and rows left: same references again
++        beq         4f                  @ last row next: pick the reference pair at 4:
++2:                                      @ mirror of loop 1: uses q11/q10, prepares q9/q8
++        subs        r12, r4
++        vmull.u8    q0, d22, d7
++        it          cc
++        asrcc       r1, r8, #8
++        vmlal.u8    q0, d20, d6
++        it          cc
++        addcc       r12, #32
++        vmull.u8    q1, d23, d7
++        it          cc
++        addcc       r1, r2, r1, lsl #1
++        vmlal.u8    q1, d21, d6
++        rsb         r6, r12, #32
++        vext.8      q8, q10, q10, #14
++        sub         r5, #1
++        vld1.16     {d16[0]}, [r1]
++        it          cc
++        addcc       r8, r7
++        vmov        q9, q10
++        teq         r5, #0
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vdup.8      d6, r6
++        vdup.8      d7, r12
++        vst1.8      {q0}, [r0], r3
++        bhi         2b                  @ no borrow and rows left: same references again
++        bne         1b                  @ borrow and rows left: switch to the q9/q8 pair
++        bcc         5f                  @ last row after a borrow: use the q9/q8 pair
++3:                                      @ last row from the q11/q10 pair
++        vmull.u8    q0, d22, d7
++        vmlal.u8    q0, d20, d6
++        vmull.u8    q1, d23, d7
++        vmlal.u8    q1, d21, d6
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vst1.8      {q0}, [r0]
++
++        pop         {r4-r11, pc}
++4:
++        bcc         3b                  @ loop 1 ended with a borrow: the prepared q11/q10 pair is current
++5:                                      @ last row from the q9/q8 pair
++        vmull.u8    q0, d18, d7
++        vmlal.u8    q0, d16, d6
++        vmull.u8    q1, d19, d7
++        vmlal.u8    q1, d17, d6
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vst1.8      {q0}, [r0]
++
++        pop         {r4-r11, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        vld1.8      {q9}, [r1]!         @ q9 = top bytes 0..15, r1 += 16
++        rsb         r12, r6, #32        @ r12 = 32 - r6
++        vdup.8      d6, r6              @ d6 = weight for the shifted reference
++        vdup.8      d7, r12             @ d7 = weight for the unshifted reference
++        vext.8      q8, q9, q9, #2      @ q8 = q9 moved down by one 16-bit element
++        vld1.16     {d17[3]}, [r1]!     @ next top element fills the vacated last lane, r1 += 2
++        mov         r5, #7              @ seven rows in the loops, the eighth at 3: or 5:
++1:                                      @ loop using q8/q9 as references while preparing q10/q11
++        vmull.u8    q0, d16, d6         @ row = (q8 * d6 + q9 * d7 + 16) >> 5
++        subs        r12, r4             @ carry is clear when this borrows
++        vmlal.u8    q0, d18, d7
++        it          cc
++        addcc       r12, #32            @ on borrow wrap r12 back up by 32
++        vmull.u8    q1, d17, d6
++        rsb         r6, r12, #32        @ r6 = 32 - r12
++        vmlal.u8    q1, d19, d7
++        sub         r5, #1
++        vext.8      q10, q8, q8, #2     @ q10 = q8 moved down by one element
++        teq         r5, #0              @ sets the zero flag when only the last row is left; carry from the subs is still used below
++        vld1.16     {d21[3]}, [r1]      @ element at r1 fills the last lane of q10
++        it          cc
++        addcc       r1, #2              @ on borrow the element is consumed, step r1
++        vmov        q11, q8
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vdup.8      d6, r6              @ weights for the next row
++        vdup.8      d7, r12
++        vst1.8      {q0}, [r0], r3      @ write 16 bytes, r0 += r3
++        bhi         1b                  @ no borrow and rows left: same references again
++        beq         4f                  @ last row next: pick the reference pair at 4:
++2:                                      @ mirror of loop 1: uses q10/q11, prepares q8/q9
++        vmull.u8    q0, d20, d6
++        subs        r12, r4
++        vmlal.u8    q0, d22, d7
++        it          cc
++        addcc       r12, #32
++        vmull.u8    q1, d21, d6
++        rsb         r6, r12, #32
++        vmlal.u8    q1, d23, d7
++        sub         r5, #1
++        vext.8      q8, q10, q10, #2
++        teq         r5, #0
++        vld1.16     {d17[3]}, [r1]
++        it          cc
++        addcc       r1, #2
++        vmov        q9, q10
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vdup.8      d6, r6
++        vdup.8      d7, r12
++        vst1.8      {q0}, [r0], r3
++        bhi         2b                  @ no borrow and rows left: same references again
++        bne         1b                  @ borrow and rows left: switch to the q8/q9 pair
++        bcc         5f                  @ last row after a borrow: use the q8/q9 pair
++3:                                      @ last row from the q10/q11 pair
++        vmull.u8    q0, d20, d6
++        vmlal.u8    q0, d22, d7
++        vmull.u8    q1, d21, d6
++        vmlal.u8    q1, d23, d7
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vst1.8      {q0}, [r0]
++
++        pop         {r4-r11, pc}
++4:
++        bcc         3b                  @ loop 1 ended with a borrow: the prepared q10/q11 pair is current
++5:                                      @ last row from the q8/q9 pair
++        vmull.u8    q0, d16, d6
++        vmlal.u8    q0, d18, d7
++        vmull.u8    q1, d17, d6
++        vmlal.u8    q1, d19, d7
++        vrshrn.u16  d0, q0, #5
++        vrshrn.u16  d1, q1, #5
++        vst1.8      {q0}, [r0]
++
++        pop         {r4-r11, pc}
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_angular_c_16_neon_8 - angular prediction, chroma 8-bit, 16 rows of 16 16-bit elements
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_c_16_neon_8, export=1
++        ldr         r12, [sp]           @ r12 = mode, fetched before the push moves sp
++        push        {r4-r11, lr}
++        ADRT        r4, angle_2 - 2
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1 @ r7 = &inv_angle[mode - 11], given ADRT loaded the biased table address
++        lsl         r3, #1              @ r3 *= 2 - presumably turns a stride in 16-bit elements into bytes; confirm with the caller
++        ldrsb       r6, [r4, r12]       @ r6 = angle_2[mode - 2]; rewritten as the rows advance
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]       @ r4 = the same table entry, kept as the constant per-row step
++        bge         26f                 @ mode 26..34
++        cmp         r12, #18
++        bge         18f                 @ mode 18..25
++        cmp         r12, #10
++        bge         10f                 @ mode 10..17
++
++@ Down of Horizontal - works down left
++        mov         r10, #4             @ four passes of 4 rows each
++        mov         r1, r2              @ r1 holds the left pointer; the patch helper does not use r1
++1:
++        bl          patch_h_down_c_4x4_8 @ four 4x4 patches side by side
++        bl          patch_h_down_c_4x4_8_continue
++        bl          patch_h_down_c_4x4_8_continue
++        bl          patch_h_down_c_4x4_8_continue
++
++        add         r2, r1, #4*2         @ restore r2, but 4 rows further down left
++        add         r1, r1, #4*2
++        mov         r6, r4              @ reload r6 from the unchanged angle in r4
++        sub         r0, #32             @ undo the four 8-byte advances of r0 made by the patch stores
++        subs        r10, #1
++        add         r0, r0, r3, lsl #2  @ r0 += 4 * r3; flags from the subs are untouched
++        bne         1b
++
++        pop         {r4-r11, pc}
++
++@ Up of Horizontal - works down up
++10:
++        ldrh        r7, [r7]            @ r7 = inv_angle entry for this mode
++        mov         r10, #-128          @ initial top index accumulator for the helper
++        vmov.i8     d6, #1<<2           @ d6 doubles as the pass counter: read through s12, halved every pass
++1:
++        push        {r2, r10}           @ the helper modifies r2 and r10
++        bl          patch_h_up_c_4x4_8
++        bl          patch_h_up_c_4x4_8_continue
++        bl          patch_h_up_c_4x4_8_continue
++        bl          patch_h_up_c_4x4_8_continue
++        pop         {r2, r10}
++
++        vmov        r8, s12             @ r8 = low word of d6, sampled before the shift below
++        sub         r0, #32             @ undo the four 8-byte advances of r0 made by the patch stores
++        add         r2, #8              @ left pointer moves on by 4 elements
++        add         r0, r0, r3, lsl #2  @ r0 += 4 * r3
++        sub         r10, r10, r7, lsl #2 @ r10 -= 4 * r7
++        vshr.u8     d6, #1              @ counter bytes go 4 -> 2 -> 1 -> 0
++        teq         r8, #0              @ stop once the sampled counter was zero, i.e. after 4 passes
++        bne         1b
++
++        pop         {r4-r11, pc}
++
++@ Left of vertical - works down left
++18:
++        vld1.8      {q0-q1}, [r1]       @ q0:q1 = 32 bytes from top
++        sub         r9, r2, #2          @ r9 = left - 2
++        rsb         r12, r6, #32        @ r12 = 32 - r6
++        ldrh        r7, [r7]            @ r7 = inv_angle entry for this mode
++        mov         r8, #-128           @ r8 = left index accumulator, consumed as r8 >> 8
++        vdup.8      d18, r6             @ d18 = weight for the shifted reference (q0:q1)
++        vdup.8      d19, r12            @ d19 = weight for the unshifted copy (q2:q3)
++        mov         r5, #16             @ rows left to write
++1:
++        vld1.16     {d17[3]}, [r9]      @ element that enters the reference at lane 0
++        add         r8, r7
++        vmov        q2, q0              @ q2:q3 = reference before the shift
++        vmov        q3, q1
++        asr         r9, r8, #8
++        vext.8      q1, q0, q1, #14     @ move q0:q1 up by one 16-bit element...
++        add         r9, r2, r9, lsl #1  @ r9 = left + 2 * (r8 >> 8), source of the next new element
++        vext.8      q0, q8, q0, #14     @ ...with d17[3] filling lane 0
++2:
++        vmull.u8    q10, d4, d19        @ row = (q2:q3 * d19 + q0:q1 * d18 + 16) >> 5
++        subs        r12, r4             @ carry is clear when this borrows
++        vmlal.u8    q10, d0, d18
++        it          cc
++        addcc       r12, #32            @ on borrow wrap r12 back up by 32
++        vmull.u8    q11, d5, d19
++        rsb         r6, r12, #32        @ r6 = 32 - r12
++        vmlal.u8    q11, d1, d18
++        sub         r5, #1
++        vmull.u8    q12, d6, d19
++        teq         r5, #0              @ sets the zero flag when no rows are left; the branches below still use the carry from the subs
++        vmlal.u8    q12, d2, d18
++        vmull.u8    q13, d7, d19
++        vmlal.u8    q13, d3, d18
++        vdup.8      d18, r6             @ weights for the next row
++        vdup.8      d19, r12
++        vrshrn.u16  d20, q10, #5        @ round, shift right by 5 and narrow to bytes
++        vrshrn.u16  d21, q11, #5
++        vrshrn.u16  d22, q12, #5
++        vrshrn.u16  d23, q13, #5
++        vst1.8      {q10-q11}, [r0], r3 @ write 32 bytes, r0 += r3
++        bhi         2b                  @ no borrow and rows left: same reference again
++        bne         1b                  @ borrow and rows left: shift the reference first
++
++        pop         {r4-r11, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        add         r5, r1, #32         @ r5 = top + 32
++        vld1.8      {q0-q1}, [r1]!      @ q0:q1 = top bytes 0..31, r1 += 32
++        rsb         r12, r6, #32        @ r12 = 32 - r6
++        vld1.16     {d16[0]}, [r5]      @ d16[0] = element at top + 32
++        mov         r5, #16             @ rows left to write
++        vdup.8      d18, r6             @ d18 = weight for the shifted reference (q0:q1)
++        vdup.8      d19, r12            @ d19 = weight for the unshifted copy (q2:q3)
++1:
++        vmov        q2, q0              @ q2:q3 = reference before the shift
++        add         r1, #2              @ r1 -> element after the one shifted in below
++        vmov        q3, q1
++        vext.8      q0, q0, q1, #2      @ move q0:q1 down by one 16-bit element...
++        vext.8      q1, q1, q8, #2      @ ...with d16[0] filling the last lane
++2:
++        vmull.u8    q10, d0, d18        @ row = (q0:q1 * d18 + q2:q3 * d19 + 16) >> 5
++        subs        r12, r4             @ carry is clear when this borrows
++        vmlal.u8    q10, d4, d19
++        it          cc
++        addcc       r12, #32            @ on borrow wrap r12 back up by 32
++        vmull.u8    q11, d1, d18
++        rsb         r6, r12, #32        @ r6 = 32 - r12
++        vmlal.u8    q11, d5, d19
++        sub         r5, #1
++        vmull.u8    q12, d2, d18
++        teq         r5, #0              @ sets the zero flag when no rows are left; the branches below still use the carry from the subs
++        vmlal.u8    q12, d6, d19
++        vmull.u8    q13, d3, d18
++        vmlal.u8    q13, d7, d19
++        vld1.16     {d16[0]}, [r1]      @ d16[0] = element for the next shift
++        vdup.8      d18, r6             @ weights for the next row
++        vdup.8      d19, r12
++        vrshrn.u16  d20, q10, #5        @ round, shift right by 5 and narrow to bytes
++        vrshrn.u16  d21, q11, #5
++        vrshrn.u16  d22, q12, #5
++        vrshrn.u16  d23, q13, #5
++        vst1.8      {q10-q11}, [r0], r3 @ write 32 bytes, r0 += r3
++        bhi         2b                  @ no borrow and rows left: same reference again
++        bne         1b                  @ borrow and rows left: shift the reference first
++
++        pop         {r4-r11, pc}
++
++endfunc
++
++@------------------------------------------------------------------------------
++@ Data
++
++        .text
++        .balign  64
++angle_2:                                @ one byte per mode 2..34; the functions index it as (angle_2 - 2)[mode]
++        .byte    32                     @ mode 2
++        .byte    26,  21,  17,  13,   9,   5,   2,   0  @ modes 3..10
++        @ Sign inverted from standards table
++        .byte     2,   5,   9,  13,  17,  21,  26,  32  @ modes 11..18
++        .byte    26,  21,  17,  13,   9,   5,   2,   0  @ modes 19..26
++        @ Standard sign
++        .byte     2,   5,   9,  13,  17,  21,  26,  32  @ modes 27..34
++
++        .balign   2
++
++        @ Sign inverted from standards table
++inv_angle:                              @ one halfword per mode 11..25; the functions index it as (inv_angle - 11*2) + 2 * mode
++        .short   4096, 1638,  910,  630,  482,  390,  315  @ modes 11..17
++        .short    256                   @ mode 18
++        .short    315,  390,  482,  630,  910, 1638, 4096  @ modes 19..25
++
++@------------------------------------------------------------------------------
++@
++@ 10 bit fns
++@ Should work for 9 & 11 bit as there is no actual bit-depth specific code
++@ but runs out of register width for 12+ bit
++
++        .text
++        .balign 64
++
++patch_h_down_4x4_10:                    @ 4x4 patch of 16-bit samples; in: r0 dst, r2 left, r3 stride, r4 step, r6 first weight
++        ldrd        r8, r9, [r2]        @ Left
++        rsb         r12, r6, #32        @ r12 = 32 - r6
++        vmov        d0, r8, r9          @ d0 = left samples 0..3
++        vdup.16     d3, r6              @ d3 = weight applied to d1
++        lsr         r8, #16
++        vdup.16     d2, r12             @ d2 = weight applied to d0
++        orr         r8, r8, r9, lsl #16 @ r8 = left samples 1..2 (little-endian view)
++        ldr         r9, [r2, #6]!       @ r2 += 6, r9 = samples 3..4
++        vmov        d1, r8, r9          @ d1 = d0 advanced by one sample
++        // drop through...
++patch_h_down_4x4_10_continue:           @ entry that carries on from the register state of a previous call
++        mov         r5, #4              @ four result vectors per call
++1:
++          subs        r12, r4           @ negative result means the weight wrapped
++        vmul.u16    d4, d0, d2          @ d4 = d0 * d2, kept in 16 bits
++          it          mi
++          addmi       r12, #32
++        vmla.u16    d4, d1, d3          @ d4 += d1 * d3
++          rsb         r6, r12, #32      @ r6 = 32 - r12
++        vext.16     q8, q8, q9, #4      @ result history: d16 <- d17, d17 <- d18
++          it          mi
++          lsrmi       r7, r8, #16       @ r7 is only scratch here
++        vmov        d18, d19            @ d18 <- previous result
++          it          mi
++          vmovmi      d0, r8, r9        @ on wrap d0 takes over the old d1 contents
++          vdup.16     d2, r12
++          it          mi
++          orrmi       r8, r7, r9, lsl #16
++        vrshr.u16   d19, d4, #5         @ d19 = newest result, (d4 + 16) >> 5
++          itt         mi
++          ldrmi       r9, [r2, #2]!     @ on wrap step r2 on by one sample
++          vmovmi      d1, r8, r9
++        subs        r5, #1
++          vdup.16     d3, r6
++        bne         1b
++        // drop through...
++store_tran_4x4_10:                      @ transpose d16-d19 as a 4x4 of 16-bit samples and store it as 4 rows
++        vzip.16     d16, d17
++        add         r6, r0, r3          @ r6 = address of row 1
++        vzip.16     d18, d19
++        lsl         r3, #1              @ stride temporarily doubled
++        vzip.32     q8, q9              @ d16..d19 now hold rows 0..3
++        add         r5, r0, r3          @ r5 = address of row 2
++        vst1.16     {d16}, [r0]!        @ row 0; r0 += 8 ready for the next patch to the right
++        vst1.16     {d17}, [r6], r3     @ row 1; r6 moves on to row 3
++        vst1.16     {d18}, [r5]         @ row 2
++        asr         r3, #1              @ stride restored
++        vst1.16     {d19}, [r6]         @ row 3
++
++        bx          lr
++
++patch_h_up_4x4_10:                      @ 4x4 patch of 16-bit samples; in: r0 dst, r1 top, r2 left, r3 stride, r4 angle, r7 inv_angle, r10 top index
++        ldrd        r8, r9, [r2]        @ r8:r9 = left samples 0..3
++        rsb         r6, r4, #32         @ r6 = 32 - r4
++        vmov        d0, r8, r9
++        vdup.16     d3, r4              @ d3 = weight applied to d1
++        lsr         r11, r8, #16
++        vdup.16     d2, r6              @ d2 = weight applied to d0
++        ldr         r8, [r2, #-2]!      @ r2 -= 2, r8 = samples -1..0
++        orr         r9, r11, r9, lsl #16 @ r9 = samples 1..2 (little-endian view)
++        vmov        d1, r8, r9          @ d1 = d0 moved back by one sample
++        mov         r12, r4             @ r12 = running weight
++        vmul.u16    d4, d0, d2          @ first result is started ahead of the loop
++        vmla.u16    d4, d1, d3
++patch_h_up_4x4_10_continue:             @ entry that carries on from the register state of a previous call
++        mov         r5, #4              @ four result vectors per call
++1:
++          add         r12, r4
++          cmp         r12, #33          @ carry set when r12 is above 32: the reference must step
++          it          cs
++          addcs       r10, r7
++          mov         r11, #0
++          itt         cs
++          subcs       r12, #32
++          tstcs       r10, #1<<31       @ when stepping: zero flag set if r10 is not negative
++          rsb         r6, r12, #32      @ r6 = 32 - r12
++          it          eq
++          asreq       r11, r10, #7      @ eq only holds if the tst above ran and r10 was not negative
++          it          cs
++          vmovcs      d0, r8, r9        @ on a step d0 takes over the old d1 contents
++          it          eq
++          biceq       r11, #1           @ r11 = 2 * (r10 >> 8), byte offset into top
++          vdup.16     d2, r6
++          it          cs
++          lsrcs       r6, r8, #16       @ r6 is free again after the vdup above
++          vdup.16     d3, r12
++        vext.16     q8, q8, q9, #4      @ result history: d16 <- d17, d17 <- d18
++          itt         cs
++          orrcs       r9, r6, r9, lsl #16
++          ldrhcs      r11, [r1, r11]    @ new sample from top at offset r11 (offset 0 when r10 is negative)
++        vmov        d18, d19            @ d18 <- previous result
++          it          hi
++          ldrhhi      r11, [r2, #-2]!   @ stepping with r10 negative: take the sample from left instead, r2 -= 2
++        vrshr.u16   d19, d4, #5         @ d19 = newest result, (d4 + 16) >> 5
++          itt         cs
++          orrcs       r8, r11, r8, lsl #16 @ new sample enters at the bottom of r8
++          vmovcs      d1, r8, r9
++          vmul.u16    d4, d0, d2        @ start the next result
++        subs        r5, #1
++          vmla.u16    d4, d1, d3
++        bne         1b
++
++        b           store_tran_4x4_10   @ tail call: its bx lr returns to the caller of this helper
++
++
++@ ff_hevc_rpi_pred_angular_4_neon_10 - angular prediction, 16-bit sample storage, 4x4 block
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_4_neon_10, export=1
++        ldr         r12, [sp]           @ r12 = mode, fetched before the push moves sp
++        push        {r4-r11, lr}
++        ADRT        r4, angle_2 - 2
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1 @ r7 = &inv_angle[mode - 11], given ADRT loaded the biased table address
++        lsl         r3, #1              @ r3 *= 2 - presumably turns a stride in samples into bytes; confirm with the caller
++        ldrsb       r6, [r4, r12]       @ r6 = angle_2[mode - 2]; rewritten as the rows advance
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]       @ r4 = the same table entry, kept as the constant per-row step
++        bge         26f                 @ mode 26..34
++        cmp         r12, #18
++        bge         18f                 @ mode 18..25
++        cmp         r12, #10
++        bge         10f                 @ mode 10..17
++
++@ Down of Horizontal - works down left
++        bl          patch_h_down_4x4_10 @ one helper call covers the whole block
++        pop         {r4-r11, pc}
++
++@ Up of Horizontal - works down up
++10:
++        ldrh        r7, [r7]            @ r7 = inv_angle entry for this mode
++        mov         r10, #-128          @ initial top index accumulator for the helper
++        bl          patch_h_up_4x4_10
++        pop         {r4-r11, pc}
++
++@ Left of vertical - works down left
++18:
++        ldrd        r8, r9, [r1]        @ Top
++        rsb         r12, r6, #32        @ r12 = 32 - r6
++        ldrh        lr, [r2, #-2]       @ Top-left
++        ldrh        r7, [r7]            @ r7 = inv_angle entry for this mode
++        vmov        d0, r8, r9          @ d0 = top samples 0..3
++        lsl         r9, r9, #16
++        vdup.16     d2, r12             @ d2 = weight applied to d0
++        orr         r9, r9, r8, lsr #16 @ r9 = top samples 1..2
++        orr         r8, lr, r8, lsl #16 @ r8 = top-left, top sample 0
++        vmov        d1, r8, r9          @ d1 = d0 moved up one sample with top-left entering
++        sub         r1, r7, #128        @ r1 becomes the left index accumulator (top pointer no longer needed)
++        mov         r5, #3              @ three rows in the loop, the fourth after it
++1:
++        sel         lr, lr, lr          @ force pipeline 0 on Cortex-A53
++        vdup.16     d3, r6              @ d3 = weight applied to d1
++        vmul.u16    d4, d0, d2
++          subs        r12, r12, r4      @ negative result means the weight wrapped
++        vmla.u16    d4, d1, d3
++          itttt       mi
++          addmi       lr, r2, r1, asr #7
++          bicmi       lr, #1            @ lr = (left + (r1 >> 7)) with bit 0 cleared: address of the next left sample
++          addmi       r12, r12, #32
++          vmovmi      d0, r8, r9        @ on wrap d0 takes over the old d1 contents
++          rsb         r6, r12, #32      @ r6 = 32 - r12
++          itt         mi
++          lslmi       r9, r9, #16
++          ldrhmi      lr, [lr]          @ lr = the new sample
++          vdup.16     d2, r12
++        vrshr.u16   d4, d4, #5          @ (d4 + 16) >> 5
++          itttt       mi
++          orrmi       r9, r9, r8, lsr #16
++          orrmi       r8, lr, r8, lsl #16 @ new sample enters at the bottom of r8
++          vmovmi      d1, r8, r9
++          addmi       r1, r1, r7        @ advance the left index accumulator
++        subs        r5, r5, #1
++        vst1.16     {d4}, [r0], r3      @ write 4 samples, r0 += r3
++        bne         1b
++
++          vdup.16     d3, r6
++          nop                           @ force next insn into pipeline 0 to enable
++          vmul.u16    d4, d0, d2        @ vmla to execute back-to-back on Cortex-A53
++          vmla.u16    d4, d1, d3
++          vrshr.u16   d4, d4, #5
++          vst1.16     {d4}, [r0]
++
++        pop         {r4-r11, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        ldrd        r8, r9, [r1]        @ Top
++        rsb         r12, r6, #32        @ r12 = 32 - r6
++        vmov        d0, r8, r9          @ d0 = top samples 0..3
++        vdup.16     d3, r6              @ d3 = weight applied to d1
++        lsr         r8, #16
++        vdup.16     d2, r12             @ d2 = weight applied to d0
++        orr         r8, r8, r9, lsl #16 @ r8 = top samples 1..2
++        ldr         r9, [r1, #6]!       @ r1 += 6, r9 = top samples 3..4
++        vmov        d1, r8, r9          @ d1 = d0 advanced by one sample
++        mov         r5, #3              @ three rows in the loop, the fourth after it
++1:
++        vmul.u16    d4, d0, d2
++          subs        r12, r4           @ negative result means the weight wrapped
++        vmla.u16    d4, d1, d3
++          it          mi
++          addmi       r12, #32
++          rsb         r6, r12, #32      @ r6 = 32 - r12
++          itt         mi
++          vmovmi      d0, r8, r9        @ on wrap d0 takes over the old d1 contents
++          lsrmi       r8, #16
++          vdup.16     d2, r12
++          itt         mi
++          orrmi       r8, r8, r9, lsl #16
++          ldrmi       r9, [r1, #2]!     @ on wrap step r1 on by one sample
++        vrshr.u16   d4, d4, #5          @ (d4 + 16) >> 5
++          it          mi
++          vmovmi      d1, r8, r9
++          vdup.16     d3, r6
++        subs        r5, #1
++        vst1.16     {d4}, [r0], r3      @ write 4 samples, r0 += r3
++        bne         1b
++
++          vmul.u16    d4, d0, d2        @ last row: no further reference update needed
++          vmla.u16    d4, d1, d3
++          vrshr.u16   d4, d4, #5
++          vst1.16     {d4}, [r0]
++
++        pop         {r4-r11, pc}
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_angular_8_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_8_neon_10, export=1
++        ldr         r12, [sp]           @ r12 = mode, fetched before the push moves sp
++        push        {r4-r11, lr}
++        ADRT        r4, angle_2 - 2
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1 @ r7 -> 16-bit inv_angle entry selected by mode
++        lsl         r3, #1              @ stride * 2 (presumably samples -> bytes, TODO confirm)
++        ldrsb       r6, [r4, r12]       @ r6 = signed angle byte for mode, used as first fraction
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]       @ r4 = same angle, kept as the per-line step
++        bge         26f                 @ mode >= 26
++        cmp         r12, #18
++        bge         18f                 @ mode 18..25
++        cmp         r12, #10
++        bge         10f                 @ mode 10..17, modes below 10 fall through
++
++@ Down of Horizontal - works down left
++        mov         r1,  r2             @ save r2 - r1 unused by patch_down
++
++        bl          patch_h_down_4x4_10
++        bl          patch_h_down_4x4_10_continue
++
++        add         r2, r1, #4*2        @ restore r2, but 4 rows further down left
++        sub         r0, #16             @ presumably undoes the r0 advance of the two patches
++        mov         r6, r4              @ restart the fraction from the angle
++        add         r0, r0, r3, lsl #2  @ output pointer down four lines
++
++        bl          patch_h_down_4x4_10
++        bl          patch_h_down_4x4_10_continue
++
++        pop         {r4-r11, pc}
++
++@ Up of Horizontal - works down up
++10:
++        ldrh        r7, [r7]            @ r7 = inv_angle value for this mode
++        mov         r10, #-128
++
++        push        {r2}                @ keep the left pointer across the patch calls
++        bl          patch_h_up_4x4_10
++        bl          patch_h_up_4x4_10_continue
++        pop         {r2}
++
++        sub         r0, #16             @ presumably undoes the r0 advance of the two patches
++        mov         r10, #-128
++        add         r2, #8              @ left pointer 8 bytes further on
++        add         r0, r0, r3, lsl #2  @ output pointer down four lines
++        sub         r10, r10, r7, lsl #2 @ r10 = -128 - 4 * inv_angle
++
++        bl          patch_h_up_4x4_10
++        bl          patch_h_up_4x4_10_continue
++
++        pop         {r4-r11, pc}
++
++@ Left of vertical - works down left
++18:
++        vld1.16     {q9}, [r1]          @ q9 = 8 samples from top
++        sub         r1, r2, #2          @ r1 -> the 16-bit sample just before left
++        rsb         r12, r6, #32        @ r12 = 32 - fraction
++        ldrh        r7, [r7]            @ r7 = inv_angle value for this mode
++        vdup.16     q2, r6              @ q2 = weight applied to the shifted row
++        vext.16     q8, q9, q9, #7      @ q8 = q9 moved up one lane, lane 0 replaced below
++        sub         r8, r7, #128        @ r8 = inverse angle accumulator
++        vld1.16     {d16[0]}, [r1]      @ lane 0 of q8 = sample at left - 2
++        vdup.16     q3, r12             @ q3 = weight applied to the unshifted row
++        mov         r5, #7              @ 7 lines in the loops, the 8th is written after them
++1:                                      @ loop using q9 (row) and q8 (shifted row)
++        vmul.u16    q0, q9, q3
++        subs        r12, r4             @ step the weight, carry clear means it wrapped
++        vmla.u16    q0, q8, q2
++        ittt        cc
++        asrcc       r1, r8, #8
++        addcc       r12, #32
++        addcc       r1, r2, r1, lsl #1  @ on wrap: r1 -> left + 2 * (r8 >> 8), next sample to shift in
++        vext.16     q10, q8, q8, #7     @ prepare the next pair: q10 = q8 moved up one lane
++        rsb         r6, r12, #32
++        vmov        q11, q8             @ and q11 = the current shifted row
++        sub         r5, #1
++        vrshr.u16   q0, q0, #5          @ rounding shift right by 5
++        it          cc
++        addcc       r8, r7
++        vld1.16     {d20[0]}, [r1]      @ lane 0 of q10 from the left column
++        teq         r5, #0              @ Z tracks the line count, carry from subs is kept
++        vdup.16     q2, r6
++        vdup.16     q3, r12
++        vst1.16     {q0}, [r0], r3
++        bhi         1b                  @ no wrap and lines left: same pair again
++        beq         4f                  @ loop lines done
++2:                                      @ loop using q11 (row) and q10 (shifted row)
++        vmul.u16    q0, q11, q3
++        subs        r12, r4
++        vmla.u16    q0, q10, q2
++        ittt        cc
++        asrcc       r1, r8, #8
++        addcc       r12, #32
++        addcc       r1, r2, r1, lsl #1
++        vext.16     q8, q10, q10, #7    @ prepare the other pair for the next wrap
++        rsb         r6, r12, #32
++        vmov        q9, q10
++        sub         r5, #1
++        vrshr.u16   q0, q0, #5
++        it          cc
++        addcc       r8, r7
++        vld1.16     {d16[0]}, [r1]
++        teq         r5, #0
++        vdup.16     q2, r6
++        vdup.16     q3, r12
++        vst1.16     {q0}, [r0], r3
++        bhi         2b                  @ no wrap and lines left: same pair again
++        bne         1b                  @ wrapped and lines left: switch to q9 and q8
++        bcc         5f                  @ done with a wrap: last line uses q9 and q8
++3:                                      @ last line from q11 and q10
++        vmul.u16    q0, q11, q3
++        vmla.u16    q0, q10, q2
++        vrshr.u16   q0, q0, #5
++        vst1.16     {q0}, [r0]
++
++        pop         {r4-r11, pc}
++4:
++        bcc         3b                  @ loop 1 ended with a wrap: last line uses q11 and q10
++5:                                      @ last line from q9 and q8
++        vmul.u16    q0, q9, q3
++        vmla.u16    q0, q8, q2
++        vrshr.u16   q0, q0, #5
++        vst1.16     {q0}, [r0]
++
++        pop         {r4-r11, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        vld1.16     {q9}, [r1]!         @ q9 = 8 samples from top, r1 moves past them
++        rsb         r12, r6, #32        @ r12 = 32 - fraction
++        vdup.16     q2, r6              @ q2 = weight applied to the shifted row
++        vdup.16     q3, r12             @ q3 = weight applied to the unshifted row
++        vext.16     q8, q9, q9, #1      @ q8 = q9 moved down one lane, top lane replaced below
++        vld1.16     {d17[3]}, [r1]!     @ top lane of q8 = next top sample
++        mov         r5, #7              @ 7 lines in the loops, the 8th is written after them
++1:                                      @ loop using q9 (row) and q8 (shifted row)
++        vmul.u16    q0, q8, q2
++        subs        r12, r4             @ step the weight, carry clear means it wrapped
++        vmla.u16    q0, q9, q3
++        it          cc
++        addcc       r12, #32
++        vext.16     q10, q8, q8, #1     @ prepare the next pair: q10 = q8 moved down one lane
++        rsb         r6, r12, #32
++        vld1.16     {d21[3]}, [r1]      @ top lane of q10 = next top sample
++        sub         r5, #1
++        vmov        q11, q8
++        teq         r5, #0              @ Z tracks the line count, carry from subs is kept
++        vrshr.u16   q0, q0, #5          @ rounding shift right by 5
++        it          cc
++        addcc       r1, #2              @ on wrap: move on to the following top sample
++        vdup.16     q2, r6
++        vdup.16     q3, r12
++        vst1.16     {q0}, [r0], r3
++        bhi         1b                  @ no wrap and lines left: same pair again
++        beq         4f                  @ loop lines done
++2:                                      @ loop using q11 (row) and q10 (shifted row)
++        vmul.u16    q0, q10, q2
++        subs        r12, r4
++        vmla.u16    q0, q11, q3
++        it          cc
++        addcc       r12, #32
++        vext.16     q8, q10, q10, #1
++        rsb         r6, r12, #32
++        vld1.16     {d17[3]}, [r1]
++        sub         r5, #1
++        vmov        q9, q10
++        teq         r5, #0
++        vrshr.u16   q0, q0, #5
++        it          cc
++        addcc       r1, #2
++        vdup.16     q2, r6
++        vdup.16     q3, r12
++        vst1.16     {q0}, [r0], r3
++        bhi         2b                  @ no wrap and lines left: same pair again
++        bne         1b                  @ wrapped and lines left: switch to q9 and q8
++        bcc         5f                  @ done with a wrap: last line uses q9 and q8
++3:                                      @ last line from q11 and q10
++        vmul.u16    q0, q10, q2
++        vmla.u16    q0, q11, q3
++        vrshr.u16   q0, q0, #5
++        vst1.16     {q0}, [r0]
++
++        pop         {r4-r11, pc}
++4:
++        bcc         3b                  @ loop 1 ended with a wrap: last line uses q11 and q10
++5:                                      @ last line from q9 and q8
++        vmul.u16    q0, q8, q2
++        vmla.u16    q0, q9, q3
++        vrshr.u16   q0, q0, #5
++        vst1.16     {q0}, [r0]
++
++        pop         {r4-r11, pc}
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_angular_16_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_16_neon_10, export=1
++        ldr         r12, [sp]           @ r12 = mode, fetched before the push moves sp
++        push        {r4-r11, lr}
++        ADRT        r4, angle_2 - 2
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1 @ r7 -> 16-bit inv_angle entry selected by mode
++        lsl         r3, #1              @ stride * 2 (presumably samples -> bytes, TODO confirm)
++        ldrsb       r6, [r4, r12]       @ r6 = signed angle byte for mode, used as first fraction
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]       @ r4 = same angle, kept as the per-line step
++        bge         26f                 @ mode >= 26
++        cmp         r12, #18
++        bge         18f                 @ mode 18..25
++        cmp         r12, #10
++        bge         10f                 @ mode 10..17, modes below 10 fall through
++
++@ Down of Horizontal - works down left
++        mov         r10, #4             @ four rows of patches
++        mov         r1, r2              @ r1 keeps the left pointer for the current patch row
++1:
++        bl          patch_h_down_4x4_10
++        bl          patch_h_down_4x4_10_continue
++        bl          patch_h_down_4x4_10_continue
++        bl          patch_h_down_4x4_10_continue
++
++        add         r2, r1, #4*2         @ restore r2, but 4 rows further down left
++        add         r1, r1, #4*2
++        mov         r6, r4              @ restart the fraction from the angle
++        sub         r0, #32             @ presumably undoes the r0 advance of the four patches
++        subs        r10, #1
++        add         r0, r0, r3, lsl #2  @ output pointer down four lines
++        bne         1b
++
++        pop         {r4-r11, pc}
++
++@ Up of Horizontal - works down up
++10:
++        ldrh        r7, [r7]            @ r7 = inv_angle value for this mode
++        mov         r10, #-128
++        vmov.i8     d6, #1<<2           @ d6 = row counter held in a vector reg, shifted each pass
++1:
++        push        {r2, r10}           @ keep left pointer and r10 start value across the calls
++        bl          patch_h_up_4x4_10
++        bl          patch_h_up_4x4_10_continue
++        bl          patch_h_up_4x4_10_continue
++        bl          patch_h_up_4x4_10_continue
++        pop         {r2, r10}
++
++        vmov        r8, s12             @ r8 = low word of d6 as it was before this shift
++        sub         r0, #32             @ presumably undoes the r0 advance of the four patches
++        add         r2, #8              @ left pointer 8 bytes further on
++        add         r0, r0, r3, lsl #2  @ output pointer down four lines
++        sub         r10, r10, r7, lsl #2 @ r10 start value drops by 4 * inv_angle per patch row
++        vshr.u8     d6, #1
++        teq         r8, #0              @ bytes seen are 4, 2, 1, 0 - four passes if helpers keep d6
++        bne         1b
++
++        pop         {r4-r11, pc}
++
++@ Left of vertical - works down left
++18:
++        vld1.16     {q0-q1}, [r1]       @ q0-q1 = 16 samples from top
++        sub         r9, r2, #2          @ r9 -> the 16-bit sample just before left
++        rsb         r12, r6, #32        @ r12 = 32 - fraction
++        ldrh        r7, [r7]            @ r7 = inv_angle value for this mode
++        mov         r8, #-128           @ r8 = inverse angle accumulator
++        vdup.16     q9, r6              @ q9 = weight applied to the shifted row
++        vdup.16     q10, r12            @ q10 = weight applied to the unshifted row
++        mov         r5, #16             @ 16 lines
++1:                                      @ shift the reference row by one sample
++        vld1.16     {d17[3]}, [r9]      @ top lane of q8 = sample to shift in from the left column
++        add         r8, r7
++        vmov        q2, q0              @ q2-q3 = row before the shift
++        vmov        q3, q1
++        asr         r9, r8, #8
++        vext.16     q1, q0, q1, #7
++        add         r9, r2, r9, lsl #1  @ r9 -> left + 2 * (r8 >> 8) for the following shift
++        vext.16     q0, q8, q0, #7      @ q0-q1 = row moved up one lane, new sample in lane 0
++2:                                      @ one output line
++        vmul.u16    q11, q2, q10
++        subs        r12, r4             @ step the weight, carry clear means it wrapped
++        vmla.u16    q11, q0, q9
++        it          cc
++        addcc       r12, #32
++        vmul.u16    q12, q3, q10
++        rsb         r6, r12, #32
++        vmla.u16    q12, q1, q9
++        sub         r5, #1
++        teq         r5, #0              @ Z tracks the line count, carry from subs is kept
++        vdup.16     q9, r6
++        vdup.16     q10, r12
++        vrshr.u16   q11, q11, #5        @ rounding shift right by 5
++        vrshr.u16   q12, q12, #5
++        vst1.16     {q11-q12}, [r0], r3
++        bhi         2b                  @ no wrap and lines left: same reference rows
++        bne         1b                  @ wrapped and lines left: shift the reference first
++
++        pop         {r4-r11, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        add         r5, r1, #32
++        vld1.16     {q0-q1}, [r1]!      @ q0-q1 = 16 samples from top, r1 moves past them
++        rsb         r12, r6, #32        @ r12 = 32 - fraction
++        vld1.16     {d16[0]}, [r5]      @ lane 0 of q8 = sample following those 16
++        mov         r5, #16             @ 16 lines
++        vdup.16     q9, r6              @ q9 = weight applied to the shifted row
++        vdup.16     q10, r12            @ q10 = weight applied to the unshifted row
++1:                                      @ shift the reference row by one sample
++        vmov        q2, q0              @ q2-q3 = row before the shift
++        add         r1, #2
++        vmov        q3, q1
++        vext.16     q0, q0, q1, #1
++        vext.16     q1, q1, q8, #1      @ top lane filled from lane 0 of q8
++2:                                      @ one output line
++        vmul.u16    q11, q0, q9
++        subs        r12, r4             @ step the weight, carry clear means it wrapped
++        vmla.u16    q11, q2, q10
++        it          cc
++        addcc       r12, #32
++        vmul.u16    q12, q1, q9
++        rsb         r6, r12, #32
++        vmla.u16    q12, q3, q10
++        sub         r5, #1
++        vld1.16     {d16[0]}, [r1]      @ fetch the sample the next shift will bring in
++        teq         r5, #0              @ Z tracks the line count, carry from subs is kept
++        vdup.16     q9, r6
++        vdup.16     q10, r12
++        vrshr.u16   q11, q11, #5        @ rounding shift right by 5
++        vrshr.u16   q12, q12, #5
++        vst1.16     {q11-q12}, [r0], r3
++        bhi         2b                  @ no wrap and lines left: same reference rows
++        bne         1b                  @ wrapped and lines left: shift the reference first
++
++        pop         {r4-r11, pc}
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_angular_32_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_32_neon_10, export=1
++        ldr         r12, [sp]           @ r12 = mode, fetched before the push moves sp
++        push        {r4-r11, lr}
++        ADRT        r4, angle_2 - 2
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1 @ r7 -> 16-bit inv_angle entry selected by mode
++        lsl         r3, #1              @ stride * 2 (presumably samples -> bytes, TODO confirm)
++        vpush       {d8}                @ only d8 is pushed, d9 is parked in d0 or d1 further down
++        ldrsb       r6, [r4, r12]       @ r6 = signed angle byte for mode, used as first fraction
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]       @ r4 = same angle, kept as the per-line step
++        bge         26f                 @ mode >= 26
++        cmp         r12, #18
++        bge         18f                 @ mode 18..25
++        cmp         r12, #10
++        bge         10f                 @ mode 10..17, modes below 10 fall through
++
++@ Down of Horizontal - works down left
++        add         sp, #8              @ drop the pushed d8, it is not reloaded on this path
++        mov         r10, #8             @ eight rows of patches
++        mov         r1, r2              @ r1 keeps the left pointer for the current patch row
++1:
++        bl          patch_h_down_4x4_10
++        bl          patch_h_down_4x4_10_continue
++        bl          patch_h_down_4x4_10_continue
++        bl          patch_h_down_4x4_10_continue
++        bl          patch_h_down_4x4_10_continue
++        bl          patch_h_down_4x4_10_continue
++        bl          patch_h_down_4x4_10_continue
++        bl          patch_h_down_4x4_10_continue
++
++        add         r2, r1, #4*2         @ restore r2, but 4 rows further down left
++        add         r1, r1, #4*2
++        mov         r6, r4              @ restart the fraction from the angle
++        sub         r0, #64             @ presumably undoes the r0 advance of the eight patches
++        subs        r10, #1
++        add         r0, r0, r3, lsl #2  @ output pointer down four lines
++        bne         1b
++
++        pop         {r4-r11, pc}
++
++@ Up of Horizontal - works down up
++10:
++        add         sp, #8              @ drop the pushed d8, it is not reloaded on this path
++        ldrh        r7, [r7]            @ r7 = inv_angle value for this mode
++        mov         r10, #-128
++        vmov.i8     d6, #1<<6           @ d6 = row counter held in a vector reg, shifted each pass
++1:
++        push        {r2, r10}           @ keep left pointer and r10 start value across the calls
++        bl          patch_h_up_4x4_10
++        bl          patch_h_up_4x4_10_continue
++        bl          patch_h_up_4x4_10_continue
++        bl          patch_h_up_4x4_10_continue
++        bl          patch_h_up_4x4_10_continue
++        bl          patch_h_up_4x4_10_continue
++        bl          patch_h_up_4x4_10_continue
++        bl          patch_h_up_4x4_10_continue
++        pop         {r2, r10}
++
++        vmov        r8, s12             @ r8 = low word of d6 as it was before this shift
++        sub         r0, #64             @ presumably undoes the r0 advance of the eight patches
++        add         r2, #8              @ left pointer 8 bytes further on
++        add         r0, r0, r3, lsl #2  @ output pointer down four lines
++        sub         r10, r10, r7, lsl #2 @ r10 start value drops by 4 * inv_angle per patch row
++        vshr.u8     d6, #1
++        teq         r8, #0              @ bytes seen are 64 down to 1, then 0 - eight passes if helpers keep d6
++        bne         1b
++
++        pop         {r4-r11, pc}
++
++@ Left of vertical - works down left
++18:
++        add         r5, r1, #32
++        vld1.16     {q1-q2}, [r1]       @ q1-q4 = 32 samples from top (second half via r5)
++        rsb         r12, r6, r6, lsl #16 @ r12 = (frac << 16) - frac
++        vld1.16     {q3-q4}, [r5]
++        sub         r9, r2, #2          @ r9 -> the 16-bit sample just before left
++        rsb         r4, r12, #0         @ r4 = angle - (angle << 16), the packed per-line step
++        rsb         r12, r12, #32 << 16 @ r12 = ((32 - frac) << 16) + frac, both weights in one reg
++        ldrh        r7, [r7]            @ r7 = inv_angle value for this mode
++        mov         r8, #-128           @ r8 = inverse angle accumulator
++        vmov        d0, d9              @ park d9 in d0 while q4 is used, restored before return
++        vmov        s2, r12             @ d1[0] = frac, d1[1] = 32 - frac
++        add         r10, r0, #32        @ r10 = store pointer for the second 32 bytes of a line
++        mov         r5, #32             @ 32 lines
++1:                                      @ shift the reference row by one sample
++        vld1.16     {d1[3]}, [r9]       @ top lane of q0 = sample to shift in from the left column
++        add         r8, r7
++        vmov        q11, q4             @ q8-q11 = row before the shift
++        vmov        q10, q3
++        asr         r9, r8, #8
++        vmov        q9, q2
++        add         r9, r2, r9, lsl #1  @ r9 -> left + 2 * (r8 >> 8) for the following shift
++        vmov        q8, q1
++        vext.16     q4, q3, q4, #7
++        vext.16     q3, q2, q3, #7
++        vext.16     q2, q1, q2, #7
++        vext.16     q1, q0, q1, #7      @ q1-q4 = row moved up one lane, new sample in lane 0
++2:                                      @ one output line
++        vmul.u16    q12, q8, d1[1]
++        adds        r12, r4             @ step both packed weights, carry clear means wrap
++        vmla.u16    q12, q1, d1[0]
++        it          cc
++        addcc       r12, #32 << 16      @ on wrap: bring the high half back into range
++        vmul.u16    q13, q9, d1[1]
++        it          cc
++        subcc       r12, #32            @ on wrap: bring the low half back into range
++        vmla.u16    q13, q2, d1[0]
++        sub         r5, #1
++        vmul.u16    q14, q10, d1[1]
++        teq         r5, #0              @ Z tracks the line count, carry from adds is kept
++        vmla.u16    q14, q3, d1[0]
++        vmul.u16    q15, q11, d1[1]
++        vmla.u16    q15, q4, d1[0]
++        vmov        s2, r12             @ publish the new weights for the next line
++        vrshr.u16   q12, q12, #5        @ rounding shift right by 5
++        vrshr.u16   q13, q13, #5
++        vrshr.u16   q14, q14, #5
++        vrshr.u16   q15, q15, #5
++        vst1.16     {q12-q13}, [r0], r3
++        vst1.16     {q14-q15}, [r10], r3
++        bhi         2b                  @ no wrap and lines left: same reference rows
++        bne         1b                  @ wrapped and lines left: shift the reference first
++
++        vpop        {d8}
++        vmov        d9, d0              @ restore d9 from its parking place
++        pop         {r4-r11, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        add         r5, r1, #32
++        vld1.16     {q1-q2}, [r1]       @ q1-q4 = 32 samples from top (second half via r5)
++        rsb         r12, r6, r6, lsl #16 @ r12 = (frac << 16) - frac
++        vld1.16     {q3-q4}, [r5]
++        add         r1, r1, #64         @ r1 -> sample following those 32
++        rsb         r4, r12, #0         @ r4 = angle - (angle << 16), the packed per-line step
++        rsb         r12, r12, #32 << 16 @ r12 = ((32 - frac) << 16) + frac, both weights in one reg
++        vmov        d1, d9              @ park d9 in d1 while q4 is used, restored before return
++        vmov        s1, r12             @ d0[2] = frac, d0[3] = 32 - frac
++        add         r10, r0, #32        @ r10 = store pointer for the second 32 bytes of a line
++        mov         r5, #32             @ 32 lines
++1:                                      @ shift the reference row by one sample
++        vld1.16     {d0[0]}, [r1]!      @ lane 0 of q0 = next top sample
++        vmov        q8, q1              @ q8-q11 = row before the shift
++        vmov        q9, q2
++        vmov        q10, q3
++        vmov        q11, q4
++        vext.16     q1, q1, q2, #1
++        vext.16     q2, q2, q3, #1
++        vext.16     q3, q3, q4, #1
++        vext.16     q4, q4, q0, #1      @ top lane filled from lane 0 of q0
++2:                                      @ one output line
++        vmul.u16    q12, q1, d0[2]
++        adds        r12, r4             @ step both packed weights, carry clear means wrap
++        vmla.u16    q12, q8, d0[3]
++        it          cc
++        addcc       r12, #32 << 16      @ on wrap: bring the high half back into range
++        vmul.u16    q13, q2, d0[2]
++        it          cc
++        subcc       r12, #32            @ on wrap: bring the low half back into range
++        vmla.u16    q13, q9, d0[3]
++        sub         r5, #1
++        vmul.u16    q14, q3, d0[2]
++        teq         r5, #0              @ Z tracks the line count, carry from adds is kept
++        vmla.u16    q14, q10, d0[3]
++        vmul.u16    q15, q4, d0[2]
++        vmla.u16    q15, q11, d0[3]
++        vmov        s1, r12             @ publish the new weights for the next line
++        vrshr.u16   q12, q12, #5        @ rounding shift right by 5
++        vrshr.u16   q13, q13, #5
++        vrshr.u16   q14, q14, #5
++        vrshr.u16   q15, q15, #5
++        vst1.16     {q12-q13}, [r0], r3
++        vst1.16     {q14-q15}, [r10], r3
++        bhi         2b                  @ no wrap and lines left: same reference rows
++        bne         1b                  @ wrapped and lines left: shift the reference first
++
++        vpop        {d8}
++        vmov        d9, d1              @ restore d9 from its parking place
++        pop         {r4-r11, pc}
++
++endfunc
++
++
++
++@ Generate 4x4 chroma patch
++@
++@ In (const)
++@ r1   Up ptr (_up only)
++@ r3   Out stride
++@ r4   Angle add
++@ r7   Inv angle (_up only)
++@
++@ In/Out (updated)
++@ r0   Out pointer - on exit point to start of next patch horizontally (i.e. r0 + patch width)
++@ r2   Left ptr - updated
++@ r6   Angle frac (init to r4 + 32)
++@ r8   Inv angle accumulator
++@ q2   Cur Line - load before 1st call for down - set by _up
++@ q8   Cur Line - load before 1st call for up   - set by _down
++@
++@ Temps
++@ r5   Loop counter
++@ r12
++@ d0, q1, q12-q15
++
++patch_h_down_c_4x4_10:
++        vld1.16     {q12}, [r2]!        @ q12 = 16 bytes (four 32-bit pairs) from the left pointer
++        rsb         r12, r6, #32        @ r12 = 32 - fraction
++        vdup.16     q2, r6              @ q2 = weight applied to the shifted row q12
++        vdup.16     q3, r12             @ q3 = weight applied to the previous row q13
++        mov         r5, #4              @ four results make one patch
++1:                                      @ move the reference on by one 32-bit pair
++        vmov        q13, q12
++        vext.16     q12, q12, q12, #2
++        vld1.32     {d25[1]}, [r2]!     @ top pair of q12 = next pair from the left pointer
++patch_h_down_c_4x4_10_continue:
++2:                                      @ one result, the queue q8 <- q9 <- q10 <- q11 moves up
++        vmov        q8, q9
++        subs        r12, r4             @ step the weight, carry clear means it wrapped
++        vmul.u16    q0, q13, q3
++        it          cc
++        addcc       r12, #32
++        vmla.u16    q0, q12, q2
++        rsb         r6, r12, #32
++        vmov        q9, q10
++        sub         r5, #1
++        vmov        q10, q11
++        teq         r5, #0              @ Z tracks the result count, carry from subs is kept
++        vdup.16     q2, r6
++        vdup.16     q3, r12
++        vrshr.u16   q11, q0, #5         @ newest result goes into q11, rounding shift by 5
++        bhi         2b                  @ no wrap and results left: same reference
++        bne         1b                  @ wrapped and results left: move the reference first
++
++        bcs         3f                  @ patch done without a wrap: store straight away
++        vmov        q13, q12            @ last step wrapped: move the reference now so that
++        vext.16     q12, q12, q12, #2   @ a following _continue call starts on the new pair
++        vld1.32     {d25[1]}, [r2]!
++3:
++
++store_tran_c_4x4_10:
++T       add         r6, r0, r3
++        vzip.32     q8, q10             @ vzip plus vst2 below transpose the four results
++A       add         r6, r0, r3
++T       lsl         r3, #1
++        vzip.32     q9, q11
++A       add         r5, r0, r3, lsl #1
++T       add         r5, r0, r3
++        vst2.32     {d16,d18}, [r0]!    @ line 0: one pair from each result, r0 moves on 16 bytes
++A       lsl         r3, #1
++        vst2.32     {d17,d19}, [r6], r3 @ line 1, then r6 moves two lines down
++        asr         r3, #1              @ undo the doubling of r3
++        vst2.32     {d20,d22}, [r5]     @ line 2
++        mov         r5, #4              @ reload the counter for a following _continue call
++        vst2.32     {d21,d23}, [r6]     @ line 3
++        bx          lr
++
++patch_h_up_c_4x4_10:
++        vld1.16     {q1}, [r2]          @ q1 = 16 bytes (four 32-bit pairs) at the left pointer
++        rsb         r12, r6, #32        @ r12 = 32 - fraction
++        vdup.16     q2, r6              @ q2 = weight applied to the shifted row q1
++        vdup.16     q3, r12             @ q3 = weight applied to the previous row q12
++        mov         r5, #4              @ four results make one patch
++1:                                      @ move the reference on by one 32-bit pair
++        adds        r8, r7              @ advance the inverse angle accumulator
++        vmov        q12, q1
++        it          mi
++        ldrmi       r6, [r2, #-4]!      @ accumulator negative: step r2 back one pair and read it
++        vext.16     q1, q1, q1, #6      @ move q1 up by one pair, pair 0 replaced below
++        itt         pl
++        asrpl       r6, r8, #8
++        ldrpl       r6, [r1, r6, lsl #2] @ otherwise: pair number (r8 >> 8) from the up pointer
++        vmov        s4, r6              @ new pair becomes pair 0 of q1
++patch_h_up_c_4x4_10_continue:
++2:                                      @ one result, the queue q8 <- q9 <- q10 <- q11 moves up
++        vmov        q8, q9
++        subs        r12, r4             @ step the weight, carry clear means it wrapped
++        vmul.u16    q0, q12, q3
++        it          cc
++        addcc       r12, #32
++        vmla.u16    q0, q1, q2
++        rsb         r6, r12, #32
++        vmov        q9, q10
++        sub         r5, #1
++        vmov        q10, q11
++        teq         r5, #0              @ Z tracks the result count, carry from subs is kept
++        vdup.16     q2, r6
++        vdup.16     q3, r12
++        vrshr.u16   q11, q0, #5         @ newest result goes into q11, rounding shift by 5
++        bhi         2b                  @ no wrap and results left: same reference
++        bne         1b                  @ wrapped and results left: move the reference first
++
++        bcs         store_tran_c_4x4_10 @ patch done without a wrap: store straight away
++        adds        r8, r7              @ last step wrapped: move the reference now so that
++        vmov        q12, q1             @ a following _continue call starts on the new pair
++        it          mi
++        ldrmi       r6, [r2, #-4]!
++        vext.16     q1, q1, q1, #6
++        itt         pl
++        asrpl       r6, r8, #8
++        ldrpl       r6, [r1, r6, lsl #2]
++        vmov        s4, r6
++        b           store_tran_c_4x4_10
++
++
++@ ff_hevc_rpi_pred_angular_c_4_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_c_4_neon_10, export=1
++        ldr         r12, [sp]           @ r12 = mode, fetched before the push moves sp
++        push        {r4-r8, lr}
++        ADRT        r4, angle_2 - 2
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1 @ r7 -> 16-bit inv_angle entry selected by mode
++        lsl         r3, #2              @ stride * 4 (presumably chroma pairs -> bytes, TODO confirm)
++        ldrsb       r6, [r4, r12]       @ r6 = signed angle byte for mode, used as first fraction
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]       @ r4 = same angle, kept as the per-line step
++        bge         26f                 @ mode >= 26
++        cmp         r12, #18
++        bge         18f                 @ mode 18..25
++        cmp         r12, #10
++        bge         10f                 @ mode 10..17, modes below 10 fall through
++
++@ Down of Horizontal - works down left
++        bl          patch_h_down_c_4x4_10
++        pop         {r4-r8, pc}
++
++@ Up of Horizontal - works down up
++10:
++        ldrh        r7, [r7]            @ r7 = inv_angle value for this mode
++        mov         r8, #-128
++        sub         r8, r7              @ r8 = -128 - inv_angle, accumulator start for the helper
++        bl          patch_h_up_c_4x4_10
++        pop         {r4-r8, pc}
++
++@ Left of vertical - works down left
++18:
++        vld1.16     {q9}, [r1]          @ q9 = 16 bytes (four 32-bit pairs) from top
++        sub         r1, r2, #4          @ r1 -> the pair just before left
++        rsb         r12, r6, #32        @ r12 = 32 - fraction
++        ldrh        r7, [r7]            @ r7 = inv_angle value for this mode
++        vdup.16     q2, r6              @ q2 = weight applied to the shifted row
++        vext.16     q8, q9, q9, #6      @ q8 = q9 moved up one pair, pair 0 replaced below
++        sub         r8, r7, #128        @ r8 = inverse angle accumulator
++        vld1.32     {d16[0]}, [r1]      @ pair 0 of q8 = pair at left - 4
++        vdup.16     q3, r12             @ q3 = weight applied to the unshifted row
++        mov         r5, #3              @ 3 lines in the loops, the 4th is written after them
++1:                                      @ loop using q9 (row) and q8 (shifted row)
++        vmul.u16    q0, q9, q3
++        subs        r12, r4             @ step the weight, carry clear means it wrapped
++        vmla.u16    q0, q8, q2
++        ittt        cc
++        asrcc       r1, r8, #8
++        addcc       r12, #32
++        addcc       r1, r2, r1, lsl #2  @ on wrap: r1 -> left + 4 * (r8 >> 8), next pair to shift in
++        vext.16     q10, q8, q8, #6     @ prepare the next pair of rows: q10 = q8 moved up one pair
++        rsb         r6, r12, #32
++        vmov        q11, q8             @ and q11 = the current shifted row
++        sub         r5, #1
++        vrshr.u16   q0, q0, #5          @ rounding shift right by 5
++        it          cc
++        addcc       r8, r7
++        vld1.32     {d20[0]}, [r1]      @ pair 0 of q10 from the left column
++        teq         r5, #0              @ Z tracks the line count, carry from subs is kept
++        vdup.16     q2, r6
++        vdup.16     q3, r12
++        vst1.16     {q0}, [r0], r3
++        bhi         1b                  @ no wrap and lines left: same rows again
++        beq         4f                  @ loop lines done
++2:                                      @ loop using q11 (row) and q10 (shifted row)
++        vmul.u16    q0, q11, q3
++        subs        r12, r4
++        vmla.u16    q0, q10, q2
++        ittt        cc
++        asrcc       r1, r8, #8
++        addcc       r12, #32
++        addcc       r1, r2, r1, lsl #2
++        vext.16     q8, q10, q10, #6    @ prepare the other rows for the next wrap
++        rsb         r6, r12, #32
++        vmov        q9, q10
++        sub         r5, #1
++        vrshr.u16   q0, q0, #5
++        it          cc
++        addcc       r8, r7
++        vld1.32     {d16[0]}, [r1]
++        teq         r5, #0
++        vdup.16     q2, r6
++        vdup.16     q3, r12
++        vst1.16     {q0}, [r0], r3
++        bhi         2b                  @ no wrap and lines left: same rows again
++        bne         1b                  @ wrapped and lines left: switch to q9 and q8
++        bcc         5f                  @ done with a wrap: last line uses q9 and q8
++3:                                      @ last line from q11 and q10
++        vmul.u16    q0, q11, q3
++        vmla.u16    q0, q10, q2
++        vrshr.u16   q0, q0, #5
++        vst1.16     {q0}, [r0]
++
++        pop         {r4-r8, pc}
++4:
++        bcc         3b                  @ loop 1 ended with a wrap: last line uses q11 and q10
++5:                                      @ last line from q9 and q8
++        vmul.u16    q0, q9, q3
++        vmla.u16    q0, q8, q2
++        vrshr.u16   q0, q0, #5
++        vst1.16     {q0}, [r0]
++
++        pop         {r4-r8, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        vld1.16     {q9}, [r1]!         @ q9 = 16 bytes (four 32-bit pairs) from top, r1 moves past
++        rsb         r12, r6, #32        @ r12 = 32 - fraction
++        vdup.16     q2, r6              @ q2 = weight applied to the shifted row
++        vdup.16     q3, r12             @ q3 = weight applied to the unshifted row
++        vext.16     q8, q9, q9, #2      @ q8 = q9 moved down one pair, top pair replaced below
++        vld1.32     {d17[1]}, [r1]!     @ top pair of q8 = next top pair
++        mov         r5, #3              @ 3 lines in the loops, the 4th is written after them
++1:                                      @ loop using q9 (row) and q8 (shifted row)
++        vmul.u16    q0, q8, q2
++        subs        r12, r4             @ step the weight, carry clear means it wrapped
++        vmla.u16    q0, q9, q3
++        it          cc
++        addcc       r12, #32
++        vext.16     q10, q8, q8, #2     @ prepare the next pair of rows: q10 = q8 moved down one pair
++        rsb         r6, r12, #32
++        vld1.32     {d21[1]}, [r1]      @ top pair of q10 = next top pair
++        sub         r5, #1
++        vmov        q11, q8
++        teq         r5, #0              @ Z tracks the line count, carry from subs is kept
++        vrshr.u16   q0, q0, #5          @ rounding shift right by 5
++        it          cc
++        addcc       r1, #4              @ on wrap: move on to the following top pair
++        vdup.16     q2, r6
++        vdup.16     q3, r12
++        vst1.16     {q0}, [r0], r3
++        bhi         1b                  @ no wrap and lines left: same rows again
++        beq         4f                  @ loop lines done
++2:                                      @ loop using q11 (row) and q10 (shifted row)
++        vmul.u16    q0, q10, q2
++        subs        r12, r4
++        vmla.u16    q0, q11, q3
++        it          cc
++        addcc       r12, #32
++        vext.16     q8, q10, q10, #2
++        rsb         r6, r12, #32
++        vld1.32     {d17[1]}, [r1]
++        sub         r5, #1
++        vmov        q9, q10
++        teq         r5, #0
++        vrshr.u16   q0, q0, #5
++        it          cc
++        addcc       r1, #4
++        vdup.16     q2, r6
++        vdup.16     q3, r12
++        vst1.16     {q0}, [r0], r3
++        bhi         2b                  @ no wrap and lines left: same rows again
++        bne         1b                  @ wrapped and lines left: switch to q9 and q8
++        bcc         5f                  @ done with a wrap: last line uses q9 and q8
++3:                                      @ last line from q11 and q10
++        vmul.u16    q0, q10, q2
++        vmla.u16    q0, q11, q3
++        vrshr.u16   q0, q0, #5
++        vst1.16     {q0}, [r0]
++
++        pop         {r4-r8, pc}
++4:
++        bcc         3b                  @ loop 1 ended with a wrap: last line uses q11 and q10
++5:                                      @ last line from q9 and q8
++        vmul.u16    q0, q8, q2
++        vmla.u16    q0, q9, q3
++        vrshr.u16   q0, q0, #5
++        vst1.16     {q0}, [r0]
++
++        pop         {r4-r8, pc}
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_angular_c_8_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_c_8_neon_10, export=1
++        ldr         r12, [sp]           @ r12 = mode, fetched before the push moves sp
++        push        {r4-r9, lr}         @ r9 is included: the 18 path below uses it as scratch
++        ADRT        r4, angle_2 - 2
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1 @ r7 -> 16-bit inv_angle entry selected by mode
++        lsl         r3, #2              @ stride * 4 (presumably chroma pairs -> bytes, TODO confirm)
++        ldrsb       r6, [r4, r12]       @ r6 = signed angle byte for mode, used as first fraction
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]       @ r4 = same angle, kept as the per-line step
++        bge         26f                 @ mode >= 26
++        cmp         r12, #18
++        bge         18f                 @ mode 18..25
++        cmp         r12, #10
++        bge         10f                 @ mode 10..17, modes below 10 fall through
++
++@ Down of Horizontal - works down left
++        mov         r1,  r2             @ save r2 - r1 unused by patch_down
++
++        bl          patch_h_down_c_4x4_10
++        bl          patch_h_down_c_4x4_10_continue
++
++        add         r2, r1, #4*4        @ restore r2, but 4 rows further down left
++        sub         r0, #32             @ undo the r0 advance of the two patches (16 bytes each)
++        mov         r6, r4              @ restart the fraction from the angle
++        add         r0, r0, r3, lsl #2  @ output pointer down four lines
++
++        bl          patch_h_down_c_4x4_10
++        bl          patch_h_down_c_4x4_10_continue
++
++        pop         {r4-r9, pc}
++
++@ Up of Horizontal - works down up
++10:
++        ldrh        r7, [r7]            @ r7 = inv_angle value for this mode
++        mov         r8, #-128
++        sub         r8, r7              @ r8 = -128 - inv_angle, accumulator start for the helper
++
++        push        {r2, r8}            @ keep left pointer and accumulator start across the calls
++        bl          patch_h_up_c_4x4_10
++        bl          patch_h_up_c_4x4_10_continue
++        pop         {r2, r8}
++
++        sub         r0, #32             @ undo the r0 advance of the two patches (16 bytes each)
++        mov         r6, r4              @ restart the fraction from the angle
++        add         r2, #16             @ left pointer four pairs further on
++        sub         r8, r8, r7, lsl #2  @ accumulator start drops by 4 * inv_angle
++        add         r0, r0, r3, lsl #2  @ output pointer down four lines
++
++        bl          patch_h_up_c_4x4_10
++        bl          patch_h_up_c_4x4_10_continue
++
++        pop         {r4-r9, pc}
++
++@ Left of vertical - works down left
++18:
++        vld1.16     {q0-q1}, [r1]       @ q0-q1 = 32 bytes (eight 32-bit pairs) from top
++        sub         r9, r2, #4          @ r9 -> the pair just before left
++        rsb         r12, r6, #32        @ r12 = 32 - fraction
++        ldrh        r7, [r7]            @ r7 = inv_angle value for this mode
++        mov         r8, #-128           @ r8 = inverse angle accumulator
++        vdup.16     q9, r6              @ q9 = weight applied to the shifted row
++        vdup.16     q10, r12            @ q10 = weight applied to the unshifted row
++        mov         r5, #8              @ 8 lines
++1:                                      @ shift the reference row by one pair
++        vld1.32     {d17[1]}, [r9]      @ top pair of q8 = pair to shift in from the left column
++        add         r8, r7
++        vmov        q2, q0              @ q2-q3 = row before the shift
++        vmov        q3, q1
++        asr         r9, r8, #8
++        vext.16     q1, q0, q1, #6
++        add         r9, r2, r9, lsl #2  @ r9 -> left + 4 * (r8 >> 8) for the following shift
++        vext.16     q0, q8, q0, #6      @ q0-q1 = row moved up one pair, new pair in front
++2:                                      @ one output line
++        vmul.u16    q11, q2, q10
++        subs        r12, r4             @ step the weight, carry clear means it wrapped
++        vmla.u16    q11, q0, q9
++        it          cc
++        addcc       r12, #32
++        vmul.u16    q12, q3, q10
++        rsb         r6, r12, #32
++        vmla.u16    q12, q1, q9
++        sub         r5, #1
++        teq         r5, #0              @ Z tracks the line count, carry from subs is kept
++        vdup.16     q9, r6
++        vdup.16     q10, r12
++        vrshr.u16   q11, q11, #5        @ rounding shift right by 5
++        vrshr.u16   q12, q12, #5
++        vst1.16     {q11-q12}, [r0], r3
++        bhi         2b                  @ no wrap and lines left: same reference rows
++        bne         1b                  @ wrapped and lines left: shift the reference first
++
++        pop         {r4-r9, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        add         r5, r1, #32
++        vld1.16     {q0-q1}, [r1]!      @ q0-q1 = 32 bytes (eight 32-bit pairs) from top
++        rsb         r12, r6, #32        @ r12 = 32 - fraction
++        vld1.32     {d16[0]}, [r5]      @ pair 0 of q8 = pair following those eight
++        mov         r5, #8              @ 8 lines
++        vdup.16     q9, r6              @ q9 = weight applied to the shifted row
++        vdup.16     q10, r12            @ q10 = weight applied to the unshifted row
++1:                                      @ shift the reference row by one pair
++        vmov        q2, q0              @ q2-q3 = row before the shift
++        add         r1, #4
++        vmov        q3, q1
++        vext.16     q0, q0, q1, #2
++        vext.16     q1, q1, q8, #2      @ top pair filled from pair 0 of q8
++2:                                      @ one output line
++        vmul.u16    q11, q0, q9
++        subs        r12, r4             @ step the weight, carry clear means it wrapped
++        vmla.u16    q11, q2, q10
++        it          cc
++        addcc       r12, #32
++        vmul.u16    q12, q1, q9
++        rsb         r6, r12, #32
++        vmla.u16    q12, q3, q10
++        sub         r5, #1
++        vld1.32     {d16[0]}, [r1]      @ fetch the pair the next shift will bring in
++        teq         r5, #0              @ Z tracks the line count, carry from subs is kept
++        vdup.16     q9, r6
++        vdup.16     q10, r12
++        vrshr.u16   q11, q11, #5        @ rounding shift right by 5
++        vrshr.u16   q12, q12, #5
++        vst1.16     {q11-q12}, [r0], r3
++        bhi         2b                  @ no wrap and lines left: same reference rows
++        bne         1b                  @ wrapped and lines left: shift the reference first
++
++        pop         {r4-r9, pc}
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_angular_c_16_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride        [r3]
++@       unsigned int mode       [sp, #0]  2..34
++
++function ff_hevc_rpi_pred_angular_c_16_neon_10, export=1
++        ldr         r12, [sp]            @ r12 = mode (first stack argument)
++        push        {r4-r10, lr}
++        ADRT        r4, angle_2 - 2
++        ADRT        r7, inv_angle - 11*2
++        add         r7, r7, r12, lsl #1  @ r7 += mode * 2 (halfword table index)
++        lsl         r3, #2               @ r3 = stride * 4
++        vpush       {d8}                 @ 8 bytes on the stack
++        ldrsb       r6, [r4, r12]        @ r6 = signed byte from the angle table for this mode
++        cmp         r12, #26
++        ldrsb       r4, [r4, r12]        @ r4 = second copy of the same table value
++        bge         26f
++        cmp         r12, #18
++        bge         18f
++        cmp         r12, #10
++        bge         10f
++
++@ Down of Horizontal - works down left
++        add         sp, #8               @ discard the d8 slot pushed above
++        mov         r10, #4              @ 4 passes of the loop below
++        mov         r1, r2
++1:
++        bl          patch_h_down_c_4x4_10
++        bl          patch_h_down_c_4x4_10_continue
++        bl          patch_h_down_c_4x4_10_continue
++        bl          patch_h_down_c_4x4_10_continue
++
++        add         r2, r1, #4*4         @ restore r2, but 4 rows further down left
++        add         r1, r1, #4*4
++        mov         r6, r4               @ reload r6 from the copy kept in r4
++        sub         r0, #64
++        subs        r10, #1
++        add         r0, r0, r3, lsl #2   @ r0 += 4 * r3 (flags from subs are kept)
++        bne         1b
++
++        pop         {r4-r10, pc}
++
++@ Up of Horizontal - works down up
++10:
++        add         sp, #8               @ discard the d8 slot pushed above
++        mov         r10, #4              @ 4 passes of the loop below
++        ldrh        r7, [r7]             @ r7 = halfword from the inv_angle table
++        mov         r8, #-128
++        sub         r8, r7
++2:
++        push        {r2, r8}
++        bl          patch_h_up_c_4x4_10
++        bl          patch_h_up_c_4x4_10_continue
++        bl          patch_h_up_c_4x4_10_continue
++        bl          patch_h_up_c_4x4_10_continue
++        pop         {r2, r8}
++
++        sub         r0, #64
++        mov         r6, r4               @ reload r6 from the copy kept in r4
++        add         r2, #16
++        sub         r8, r8, r7, lsl #2   @ r8 -= 4 * r7
++        add         r0, r0, r3, lsl #2   @ r0 += 4 * r3
++        subs        r10, #1
++        bne         2b
++
++        pop         {r4-r10, pc}
++
++@ Left of vertical - works down left
++18:
++        add         r5, r1, #32
++        vld1.16     {q1-q2}, [r1]
++        rsb         r12, r6, r6, lsl #16 @ r12 = (r6 << 16) - r6
++        vld1.16     {q3-q4}, [r5]        @ q1-q4 = 64 bytes starting at r1
++        sub         r9, r2, #4
++        rsb         r4, r12, #0          @ r4 = -r12, added to r12 once per row
++        rsb         r12, r12, #32 << 16
++        ldrh        r7, [r7]             @ r7 = halfword from the inv_angle table
++        mov         r8, #-128
++        vmov        d0, d9               @ keep d9 in d0; copied back before return
++        vmov        s2, r12              @ d1[0], d1[1] (16-bit lanes) = multipliers packed in r12
++        add         r10, r0, #32         @ r10 = r0 + 32: destination for q14-q15
++        mov         r5, #16              @ 16 store pairs in total
++1:
++        vld1.32     {d1[1]}, [r9]
++        add         r8, r7
++        vmov        q11, q4
++        vmov        q10, q3
++        asr         r9, r8, #8
++        vmov        q9, q2
++        add         r9, r2, r9, lsl #2   @ r9 = r2 + 4 * (r8 >> 8): next load address
++        vmov        q8, q1
++        vext.16     q4, q3, q4, #6
++        vext.16     q3, q2, q3, #6
++        vext.16     q2, q1, q2, #6
++        vext.16     q1, q0, q1, #6
++2:
++        vmul.u16    q12, q8, d1[1]
++        adds        r12, r4              @ carry from this add steers the branches below
++        vmla.u16    q12, q1, d1[0]
++        it          cc
++        addcc       r12, #32 << 16
++        vmul.u16    q13, q9, d1[1]
++        it          cc
++        subcc       r12, #32
++        vmla.u16    q13, q2, d1[0]
++        sub         r5, #1
++        vmul.u16    q14, q10, d1[1]
++        teq         r5, #0               @ sets Z; C from the adds is left intact
++        vmla.u16    q14, q3, d1[0]
++        vmul.u16    q15, q11, d1[1]
++        vmla.u16    q15, q4, d1[0]
++        vmov        s2, r12              @ multipliers for the next row
++        vrshr.u16   q12, q12, #5
++        vrshr.u16   q13, q13, #5
++        vrshr.u16   q14, q14, #5
++        vrshr.u16   q15, q15, #5
++        vst1.16     {q12-q13}, [r0], r3
++        vst1.16     {q14-q15}, [r10], r3
++        bhi         2b                   @ carry set and r5 != 0: same vectors, new multipliers
++        bne         1b                   @ r5 != 0: load and shift in new data first
++
++        vpop        {d8}
++        vmov        d9, d0               @ put back the d9 value kept in d0
++        pop         {r4-r10, pc}
++
++@ Right of vertical - works along top - left unused
++26:
++        add         r5, r1, #32
++        vld1.16     {q1-q2}, [r1]
++        rsb         r12, r6, r6, lsl #16 @ r12 = (r6 << 16) - r6
++        vld1.16     {q3-q4}, [r5]        @ q1-q4 = 64 bytes starting at r1
++        add         r1, r1, #64
++        rsb         r4, r12, #0          @ r4 = -r12, added to r12 once per row
++        rsb         r12, r12, #32 << 16
++        vmov        d1, d9               @ keep d9 in d1; copied back before return
++        vmov        s1, r12              @ d0[2], d0[3] (16-bit lanes) = multipliers packed in r12
++        add         r10, r0, #32         @ r10 = r0 + 32: destination for q14-q15
++        mov         r5, #16              @ 16 store pairs in total
++1:
++        vld1.32     {d0[0]}, [r1]!
++        vmov        q8, q1
++        vmov        q9, q2
++        vmov        q10, q3
++        vmov        q11, q4
++        vext.16     q1, q1, q2, #2
++        vext.16     q2, q2, q3, #2
++        vext.16     q3, q3, q4, #2
++        vext.16     q4, q4, q0, #2
++2:
++        vmul.u16    q12, q1, d0[2]
++        adds        r12, r4              @ carry from this add steers the branches below
++        vmla.u16    q12, q8, d0[3]
++        it          cc
++        addcc       r12, #32 << 16
++        vmul.u16    q13, q2, d0[2]
++        it          cc
++        subcc       r12, #32
++        vmla.u16    q13, q9, d0[3]
++        sub         r5, #1
++        vmul.u16    q14, q3, d0[2]
++        teq         r5, #0               @ sets Z; C from the adds is left intact
++        vmla.u16    q14, q10, d0[3]
++        vmul.u16    q15, q4, d0[2]
++        vmla.u16    q15, q11, d0[3]
++        vmov        s1, r12              @ multipliers for the next row
++        vrshr.u16   q12, q12, #5
++        vrshr.u16   q13, q13, #5
++        vrshr.u16   q14, q14, #5
++        vrshr.u16   q15, q15, #5
++        vst1.16     {q12-q13}, [r0], r3
++        vst1.16     {q14-q15}, [r10], r3
++        bhi         2b                   @ carry set and r5 != 0: same vectors, new multipliers
++        bne         1b                   @ r5 != 0: load and shift in new data first
++
++        vpop        {d8}
++        vmov        d9, d1               @ put back the d9 value kept in d1
++        pop         {r4-r10, pc}
++
++endfunc
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcpred_intra_dc_neon.S
+@@ -0,0 +1,705 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox, Ben Avison
++*/
++
++
++#include "libavutil/arm/asm.S"
++#include "neon.S"
++
++
++@ ff_hevc_rpi_pred_dc_4_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_dc_4_neon_8, export=1
++
++        @ Average the els of top & left
++        ldr         r2, [r2]              @ r2 = 4 bytes read from left
++        vld1.32     {d0[0]}, [r1]         @ s0 = 4 bytes read from top
++        mov         r1, #2                @ multiplier for element 0 (goes into d4[0])
++        vmov        s1, r2
++        vmov        s2, r2
++        vmov.i16    q2, #3
++        add         r2, r0, r3            @ r2 = r0 + stride (second output row)
++        vaddl.u8    q1, d0, d1    @ d2[0] = top[0] + left[0]
++        lsl         r3, #1                @ r3 = 2 * stride: r0 and r2 take alternate rows
++        vmovl.u8    q0, d0
++        vmov.i64    d7, #0xffff
++        vmov.16     d4[0], r1     @ 2, 3, 3, 3...
++        vpadd.i16   d6, d2, d2    @ 2 (top & bottom of vector the same)
++        vbit        d0, d2, d7    @ q0 = top[0]+left[0], top[1..3], left[0..3]
++
++        @ top line gets some smoothing
++        @ (top[i] + 3*dc + 2) >> 2
++        @ as does left
++        @ top_line[0] is extra special
++        @ (top[0] + left[0] + 2*dc + 2) >> 2
++
++        vmov.i64    d7, #0xff
++        vpadd.i16   d6, d6        @ 1 (all the same)
++        vrshr.u16   d6, #3                @ dc = (sum + 4) >> 3
++        vmla.i16    q0, q2, d6[0]
++        vdup.8      d6, d6[0]             @ d6 = dc in every byte
++        vrshrn.i16  d0, q0, #2
++
++        @ Store top line
++        vst1.32     {d0[0]}, [r0], r3
++
++        @ Store the rest
++        vshr.u64    d1, d0, #5*8          @ byte 5 of d0 moved down to byte 0
++        vshr.u64    d2, d0, #6*8          @ byte 6 of d0 moved down to byte 0
++        vshr.u64    d3, d0, #7*8          @ byte 7 of d0 moved down to byte 0
++        vbif        d1, d6, d7            @ keep byte 0, fill the other bytes with dc
++        vbif        d2, d6, d7
++        vst1.32     {d1[0]}, [r2], r3
++        vbif        d3, d6, d7
++        vst1.32     {d2[0]}, [r0]
++        vst1.32     {d3[0]}, [r2]
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_dc_c_4_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_dc_c_4_neon_8, export=1
++
++        @ Average the els of top & left
++        vld1.8      {d0}, [r1]
++        vld1.8      {d1}, [r2]
++A       add         r2, r0, r3, lsl #1  @ r2 = r0 + 2 * r3 (A/T presumably pick ARM/Thumb code; see asm.S)
++A       lsl         r3, #2              @ r3 = 4 * original r3
++T       lsl         r3, #1
++T       add         r2, r0, r3          @ same r2 as on the A path
++T       lsl         r3, #1              @ same r3 as on the A path
++        vaddl.u8    q0, d0, d1
++        vadd.i16    d0, d1       @ d0 has 2 val pairs
++        vpadd.i32   d2, d0, d0   @ This adds U & V separately
++        vpadd.i32   d3, d0, d0
++        vrshrn.u16  d0, q1, #3          @ (sum + 4) >> 3, narrowed to bytes
++
++        @ Store
++        vst1.8      {d0}, [r0], r3      @ r0 and r2 take alternate rows
++        vst1.8      {d0}, [r2], r3
++        vst1.8      {d0}, [r0]
++        vst1.8      {d0}, [r2]
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_dc_8_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_dc_8_neon_8, export=1
++
++        @ Average the els of top & left
++        vld1.8      {d0}, [r1]            @ d0 = 8 bytes from top
++        mov         r1, #2                @ multiplier for element 0 (goes into d4[0])
++        vld1.8      {d16}, [r2]           @ d16 = 8 bytes from left
++        vmov.i16    q2, #3
++        vmov.i64    d7, #0xffff
++        vaddl.u8    q1, d0, d16   @ d2[0] = top[0] + left[0]
++        vmovl.u8    q0, d0
++        vadd.i16    d6, d2, d3    @ d6 has 4 vals
++        vmov.16     d4[0], r1     @ 2, 3, 3, 3...
++        vbit        d0, d2, d7    @ q0 = top[0]+left[0], top[1..7]
++
++        @ top line gets some smoothing
++        @ (top[i] + 3*dc + 2) >> 2
++        @ as does left
++        @ top_line[0] is extra special
++        @ (top[0] + left[0] + 2*dc + 2) >> 2
++
++        vmov.i64    d7, #0xff
++        vmovl.u8    q1, d16
++        vpadd.i16   d6, d6        @ 2 (top & bottom of vector the same)
++        vpadd.i16   d6, d6        @ 1 (all the same)
++        vrshr.u16   d6, #4                @ dc = (sum + 8) >> 4
++        vmla.i16    q1, q2, d6[0]
++        vmla.i16    q0, q2, d6[0]
++        vdup.8      d6, d6[0]             @ d6 = dc in every byte
++        vrshrn.i16  d2, q1, #2
++        vrshrn.i16  d0, q0, #2
++
++        @ Store top line
++        vst1.8      {d0}, [r0], r3
++
++        @ Store the rest
++        vshr.u64    d2, #8                @ step to the next byte of d2
++        vbit        d6, d2, d7            @ byte 0 of the row comes from d2, the rest stay dc
++        vshr.u64    d2, #8
++        vst1.8      {d6}, [r0], r3
++        mov         r1, #6                @ 6 rows left, 2 per loop pass
++1:
++        vbit        d6, d2, d7
++        vshr.u64    d2, #8
++        vst1.8      {d6}, [r0], r3
++        subs        r1, #2
++        vbit        d6, d2, d7
++        vshr.u64    d2, #8
++        vst1.8      {d6}, [r0], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_dc_c_8_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_dc_c_8_neon_8, export=1
++
++        @ Average the els of top & left
++        vld1.8      {q0}, [r1]
++        mov         r1, #8              @ row counter, 4 rows per loop pass
++        vld1.8      {q1}, [r2]
++T       lsl         r3, #1
++        vaddl.u8    q0, d0, d1
++A       add         r2, r0, r3, lsl #1  @ r2 = r0 + 2 * r3 (A/T presumably pick ARM/Thumb code; see asm.S)
++A       lsl         r3, #2              @ r3 = 4 * original r3
++T       add         r2, r0, r3          @ same r2 as on the A path
++T       lsl         r3, #1              @ same r3 as on the A path
++        vaddl.u8    q1, d2, d3
++        vadd.i16    q1, q0
++        vadd.i16    d3, d2        @ d3 has 2 val pairs
++        vpadd.i32   d2, d3, d3    @ This add U & V separately
++        vpadd.i32   d3, d3, d3
++        vrshrn.u16  d0, q1, #4          @ (sum + 8) >> 4, narrowed to bytes
++        vrshrn.u16  d1, q1, #4
++
++        @ Store
++1:
++        vst1.8      {q0}, [r0], r3      @ r0 and r2 take alternate rows
++        subs        r1, #4
++        vst1.8      {q0}, [r2], r3
++        vst1.8      {q0}, [r0], r3
++        vst1.8      {q0}, [r2], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_dc_16_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_dc_16_neon_8, export=1
++
++        @ Average the els of top & left
++        vld1.8      {q8}, [r1]            @ q8 = 16 bytes from top
++        mov         r1, #2                @ multiplier for element 0 (goes into d2[0])
++        vld1.8      {q9}, [r2]            @ q9 = 16 bytes from left
++        vaddl.u8    q10, d16, d17
++        vaddl.u8    q11, d16, d18         @ d22[0] = top[0] + left[0]
++        vaddl.u8    q0, d18, d19
++        vmov.i16    q1, #3
++        vadd.i16    q10, q0
++        vmovl.u8    q0, d18
++        vadd.i16    d20, d21
++        vmov.i16    d2[0], r1     @ 2, 3, 3, 3...
++
++        @ top line gets some smoothing
++        @ (top[i] + 3*dc + 2) >> 2
++        @ as does left
++        @ top_line[0] is extra special
++        @ (top[0] + left[0] + 2*dc + 2) >> 2
++
++        vmovl.u8    q2, d16
++        vmovl.u8    q9, d19
++        vpadd.i16   d20, d20      @ 2 (top & bottom of vector the same)
++        vmov.i64    d7, #0xffff
++        vmovl.u8    q8, d17
++        vbit        d4, d22, d7   @ q2 = top[0]+left[0], top[1..7]
++        vmov.i64    d7, #0xff
++        vpadd.i16   d20, d20      @ 1 (all the same)
++        vrshr.u16   d21, d20, #5          @ dc = (sum + 16) >> 5
++        vrshr.u16   d20, d20, #5          @ q10 = dc in every 16-bit lane
++        vmla.i16    q0, q10, d2[1]
++        vmla.i16    q9, q10, d2[1]
++        vmla.i16    q2, q10, q1
++        vmla.i16    q8, q10, d2[1]
++        vdup.8      q1, d20[0]            @ q1 = dc in every byte
++        vrshrn.i16  d0, q0, #2
++        vrshrn.i16  d1, q9, #2
++        vrshrn.i16  d4, q2, #2
++        vrshrn.i16  d5, q8, #2
++        vext.8      q0, q0, q0, #1        @ rotate q0 by one byte
++
++        @ Store top line
++        vst1.8      {q2}, [r0], r3
++
++        @ Store the rest
++        mov         r1, #15               @ 15 rows after the top line
++1:
++        vbit        d2, d0, d7            @ byte 0 of the row comes from q0, the rest stay dc
++        vext.8      q0, q0, q0, #1
++        subs        r1, #1
++        vst1.8      {q1}, [r0], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_dc_c_16_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_dc_c_16_neon_8, export=1
++
++        @ Average the els of top & left
++        vld1.8      {q0-q1}, [r1]
++        mov         r1, #16             @ row counter, 2 rows per loop pass
++        vld1.8      {q2-q3}, [r2]
++T       lsl         r3, #1
++        vaddl.u8    q0, d0, d1
++A       add         r2, r0, r3, lsl #1  @ r2 = r0 + 2 * r3 (A/T presumably pick ARM/Thumb code; see asm.S)
++T       add         r2, r0, r3          @ same r2 as on the A path
++        vaddl.u8    q1, d2, d3
++A       lsl         r3, #2              @ r3 = 4 * original r3
++T       lsl         r3, #1              @ same r3 as on the A path
++        vaddl.u8    q2, d4, d5
++        vaddl.u8    q3, d6, d7
++        vadd.i16    q0, q1
++        vadd.i16    q2, q3
++        vadd.i16    q0, q2
++        vadd.i16    d0, d1        @ d0 has 2 val pairs
++        vpadd.i32   d4, d0, d0    @ This adds U & V separately
++        vpadd.i32   d5, d0, d0
++        vrshrn.u16  d0, q2, #5          @ (sum + 16) >> 5, narrowed to bytes
++        vrshrn.u16  d1, q2, #5
++        vrshrn.u16  d2, q2, #5
++        vrshrn.u16  d3, q2, #5
++
++        @ Store
++1:
++        vst1.8      {q0-q1}, [r0], r3   @ r0 and r2 take alternate rows
++        subs        r1, #2
++        vst1.8      {q0-q1}, [r2], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_dc_32_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_dc_32_neon_8, export=1
++
++        @ Average the els of top & left
++        vld1.8      {q0-q1}, [r1]         @ 32 bytes from top
++        mov         r1, #32               @ row counter, 2 rows per loop pass
++        vld1.8      {q2-q3}, [r2]         @ 32 bytes from left
++        add         r2, r0, r3            @ r2 = r0 + stride (second output row)
++        vaddl.u8    q0, d0, d1
++        lsl         r3, #1                @ r3 = 2 * stride: r0 and r2 take alternate rows
++        vaddl.u8    q1, d2, d3
++        vaddl.u8    q2, d4, d5
++        vaddl.u8    q3, d6, d7
++        vadd.i16    q0, q1
++        vadd.i16    q2, q3
++        vadd.i16    q0, q2
++        vadd.i16    d0, d1        @ d0 has 4 vals
++        vpadd.i16   d0, d0        @ 2 (top & bottom the same)
++        vpadd.i16   d4, d0, d0    @ 1 (all the same)
++        vpadd.i16   d5, d0, d0
++        vrshrn.u16  d0, q2, #6            @ (sum + 32) >> 6, narrowed to bytes
++        vrshrn.u16  d1, q2, #6
++        vrshrn.u16  d2, q2, #6
++        vrshrn.u16  d3, q2, #6
++
++        @ Store
++1:
++        vst1.8      {q0-q1}, [r0], r3
++        subs        r1, #2
++        vst1.8      {q0-q1}, [r2], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ -----------------------------------------------------------------------------
++@
++@ 10 Bit versions
++@
++@ There is no actual bit depth dependency in this code except that our
++@ intermediate results will overflow the 16 bits they are stored in
++@ All these functions are good to 10 bits - with the worst case being
++@ in dc_32 where we use all 16 bits.
++
++
++@ ff_hevc_rpi_pred_dc_4_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_dc_4_neon_10, export=1
++
++        @ Average the els of top & left
++        vld1.16     {d0}, [r1]          @ d0 = 4 x 16-bit from top
++        mov         r1, #2              @ multiplier for element 0 (goes into d4[0])
++        vld1.16     {d1}, [r2]          @ d1 = 4 x 16-bit from left
++T       lsl         r3, #1
++        vmov.i16    q2, #3
++A       add         r2, r0, r3, lsl #1  @ r2 = r0 + 2 * r3 (A/T presumably pick ARM/Thumb code; see asm.S)
++T       add         r2, r0, r3          @ same r2 as on the A path
++        vadd.u16    d2, d0, d1    @ d2[0] = top[0] + left[0]
++A       lsl         r3, #2              @ r3 = 4 * original r3
++T       lsl         r3, #1              @ same r3 as on the A path
++        vmov.16     d4[0], r1     @ 2, 3, 3, 3...
++        vmov.i64    d7, #0xffff
++        vbit        d0, d2, d7    @ q0 = top[0]+left[0], top[1..3], left[0..3]
++
++        @ top line gets some smoothing
++        @ (top[i] + 3*dc + 2) >> 2
++        @ as does left
++        @ top_line[0] is extra special
++        @ (top[0] + left[0] + 2*dc + 2) >> 2
++
++        vpadd.i16   d6, d2, d2    @ 2 (top & bottom of vector the same)
++        vpadd.i16   d6, d6        @ 1 (all the same)
++        vrshr.u16   d6, #3              @ dc = (sum + 4) >> 3, in every lane of d6
++        vmla.i16    q0, q2, d6[0]
++        vrshr.u16   q0, #2
++
++        @ Store top line
++        vst1.16     {d0}, [r0], r3
++
++        @ Store the rest
++        vshr.u64    d3, d1, #1*16       @ lane 1 of d1 moved down to lane 0
++        vshr.u64    d4, d1, #2*16       @ lane 2 of d1 moved down to lane 0
++        vshr.u64    d5, d1, #3*16       @ lane 3 of d1 moved down to lane 0
++        vbif        d3, d6, d7          @ keep lane 0, fill the other lanes with dc
++        vbif        d4, d6, d7
++        vst1.16     {d3}, [r2], r3
++        vbif        d5, d6, d7
++        vst1.16     {d4}, [r0]
++        vst1.16     {d5}, [r2]
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_dc_c_4_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]  (In pels - needs * 4)
++
++function ff_hevc_rpi_pred_dc_c_4_neon_10, export=1
++
++        @ Average the els of top & left
++        vld1.8      {q0}, [r1]
++        vld1.8      {q1}, [r2]
++A       add         r2, r0, r3, lsl #2  @ r2 = r0 + 4 * r3 (A/T presumably pick ARM/Thumb code; see asm.S)
++A       lsl         r3, #3              @ r3 = 8 * original r3
++T       lsl         r3, #2
++T       add         r2, r0, r3          @ same r2 as on the A path
++T       lsl         r3, #1              @ same r3 as on the A path
++        vadd.i16    q0, q1
++        vadd.i16    d0, d1       @ d0 has 2 val pairs
++        vpadd.i32   d2, d0, d0   @ This adds U & V separately
++        vpadd.i32   d3, d0, d0
++        vrshr.u16   q0, q1, #3          @ (sum + 4) >> 3 in every 16-bit lane
++
++        vst1.16     {q0}, [r0], r3      @ r0 and r2 take alternate rows
++        vst1.16     {q0}, [r2], r3
++        vst1.16     {q0}, [r0]
++        vst1.16     {q0}, [r2]
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_dc_8_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_dc_8_neon_10, export=1
++
++        @ Average the els of top & left
++        vld1.16     {q0}, [r1]          @ q0 = 8 x 16-bit from top
++        mov         r1, #2              @ multiplier for element 0 (goes into d4[0])
++        vld1.16     {q8}, [r2]          @ q8 = 8 x 16-bit from left
++T       lsl         r3, #1
++        vmov.i16    q2, #3
++A       add         r2, r0, r3, lsl #1  @ r2 = r0 + 2 * r3 (A/T presumably pick ARM/Thumb code; see asm.S)
++T       add         r2, r0, r3          @ same r2 as on the A path
++        vadd.i16    q1, q0, q8    @ q1[0] = top[0] + left[0]
++A       lsl         r3, #2              @ r3 = 4 * original r3
++T       lsl         r3, #1              @ same r3 as on the A path
++        vmov.i64    d7, #0xffff
++        vmov.16     d4[0], r1     @ 2, 3, 3, 3...
++        vadd.i16    d6, d2, d3    @ d6 has 4 vals
++        vbit        d0, d2, d7    @ q0 = top[0]+left[0], top[1..7]
++
++        @ top line gets some smoothing
++        @ (top[i] + 3*dc + 2) >> 2
++        @ as does left
++        @ top_line[0] is extra special
++        @ (top[0] + left[0] + 2*dc + 2) >> 2
++
++        vpadd.i16   d6, d6        @ 2 (top & bottom of vector the same)
++        vpadd.i16   d6, d6        @ 1 (all the same)
++        vrshr.u16   d6, #4              @ dc = (sum + 8) >> 4
++        vmla.i16    q8, q2, d6[0]
++        vmla.i16    q0, q2, d6[0]
++        vdup.16     q2, d6[0]           @ q2 = dc in every lane (rows written via r0)
++        vdup.16     q9, d6[0]           @ q9 = dc in every lane (rows written via r2)
++        vrshr.u16   q8, q8, #2
++        vrshr.u16   q0, q0, #2
++        vext.16     q1, q8, q8, #1      @ q1 = q8 rotated by one lane
++
++        @ Store top line
++        vst1.16     {q0}, [r0], r3
++
++        @ Store the rest
++        vbit        d18, d2, d7         @ lane 0 of the row comes from q1, the rest stay dc
++        vst1.16     {q9}, [r2], r3
++        mov         r1, #6              @ 6 rows left, 2 per loop pass
++1:
++        vext.16     q8, q8, q8, #2      @ rotate both sources by two lanes
++        subs        r1, #2
++        vext.16     q1, q1, q1, #2
++        vbit        d4, d16, d7
++        vst1.16     {q2}, [r0], r3
++        vbit        d18, d2, d7
++        vst1.16     {q9}, [r2], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_dc_c_8_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]  (In pels - needs * 4)
++
++function ff_hevc_rpi_pred_dc_c_8_neon_10, export=1
++
++        @ Average the els of top & left
++        vld1.16     {q0-q1}, [r1]
++        mov         r1, #8              @ row counter, 2 rows per loop pass
++        vld1.16     {q2-q3}, [r2]
++T       lsl         r3, #2
++        vadd.i16    q1, q0
++A       add         r2, r0, r3, lsl #2  @ r2 = r0 + 4 * r3 (A/T presumably pick ARM/Thumb code; see asm.S)
++A       lsl         r3, #3              @ r3 = 8 * original r3
++T       add         r2, r0, r3          @ same r2 as on the A path
++T       lsl         r3, #1              @ same r3 as on the A path
++        vadd.i16    q2, q3
++        vadd.i16    q1, q2
++        vadd.i16    d3, d2        @ d3 has 2 val pairs
++        vpadd.i32   d2, d3, d3    @ This add U & V separately
++        vpadd.i32   d3, d3, d3
++        vrshr.u16   q0, q1, #4          @ (sum + 8) >> 4 in every 16-bit lane
++        vrshr.u16   q1, q1, #4
++
++        @ Store
++1:
++        vst1.8      {q0-q1}, [r0], r3   @ r0 and r2 take alternate rows
++        subs        r1, #2
++        vst1.8      {q0-q1}, [r2], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_dc_16_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_dc_16_neon_10, export=1
++
++        @ Average the els of top & left
++        vld1.16     {q8-q9}, [r1]         @ q8-q9 = 16 x 16-bit from top
++        mov         r1, #2                @ multiplier for element 0 (goes into d6[0])
++        vld1.16     {q10-q11}, [r2]       @ q10-q11 = 16 x 16-bit from left
++        lsl         r3, #1        @ stride given in pels
++        vadd.i16    q0, q8, q9
++        vadd.i16    q1, q10, q11
++        vmov.i16    q3, #3
++        vadd.i16    q1, q0
++        vadd.i16    d0, d16, d20          @ d0[0] = top[0] + left[0]
++        vmov.i64    d31, #0xffff
++        vadd.i16    d3, d2
++        vmov.16     d6[0], r1     @ 2, 3, 3, 3...
++
++        @ top line gets some smoothing
++        @ (top[i] + 3*dc + 2) >> 2
++        @ as does left
++        @ topline[0] is extra special
++        @ (top[0] + left[0] + 2*dc + 2) >> 2
++
++        vbit        d16, d0, d31  @ q8 = top[0]+left[0], top[1..7]
++        vpadd.i16   d3, d3        @ 2 (top & bottom of vector the same)
++        vpadd.i16   d3, d3        @ 1 (all the same)
++        vrshr.u16   d2, d3, #5            @ dc = (sum + 16) >> 5
++        vrshr.u16   d3, d3, #5            @ q1 = dc in every lane
++        vmov        q0, q1                @ q0-q1 = one full row of dc
++        vmla.i16    q10, q1, d6[1]
++        vmla.i16    q11, q1, d6[1]
++        vmla.i16    q8, q1, q3
++        vmla.i16    q9, q1, d6[1]
++        vrshr.u16   q2, q10, #2
++        vrshr.u16   q3, q11, #2
++        vrshr.u16   q8, #2
++        vrshr.u16   q9, #2
++        vext.16     q2, q2, q2, #1        @ rotate q2 by one lane
++        mov         r1, #7<<29            @ counter in the top 3 bits: 7 passes, then it wraps for 8 more
++
++        @ Store top line
++        vst1.16     {q8-q9}, [r0], r3
++
++        @ Store the rest
++1:
++        vbit        d0, d4, d31           @ lane 0 of the row comes from q2, the rest stay dc
++        vext.16     q2, q2, q2, #1
++        subs        r1, #1<<29
++        vst1.16     {q0-q1}, [r0], r3
++        bne         1b                    @ 7 rows, r1 ends at 0
++1:
++        vbit        d0, d6, d31           @ lane 0 of the row comes from q3, the rest stay dc
++        vext.16     q3, q3, q3, #1
++        subs        r1, #1<<29
++        vst1.16     {q0-q1}, [r0], r3
++        bne         1b                    @ 8 rows: r1 wraps from 0 to 7<<29 and counts down again
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_dc_c_16_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]  (In pels - needs * 4)
++
++function ff_hevc_rpi_pred_dc_c_16_neon_10, export=1
++
++        @ Average the els of top & left
++        vldm        r1, {q0-q3}           @ 64 bytes from top
++        vldm        r2, {q8-q11}          @ 64 bytes from left
++        vadd.i16    q0, q1
++        mov         r1, #16               @ row counter, 1 row per loop pass
++        vadd.i16    q2, q3
++        add         r2, r0, #32           @ r2 = second 32 bytes of the row
++        vadd.i16    q8, q9
++        lsl         r3, #2                @ stride: pels -> bytes (* 4)
++        vadd.i16    q10, q11
++        vadd.u16    q0, q2
++        vadd.u16    q8, q10
++        vadd.i16    q0, q8
++        vadd.i16    d0, d1        @ d0 has 2 val pairs
++        vpadd.i32   d4, d0, d0    @ This adds U & V separately
++        vpadd.i32   d5, d0, d0
++        vrshr.u16   q0, q2, #5            @ (sum + 16) >> 5 in every 16-bit lane
++        vrshr.u16   q1, q2, #5
++
++        @ Store
++1:
++        vst1.16     {q0-q1}, [r0], r3     @ first 32 bytes of the row
++        subs        r1, #1
++        vst1.16     {q0-q1}, [r2], r3     @ second 32 bytes of the row
++        bne         1b
++
++        bx           lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_dc_32_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]  (In pels)
++
++function ff_hevc_rpi_pred_dc_32_neon_10, export=1
++
++        @ Average the els of top & left
++        @ With 10 bits we are (just) safe from overflow in i16
++        vldm        r1, {q0-q3}           @ 64 bytes from top
++        vldm        r2, {q8-q11}          @ 64 bytes from left
++        vadd.i16    q0, q1
++        mov         r1, #32               @ row counter, 1 row per loop pass
++        vadd.i16    q2, q3
++        add         r2, r0, #32           @ r2 = second 32 bytes of the row
++        vadd.i16    q8, q9
++        lsl         r3, #1                @ stride: pels -> bytes (* 2)
++        vadd.i16    q10, q11
++        vadd.u16    q0, q2
++        vadd.u16    q8, q10
++        vadd.i16    q0, q8
++        vadd.i16    d0, d1        @ d0 has 4 vals
++        vpadd.i16   d0, d0        @ 2 (top & bottom the same)
++        vpadd.i16   d4, d0, d0    @ 1 (all the same)
++        vpadd.i16   d5, d0, d0
++        vrshr.u16   q0, q2, #6            @ (sum + 32) >> 6 in every 16-bit lane
++        vrshr.u16   q1, q2, #6
++
++        @ Store
++1:
++        vst1.16     {q0-q1}, [r0], r3     @ first 32 bytes of the row
++        subs        r1, #1
++        vst1.16     {q0-q1}, [r2], r3     @ second 32 bytes of the row
++        bne         1b
++
++        bx           lr
++endfunc
++
++
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcpred_intra_filter_neon.S
+@@ -0,0 +1,881 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox, Ben Avison
++*/
++
++#include "libavutil/arm/asm.S"
++#include "neon.S"
++
++@ All functions have the call
++@
++@ int ff_hevc_rpi_intra_filter_N_neon_PW(
++@    pixel * const left,                   [r0]
++@    pixel * const top,                    [r1]
++@    const unsigned int req,               [r2]
++@    const unsigned int avail,             [r3]
++@    const pixel * const src_l,            [sp, #0]
++@    const pixel * const src_u,            [sp, #4]
++@    const pixel * const src_ur,           [sp, #8]
++@    const unsigned int stride,            [sp, #12] (pels)
++@    const unsigned int top_right_size,    [sp, #16]
++@    const unsigned int down_left_size)    [sp, #20]
++@
++@ Assumptions:
++@ (that wouldn't apply to all frame layouts but do apply to sand, so beware
++@  if reusing this code)
++@
++@ Min ctb size is 8 so we don't need to worry about tr_size or dl_size for
++@ N==4, but do for chroma N>=8.  As we share Y/C fns that means we can ignore
++@ N==8,PW=8 (chroma always PW>8) but have to cope for larger
++@
++@ We always have at least 64 pixel H frame width rounding - this lets us
++@ load UR without having to worry about exactly how many pixels are actually
++@ within the frame.  As partial loads will only occur very occasionally this
++@ should be a win in nearly all cases.
++@
++@ 16 bit fns can be used as 8 bit chroma fns as chroma never filters
++@ so we do no maths on the contents
++@
++@ No filtering in 32bit fns as they are chroma only
++
++
++.equ    AVAIL_UR, 1
++.equ    AVAIL_U,  2
++.equ    AVAIL_UL, 4
++.equ    AVAIL_L,  8
++.equ    AVAIL_DL, 16
++
++.equ    FILTER_LIGHT, 0x40
++.equ    FILTER_STRONG, 0x80
++
++.equ    AVAIL_S_UR_N_U_C, 32 - 1
++.equ    AVAIL_S_U_N_UL_C, 32 - 2
++.equ    AVAIL_S_UL_N_L_C, 32 - 3
++.equ    AVAIL_S_L_N_DL_C, 32 - 4
++
++.equ    AVAIL_S_U_DL_CPSR, 31 - 4  @ Shift for u..dl to go into flags via cpsr
++
++@ On entry
++@  r2   req
++@  r3   avail
++@ [sp, #sp_offset...]  args
++@
++@ On Exit:
++@
++@ Extend values:
++@  d_l  scalar contains value for L & DL
++@       if DL avail then this is DL[0] so we don't need to load that
++@  d_ul scalar containing value for UL
++@  d_u  scalar containing value for U
++@  d_ur scalar containing value for UR
++@ If DL avail then d_l == b_dl elif L avail then d_l == a_l else...
++@ This means that L-light-filter works even if nreq DL (we never filter
++@ req-DL without req-L, but we do filter req-L without req-DL)
++@ If UR avail then d_ur == a_ur so U-filter good too
++@
++@ Data load pointers (only load if req & avail):
++@  r4   DL + stride
++@  r10  L
++@  r6   U
++@  r5   UR
++@
++@ Others:
++@  r2   req
++@  r7   req & avail
++@  r3   L + stride
++@  r8   DL + stride * 2
++@  r9   stride * 2
++@  cs   Load U
++@  mi   Load UR
++@
++@ Clobbered:
++@  r12
++
++.macro  load_pointers pw_s, log2_s, sp_offset, d_type, d_l, d_ul, d_u, d_ur
++
++.equ    src_l\@,   \sp_offset + 0
++.equ    src_u\@,   \sp_offset + 4
++.equ    src_ur\@,  \sp_offset + 8
++.equ    stride\@,  \sp_offset + 12
++.equ    pw\@,      (1 << \pw_s)                 @ pel width in bytes
++.equ    b_size\@,  (1 << (\pw_s + \log2_s))     @ size in bytes
++
++@ r9    stride
++@                       r7 = ab_ul, r6 = a_u, r5 = a_ur
++@ r4 = b_dl, r10 = b_l,             r8 = b_u
++
++        ldr        r5,  [sp, #src_ur\@]         @ r5  = src_ur argument
++        lsl        r12, r3,  #AVAIL_S_U_DL_CPSR @ avail bits moved up to the flag positions
++        ldr        r10, [sp, #src_l\@]          @ r10 = src_l argument
++        ldr        r9,  [sp, #stride\@]         @ r9  = stride argument
++        ldr        r6,  [sp, #src_u\@]          @ r6  = src_u argument
++
++        @ This is quite a slow instruction but it replaces
++        @ a decent number of tests that yield a max of 2 flags/op
++        @ It is annoying we can't branch on Q!
++        @ If L navail (ne) then DL must be navail (pl)
++        msr        APSR_nzcvq, r12      @ n=dl, z=l, c=ul, v=u, q=ur
++
++        mov        r4,  r5
++        sub        r7,  r10, r9                 @ r7 = src_l - stride
++        it vs
++        movvs      r4,  r6
++        add        r8,  r6,  #b_size\@ - pw\@
++        it cs
++        movcs      r4,  r7
++        ite ne
++        movne      r10, r4
++        addeq      r4,  r7,  r9,  lsl #\log2_s
++        it cc
++        movcc      r7,  r10
++        it mi
++        addmi      r4,  r10, r9,  lsl #\log2_s
++        vld1.\d_type {\d_ul}, [r7]
++        itt vc
++        movvc      r8,  r7
++        movvc      r6,  r7
++        vld1.\d_type {\d_l }, [r4], r9
++        tst        r3,  #AVAIL_UR
++        vld1.\d_type {\d_u }, [r6]
++        it eq
++        moveq      r5,  r8                      @ UR not available: read from r8 instead
++        and        r7,  r2,  r3                 @ r7 = req & avail
++        add        r8,  r4,  r9                 @ r4 was already advanced by r9 in the vld1 above
++        vld1.\d_type {\d_ur}, [r5]
++        lsls       r12, r7,  #AVAIL_S_UR_N_U_C  @ flags: C = U bit, N = UR bit of r7
++        add        r3,  r10, r9                 @ r3 = r10 + stride
++        lsl        r9,  #1                      @ r9 = stride * 2
++.endm
++
++
++
++@ int ff_hevc_rpi_intra_filter_4_neon_8(
++@    pixel * const left,                   [r0]
++@    pixel * const top,                    [r1]
++@    const unsigned int req,               [r2]
++@    const unsigned int avail,             [r3]
++@    const pixel * const src_l,            [sp, #0]
++@    const pixel * const src_u,            [sp, #4]
++@    const pixel * const src_ur,           [sp, #8]
++@    const unsigned int stride,            [sp, #12] (pels)
++@    const unsigned int top_right_size,    [sp, #16]
++@    const unsigned int down_left_size)    [sp, #20]
++
++.set    sp_base, 8*4                    @ 8 words pushed on entry, so stacked args start at sp + 32
++.set    pw_s,    0                      @ log2 of pel width in bytes
++.set    pw,      (1 << pw_s)
++.set    log2_s,  2                      @ log2 of block size in pels
++
++function ff_hevc_rpi_intra_filter_4_neon_8, export=1
++        push       {r4-r10, lr}
++        load_pointers pw_s, log2_s, sp_base, 8, d0[], d31[7], d1[], d2[]
++
++        it cs                           @ cs = load U (set by load_pointers)
++        vldrcs     s2,  [r6]            @ low half of d1 = 4 real U pels
++        ite pl                          @ mi = load UR
++        vmovpl     s3,  s4              @ no UR: high half of d1 = replicated value held in d2
++        vldrmi     s3,  [r5]            @ UR: high half of d1 = 4 real UR pels
++
++        lsls       r7,  #AVAIL_S_L_N_DL_C       @ r7 = req & avail; mi = load L, cs = load DL
++        add        r12, r0,  #-pw       @ r12 = &left[-1], the up-left slot
++        bpl        1f
++
++        vld1.8    {d0[0]}, [r10], r9    @ r10 and r3 walk alternate rows, r9 = 2 * stride
++        vld1.8    {d0[1]}, [r3],  r9
++        vld1.8    {d0[2]}, [r10]
++        vld1.8    {d0[3]}, [r3]
++1:
++        bcc        1f
++        vld1.8    {d0[5]}, [r4],  r9    @ d0[4] already holds DL[0] from load_pointers
++        vld1.8    {d0[6]}, [r8]
++        vld1.8    {d0[7]}, [r4]
++1:
++        vstr       d1,  [r1]            @ Up
++        vst1.8    {d31[7]}, [r12]       @ Up-left
++        vstr       d0,  [r0]            @ Left
++        pop       {r4-r10, pc}
++endfunc
++
++
++@ int ff_hevc_rpi_intra_filter_4_neon_16(
++@    pixel * const left,                   [r0]
++@    pixel * const top,                    [r1]
++@    const unsigned int req,               [r2]
++@    const unsigned int avail,             [r3]
++@    const pixel * const src_l,            [sp, #0]
++@    const pixel * const src_u,            [sp, #4]
++@    const pixel * const src_ur,           [sp, #8]
++@    const unsigned int stride,            [sp, #12] (pels)
++@    const unsigned int top_right_size,    [sp, #16]
++@    const unsigned int down_left_size)    [sp, #20]
++
++.set    sp_base, 8*4                    @ 8 words pushed on entry, so stacked args start at sp + 32
++.set    pw_s,    1                      @ log2 of pel width in bytes
++.set    pw,      (1 << pw_s)
++.set    log2_s,  2                      @ log2 of block size in pels
++
++function ff_hevc_rpi_intra_filter_4_neon_16, export=1
++        push       {r4-r10, lr}
++        load_pointers pw_s, log2_s, sp_base, 16, "d0[],d1[]", d31[3], d2[], d3[]
++
++        it cs                           @ cs = load U (set by load_pointers)
++        vldrcs     d2,  [r6]
++        it mi                           @ mi = load UR; otherwise d3 keeps the replicated value
++        vldrmi     d3,  [r5]
++        lsls       r7,  #AVAIL_S_L_N_DL_C       @ r7 = req & avail; mi = load L, cs = load DL
++        add        r12, r0, #-pw        @ r12 = &left[-1], the up-left slot
++        bpl        1f
++        vld1.16   {d0[0]}, [r10], r9    @ r10 and r3 walk alternate rows, r9 = 2 * stride
++        vld1.16   {d0[1]}, [r3],  r9
++        vld1.16   {d0[2]}, [r10]
++        vld1.16   {d0[3]}, [r3]
++1:
++        bcc        1f
++        vld1.16   {d1[1]}, [r4],  r9    @ d1[0] already holds DL[0] from load_pointers
++        vld1.16   {d1[2]}, [r8]
++        vld1.16   {d1[3]}, [r4]
++1:
++        vst1.16   {q1}, [r1]           @ Up
++        vst1.16   {d31[3]}, [r12]      @ Up-left
++        vst1.16   {q0}, [r0]           @ Left
++        pop       {r4-r10, pc}
++endfunc
++
++
++@ int ff_hevc_rpi_intra_filter_8_neon_8(
++@    pixel * const left,                   [r0]
++@    pixel * const top,                    [r1]
++@    const unsigned int req,               [r2]
++@    const unsigned int avail,             [r3]
++@    const pixel * const src_l,            [sp, #0]
++@    const pixel * const src_u,            [sp, #4]
++@    const pixel * const src_ur,           [sp, #8]
++@    const unsigned int stride,            [sp, #12] (pels)
++@    const unsigned int top_right_size,    [sp, #16]
++@    const unsigned int down_left_size)    [sp, #20]
++
++.set    sp_base, 8*4                    @ 8 words pushed on entry, so stacked args start at sp + 32
++.set    pw_s,    0                      @ log2 of pel width in bytes
++.set    pw,      (1 << pw_s)
++.set    log2_s,  3                      @ log2 of block size in pels
++
++function ff_hevc_rpi_intra_filter_8_neon_8, export=1
++        push      {r4-r10, lr}
++        load_pointers pw_s, log2_s, sp_base, 8, "d0[],d1[]", d31[7], d4[], d5[]
++
++        it cs                           @ cs = load U (set by load_pointers)
++        vldrcs     d4,  [r6]
++        it mi                           @ mi = load UR; otherwise d5 keeps the replicated value
++        vldrmi     d5,  [r5]
++
++        lsls       r7,  #AVAIL_S_L_N_DL_C       @ r7 = req & avail; mi = load L, cs = load DL
++        bpl        1f
++        vld1.8    {d0[0]}, [r10], r9    @ r10 and r3 walk alternate rows, r9 = 2 * stride
++        vld1.8    {d0[1]}, [r3],  r9
++        vld1.8    {d0[2]}, [r10], r9
++        vld1.8    {d0[3]}, [r3],  r9
++        vld1.8    {d0[4]}, [r10], r9
++        vld1.8    {d0[5]}, [r3],  r9
++        vld1.8    {d0[6]}, [r10]
++        vld1.8    {d0[7]}, [r3]
++1:
++        bcc        1f
++        vld1.8    {d1[1]}, [r4],  r9    @ d1[0] already holds DL[0] from load_pointers
++        vld1.8    {d1[2]}, [r8],  r9
++        vld1.8    {d1[3]}, [r4],  r9
++        vld1.8    {d1[4]}, [r8],  r9
++        vld1.8    {d1[5]}, [r4],  r9
++        vld1.8    {d1[6]}, [r8]
++        vld1.8    {d1[7]}, [r4]
++1:
++        tst        r2,  #FILTER_LIGHT   @ r2 = req
++        add        r12, r0,  #-pw       @ r12 = &left[-1], the up-left slot
++        beq        10f                  @ no filter requested: store unfiltered
++
++        @ Luma light filter
++        vext.8     q8,  q15, q2,  #15   @ q8  = UL followed by up[0..14]  (previous pel)
++        vext.8     q12, q15, q0,  #15   @ q12 = UL followed by left[0..14]
++        vaddl.u8   q9,  d17, d5         @ widen to 16 bits: prev + cur
++        vaddl.u8   q8,  d16, d4
++        vaddl.u8   q13, d25, d1
++        vaddl.u8   q12, d24, d0
++        vmov.u8    r3,  d5[7]           @ Save final pel
++        vmov.u8    r2,  d1[7]           @ Save final pel
++
++        vext.16    q2,  q8,  q9,  #1    @ pair sums shifted by one: cur + next
++        vext.16    q3,  q9,  q9,  #1
++        vext.16    q0,  q12, q13, #1
++        vext.16    q1,  q13, q13, #1
++        vadd.u16   d30, d16, d24        @ d30[0] = l[0] + 2ul + u[0]
++        vadd.u16   q2,  q8              @ prev + 2 * cur + next
++        vadd.u16   q3,  q9
++        vadd.u16   q0,  q12
++        vadd.u16   q1,  q13
++
++        vrshrn.u16 d4,  q2,  #2         @ rounded divide by 4, narrow back to 8 bits
++        vrshrn.u16 d5,  q3,  #2
++        vrshrn.u16 d0,  q0,  #2
++        vrshrn.u16 d1,  q1,  #2
++        vrshr.u16  d30, #2
++        vmov.u8    d5[7], r3            @ Restore final pel
++        vmov.u8    d1[7], r2            @ Restore final pel
++        vdup.u8    d31, d30[0]          @ d31[7] = d30[0] (low byte)
++
++10:
++        vst1.8    {q2 }, [r1]           @ Up
++        vst1.8    {d31[7]}, [r12]       @ Up-left
++        vst1.8    {q0 }, [r0]           @ Left
++        pop       {r4-r10, pc}
++endfunc
++
++
++@ int ff_hevc_rpi_intra_filter_8_neon_16(
++@    pixel * const left,                   [r0]
++@    pixel * const top,                    [r1]
++@    const unsigned int req,               [r2]
++@    const unsigned int avail,             [r3]
++@    const pixel * const src_l,            [sp, #0]
++@    const pixel * const src_u,            [sp, #4]
++@    const pixel * const src_ur,           [sp, #8]
++@    const unsigned int stride,            [sp, #12] (pels)
++@    const unsigned int top_right_size,    [sp, #16]
++@    const unsigned int down_left_size)    [sp, #20]
++
++.set    sp_base, 8*4                    @ 8 words pushed on entry, so stacked args start at sp + 32
++.set    ur_size, sp_base + 16           @ stack offset of top_right_size
++.set    dl_size, sp_base + 20           @ stack offset of down_left_size
++.set    pw_s,    1                      @ log2 of pel width in bytes
++.set    pw,      (1 << pw_s)
++.set    log2_s,  3                      @ log2 of block size in pels
++.set    p_size,  (1 << log2_s)          @ size in pels
++
++function ff_hevc_rpi_intra_filter_8_neon_16, export=1
++        push      {r4-r10, lr}
++        load_pointers pw_s, log2_s, sp_base, 16, "d0[],d1[]", d31[3], "d4[],d5[]", "d6[],d7[]"
++
++        it cs                           @ cs = load U (set by load_pointers)
++        vldmcs     r6,  {d4, d5}
++        ldr        r12, [sp, #ur_size]
++        bpl        1f                   @ mi = load UR; otherwise q3 keeps the replicated value
++        cmp        r12, #4
++        vldm       r5,  {d6, d7}
++        bgt        1f                   @ more than 4 pels valid: keep all 8
++        vdup.16    d7,  d6[3]           @ only 4 valid: pad with the last valid pel
++1:
++        lsls       r12, r7,  #AVAIL_S_L_N_DL_C   @ r7 = req & avail; mi = load L, cs = load DL
++        vdup.16    q1,  d0[0]           @ default DL = replicated value from load_pointers
++        bpl        1f
++        vld1.16   {d0[0]}, [r10], r9    @ r10 and r3 walk alternate rows, r9 = 2 * stride
++        vld1.16   {d0[1]}, [r3],  r9
++        vld1.16   {d0[2]}, [r10], r9
++        vld1.16   {d0[3]}, [r3],  r9
++        vld1.16   {d1[0]}, [r10], r9
++        vld1.16   {d1[1]}, [r3],  r9
++        vld1.16   {d1[2]}, [r10]
++        vld1.16   {d1[3]}, [r3]
++1:
++        bcc        1f
++        ldr        r12, [sp, #dl_size]
++        vld1.16   {d2[1]}, [r4],  r9    @ d2[0] already holds DL[0]
++        cmp        r12, #p_size
++        vld1.16   {d2[2]}, [r8],  r9
++        vld1.16   {d2[3]}, [r4],  r9
++        blt        2f                   @ fewer than 8 rows valid: pad the second half
++        vld1.16   {d3[0]}, [r8],  r9
++        vld1.16   {d3[1]}, [r4],  r9
++        vld1.16   {d3[2]}, [r8]
++        vld1.16   {d3[3]}, [r4]
++        b          1f
++2:
++        vdup.16    d3,  d2[3]
++1:
++        tst        r2,  #FILTER_LIGHT   @ r2 = req
++        add        r12, r0,  #-pw       @ r12 = &left[-1], the up-left slot
++        beq        10f                  @ no filter requested: store unfiltered
++
++        @ Luma light filter
++        vext.16    q9,  q2,  q3,  #7    @ previous-pel vectors; q15 lane 7 (d31[3]) = UL
++        vext.16    q8,  q15, q2,  #7
++        vext.16    q13, q0,  q1,  #7
++        vext.16    q12, q15, q0,  #7
++        vadd.u16   q9,  q3              @ prev + cur
++        vadd.u16   q8,  q2
++        vadd.u16   q13, q1
++        vadd.u16   q12, q0
++        vmov.u16   r3,  d7[3]           @ Save final pel
++        vmov.u16   r2,  d3[3]           @ Save final pel
++
++        vext.16    q2,  q8,  q9,  #1    @ pair sums shifted by one: cur + next
++        vext.16    q3,  q9,  q9,  #1
++        vext.16    q0,  q12, q13, #1
++        vext.16    q1,  q13, q13, #1
++        vadd.u16   d30, d16, d24        @ d30[0] = l[0] + 2ul + u[0]
++        vadd.u16   q2,  q8              @ prev + 2 * cur + next
++        vadd.u16   q3,  q9
++        vadd.u16   q0,  q12
++        vadd.u16   q1,  q13
++
++        vrshr.u16  q2,  #2              @ rounded divide by 4
++        vrshr.u16  q3,  #2
++        vrshr.u16  q0,  #2
++        vrshr.u16  q1,  #2
++        vrshr.u16  d30, #2
++        vmov.u16   d7[3], r3            @ Restore final pel
++        vmov.u16   d3[3], r2            @ Restore final pel
++        vdup.u16   d31, d30[0]          @ d31[3] = d30[0]
++
++10:
++        vst1.16   {q2,  q3}, [r1]       @ Up
++        vst1.16   {d31[3]}, [r12]       @ Up-left
++        vst1.16   {q0,  q1}, [r0]       @ Left
++        pop       {r4-r10, pc}
++endfunc
++
++@ int ff_hevc_rpi_intra_filter_16_neon_16(
++@    pixel * const left,                   [r0]
++@    pixel * const top,                    [r1]
++@    const unsigned int req,               [r2]
++@    const unsigned int avail,             [r3]
++@    const pixel * const src_l,            [sp, #0]
++@    const pixel * const src_u,            [sp, #4]
++@    const pixel * const src_ur,           [sp, #8]
++@    const unsigned int stride,            [sp, #12] (pels)
++@    const unsigned int top_right_size,    [sp, #16]
++@    const unsigned int down_left_size)    [sp, #20]
++
++.set    sp_base, 8*4                    @ 8 words pushed on entry, so stacked args start at sp + 32
++.set    ur_size, sp_base + 16           @ stack offset of top_right_size
++.set    dl_size, sp_base + 20           @ stack offset of down_left_size
++.set    pw_s,    1                      @ log2 of pel width in bytes
++.set    pw,      (1 << pw_s)
++.set    log2_s,  4                      @ log2 of block size in pels
++.set    p_size,  (1 << log2_s)          @ size in pels
++
++function ff_hevc_rpi_intra_filter_16_neon_16, export=1
++        push      {r4-r10, lr}
++        load_pointers pw_s, log2_s, sp_base, 16, "d0[],d1[]", d31[3], "d16[],d17[]", "d20[],d21[]"
++
++        vdup.16    q9,  d16[0]          @ extend the replicated U value over all 16 pels
++        vdup.16    q11, d20[0]          @ likewise for UR
++
++        it cs                           @ cs = load U (set by load_pointers)
++        vldmcs     r6,  {d16-d19}
++        ldr        r12, [sp, #ur_size]
++        bpl        1f                   @ mi = load UR
++        cmp        r12, #12
++        @ Given chroma frame layout, if UR exists then it is always legit to
++        @ load all of it even if most of it is outside the frame.
++        vldm       r5,  {d20-d23}
++        bgt        1f                   @ more than 12 valid: keep everything
++        bge        4f                   @ exactly 12 valid: pad the last 4
++        cmp        r12,  #8
++        bge        3f                   @ 8 valid: pad the last 8
++        vdup.16    d21, d20[3]          @ 4 valid: pad the last 12
++3:      vdup.16    d22, d21[3]
++4:      vdup.16    d23, d22[3]
++
++1:
++        lsls       r7,  #AVAIL_S_L_N_DL_C       @ r7 = req & avail; mi = load L, cs = load DL
++        ldr        r12, [sp, #dl_size]
++        vdup.16    q1,  d0[0]           @ default L upper half and DL = replicated value
++        vdup.16    q2,  d0[0]
++        vdup.16    q3,  d0[0]
++        bpl        1f
++        vld1.16   {d0[0]}, [r10], r9    @ r10 and r3 walk alternate rows, r9 = 2 * stride
++        vld1.16   {d0[1]}, [r3],  r9
++        vld1.16   {d0[2]}, [r10], r9
++        vld1.16   {d0[3]}, [r3],  r9
++        vld1.16   {d1[0]}, [r10], r9
++        vld1.16   {d1[1]}, [r3],  r9
++        vld1.16   {d1[2]}, [r10], r9
++        vld1.16   {d1[3]}, [r3],  r9
++        vld1.16   {d2[0]}, [r10], r9
++        vld1.16   {d2[1]}, [r3],  r9
++        vld1.16   {d2[2]}, [r10], r9
++        vld1.16   {d2[3]}, [r3],  r9
++        vld1.16   {d3[0]}, [r10], r9
++        vld1.16   {d3[1]}, [r3],  r9
++        vld1.16   {d3[2]}, [r10]
++        vld1.16   {d3[3]}, [r3]
++1:
++        bcc        1f
++        vld1.16   {d4[1]}, [r4],  r9    @ d4[0] already holds DL[0]
++        cmp        r12, #4
++        vld1.16   {d4[2]}, [r8],  r9
++        vld1.16   {d4[3]}, [r4],  r9
++        ble        2f                   @ 4 rows valid: pad the remaining 12
++        vld1.16   {d5[0]}, [r8],  r9
++        vld1.16   {d5[1]}, [r4],  r9
++        cmp        r12, #12
++        vld1.16   {d5[2]}, [r8],  r9
++        vld1.16   {d5[3]}, [r4],  r9
++        blt        3f                   @ 8 rows valid: pad the remaining 8
++        vld1.16   {d6[0]}, [r8],  r9
++        vld1.16   {d6[1]}, [r4],  r9
++        vld1.16   {d6[2]}, [r8],  r9
++        vld1.16   {d6[3]}, [r4],  r9
++        ble        4f                   @ 12 rows valid: pad the last 4
++        vld1.16   {d7[0]}, [r8],  r9
++        vld1.16   {d7[1]}, [r4],  r9
++        vld1.16   {d7[2]}, [r8]
++        vld1.16   {d7[3]}, [r4]
++        b          1f
++2:      vdup.16    d5,  d4[3]
++3:      vdup.16    d6,  d5[3]
++4:      vdup.16    d7,  d6[3]
++1:
++        tst        r2,  #FILTER_LIGHT   @ r2 = req
++        add        r12, r0,  #-pw       @ r12 = &left[-1], the up-left slot
++        beq        10f                  @ no filter requested: store unfiltered
++
++        vpush     {q5}                  @ q5 is used as scratch below and restored by vpop
++        @ Luma light filter
++        @ Left
++        vext.16    q5,  q2,  q3,  #7    @ previous-pel vectors; q15 lane 7 (d31[3]) = UL
++        vext.16    q14, q1,  q2,  #7
++        vext.16    q13, q0,  q1,  #7
++        vext.16    q12, q15, q0,  #7
++
++        vadd.u16   q5,  q3              @ prev + cur
++        vadd.u16   q14, q2
++        vadd.u16   q13, q1
++        vadd.u16   q12, q0
++        vmov.u16   r2,  d7[3]           @ Save final pel
++
++        vext.16    q0,  q12, q13, #1    @ pair sums shifted by one: cur + next
++        vext.16    q1,  q13, q14, #1
++        vext.16    q2,  q14, q5,  #1
++        vext.16    q3,  q5,  q5,  #1
++
++        vmov       d30, d24             @ d30[0] = l[0] + ul
++        vadd.u16   q0,  q12             @ prev + 2 * cur + next
++        vadd.u16   q1,  q13
++        vadd.u16   q2,  q14
++        vadd.u16   q3,  q5
++
++        vrshr.u16  q0,  #2              @ rounded divide by 4
++        vrshr.u16  q1,  #2
++        vrshr.u16  q2,  #2
++        vrshr.u16  q3,  #2
++
++        @ Up
++        vext.16    q5,  q10, q11, #7    @ same three-tap sequence for the up/up-right row
++        vext.16    q14, q9,  q10, #7
++        vext.16    q13, q8,  q9,  #7
++        vext.16    q12, q15, q8,  #7
++
++        vadd.u16   q5,  q11
++        vadd.u16   q14, q10
++        vadd.u16   q13, q9
++        vadd.u16   q12, q8
++        vmov.u16   r3,  d23[3]          @ Save final pel
++
++        vext.16    q8,  q12, q13, #1
++        vext.16    q9,  q13, q14, #1
++        vext.16    q10, q14, q5,  #1
++        vext.16    q11, q5,  q5,  #1
++
++        vadd.u16   d30, d24             @ d30[0] = l[0] + 2ul + u[0]
++        vadd.u16   q8,  q12
++        vadd.u16   q9,  q13
++        vadd.u16   q10, q14
++        vadd.u16   q11, q5
++
++        vrshr.u16  q8,  #2
++        vrshr.u16  q9,  #2
++        vrshr.u16  q10, #2
++        vrshr.u16  q11, #2
++
++        @ Misc
++        vrshr.u16  d30, #2
++        vmov.u16   d7[3], r2            @ Restore final pel
++        vmov.u16   d23[3], r3           @ Restore final pel
++        vdup.u16   d31, d30[0]          @ d31[3] = d30[0]
++        vpop      {q5}
++
++10:
++        vstm       r1, {d16-d23}        @ Up
++        vst1.16   {d31[3]}, [r12]       @ Up-left
++        vstm       r0, { d0-d7 }        @ Left
++        pop       {r4-r10, pc}
++endfunc
++
++@ int ff_hevc_rpi_intra_filter_4_neon_32(
++@    pixel * const left,                   [r0]
++@    pixel * const top,                    [r1]
++@    const unsigned int req,               [r2]
++@    const unsigned int avail,             [r3]
++@    const pixel * const src_l,            [sp, #0]
++@    const pixel * const src_u,            [sp, #4]
++@    const pixel * const src_ur,           [sp, #8]
++@    const unsigned int stride,            [sp, #12] (pels)
++@    const unsigned int top_right_size,    [sp, #16]
++@    const unsigned int down_left_size)    [sp, #20]
++
++.set    sp_base, 8*4                    @ 8 words pushed on entry, so stacked args start at sp + 32
++.set    pw_s,    2                      @ log2 of pel width in bytes
++.set    pw,      (1 << pw_s)
++.set    log2_s,  2                      @ log2 of block size in pels
++
++function ff_hevc_rpi_intra_filter_4_neon_32, export=1
++        push       {r4-r10, lr}
++        load_pointers pw_s, log2_s, sp_base, 32, "d0[],d1[]", d31[1], "d4[],d5[]", "d6[],d7[]"
++
++        it cs                           @ cs = load U (set by load_pointers)
++        vldmcs     r6,  {d4, d5}
++        it mi                           @ mi = load UR; otherwise q3 keeps the replicated value
++        vldmmi     r5,  {d6, d7}
++        lsls       r7,  #AVAIL_S_L_N_DL_C       @ r7 = req & avail; mi = load L, cs = load DL
++        vdup.32    q1,  d0[0]           @ default DL = replicated value from load_pointers
++        add        r12, r0,  #-pw       @ r12 = &left[-1], the up-left slot
++        bpl        1f
++        vld1.32   {d0[0]}, [r10], r9    @ r10 and r3 walk alternate rows, r9 = 2 * stride
++        vld1.32   {d0[1]}, [r3],  r9
++        vld1.32   {d1[0]}, [r10]
++        vld1.32   {d1[1]}, [r3]
++1:
++        bcc        1f
++        vld1.32   {d2[1]}, [r4],  r9    @ d2[0] already holds DL[0]
++        vld1.32   {d3[0]}, [r8]
++        vld1.32   {d3[1]}, [r4]
++1:
++        vst1.32    {q2,  q3 }, [r1]     @ Up
++        vst1.32    {d31[1]}, [r12]      @ Up-left
++        vst1.32    {q0,  q1 }, [r0]     @ Left
++        pop        {r4-r10, pc}
++endfunc
++
++
++@ int ff_hevc_rpi_intra_filter_8_neon_32(
++@    pixel * const left,                   [r0]
++@    pixel * const top,                    [r1]
++@    const unsigned int req,               [r2]
++@    const unsigned int avail,             [r3]
++@    const pixel * const src_l,            [sp, #0]
++@    const pixel * const src_u,            [sp, #4]
++@    const pixel * const src_ur,           [sp, #8]
++@    const unsigned int stride,            [sp, #12] (pels)
++@    const unsigned int top_right_size,    [sp, #16]
++@    const unsigned int down_left_size)    [sp, #20]
++
++.set    sp_base, 8*4                    @ 8 words pushed on entry, so stacked args start at sp + 32
++.set    ur_size, sp_base + 16           @ stack offset of top_right_size
++.set    dl_size, sp_base + 20           @ stack offset of down_left_size
++.set    pw_s,    2                      @ log2 of pel width in bytes
++.set    pw,      (1 << pw_s)
++.set    log2_s,  3                      @ log2 of block size in pels
++.set    p_size,  (1 << log2_s)          @ size in pels
++
++function ff_hevc_rpi_intra_filter_8_neon_32, export=1
++        push       {r4-r10, lr}
++        load_pointers pw_s, log2_s, sp_base, 32, "d0[],d1[]", d31[1], "d16[],d17[]", "d20[],d21[]"
++
++        vdup.32    q9,  d16[0]          @ extend the replicated U value over all 8 pels
++        vdup.32    q11, d20[0]          @ likewise for UR
++
++        it cs                           @ cs = load U (set by load_pointers)
++        vldmcs     r6,  {q8,  q9 }
++        ldr        r12, [sp, #ur_size]
++        bpl        1f                   @ mi = load UR
++        cmp        r12, #p_size
++        vldm       r5,  {q10, q11}
++        bge        1f                   @ all 8 pels valid
++        vdup.32    q11, d21[1]          @ otherwise pad the second half with pel 3
++1:
++        lsls       r7,  #AVAIL_S_L_N_DL_C       @ r7 = req & avail; mi = load L, cs = load DL
++        vdup.32    q1,  d0[0]           @ default L upper half and DL = replicated value
++        vdup.32    q2,  d0[0]
++        vdup.32    q3,  d0[0]
++        bpl        1f
++        vld1.32   {d0[0]}, [r10], r9    @ r10 and r3 walk alternate rows, r9 = 2 * stride
++        vld1.32   {d0[1]}, [r3],  r9
++        vld1.32   {d1[0]}, [r10], r9
++        vld1.32   {d1[1]}, [r3],  r9
++        vld1.32   {d2[0]}, [r10], r9
++        vld1.32   {d2[1]}, [r3],  r9
++        vld1.32   {d3[0]}, [r10]
++        vld1.32   {d3[1]}, [r3]
++1:
++        bcc        1f
++        ldr        r12, [sp, #dl_size]
++        vld1.32   {d4[1]}, [r4],  r9    @ d4[0] already holds DL[0]
++        cmp        r12, #p_size
++        vld1.32   {d5[0]}, [r8],  r9
++        vld1.32   {d5[1]}, [r4],  r9
++        blt        2f                   @ fewer than 8 rows valid: pad the second half
++        vld1.32   {d6[0]}, [r8],  r9
++        vld1.32   {d6[1]}, [r4],  r9
++        vld1.32   {d7[0]}, [r8]
++        vld1.32   {d7[1]}, [r4]
++        b          1f
++2:
++        vdup.32    q3,  d5[1]
++1:
++        add        r12, r0,  #-pw       @ r12 = &left[-1], the up-left slot
++        vstm       r1,  { q8-q11}       @ Up
++        vst1.32   {d31[1]}, [r12]       @ Up-left
++        vstm       r0,  { q0-q3 }       @ Left
++        pop       {r4-r10, pc}
++endfunc
++
++
++@ int ff_hevc_rpi_intra_filter_16_neon_32(
++@    pixel * const left,                   [r0]
++@    pixel * const top,                    [r1]
++@    const unsigned int req,               [r2]
++@    const unsigned int avail,             [r3]
++@    const pixel * const src_l,            [sp, #0]
++@    const pixel * const src_u,            [sp, #4]
++@    const pixel * const src_ur,           [sp, #8]
++@    const unsigned int stride,            [sp, #12] (pels)
++@    const unsigned int top_right_size,    [sp, #16]
++@    const unsigned int down_left_size)    [sp, #20]
++
++.set    sp_base, 8*4                    @ 8 words pushed on entry, so stacked args start at sp + 32
++.set    ur_size, sp_base + 16           @ stack offset of top_right_size
++.set    dl_size, sp_base + 20           @ stack offset of down_left_size
++.set    pw_s,    2                      @ log2 of pel width in bytes
++.set    pw,      (1 << pw_s)
++.set    log2_s,  4                      @ log2 of block size in pels
++.set    p_size,  (1 << log2_s)          @ size in pels
++
++function ff_hevc_rpi_intra_filter_16_neon_32, export=1
++        push       {r4-r10, lr}
++        load_pointers pw_s, log2_s, sp_base, 32, d30[0], d30[1], d31[0], d31[1]
++
++        @ Once we get this big we have run out of neon regs to store
++        @ everything at once so do in pieces
++
++        @ Up (have)
++        it cs                           @ cs = load U (set by load_pointers)
++        vldmcs     r6,  { q0-q3 }
++        ldr        r12, [sp, #ur_size]
++        it cs
++        vstmcs     r1,  { q0-q3 }
++        bpl        1f                   @ mi = load UR; if not, it is filled in further down
++        vldm       r5,  { q8-q11}       @ only reached when mi, so no IT block is needed
++        cmp        r12, #12
++        add        lr,  r1,  #(pw << log2_s)    @ lr = second half of top (flags untouched)
++        bgt        2f                   @ more than 12 valid: store as loaded
++        beq        3f                   @ exactly 12 valid: pad the last 4
++        cmp        r12, #8
++        bge        4f                   @ 8 valid: pad the last 8
++        vdup.32    q9,  d17[1]          @ 4 valid: pad the last 12 (pels are 32 bits wide)
++4:      vdup.32    q10, d19[1]          @ q10, not d10: d8-d15 are callee-saved
++3:      vdup.32    q11, d21[1]
++2:      vstm       lr, { q8-q11}
++1:
++
++        @ Left (have)
++        add        lr,  r0,  #-pw       @ lr = &left[-1], the up-left slot
++        lsls       r12, r7,  #AVAIL_S_L_N_DL_C   @ r7 = req & avail; mi = load L, cs = load DL
++        vst1.32   {d30[1]}, [lr]        @ UL
++        bpl        1f
++        vld1.32   { d0[0]}, [r10], r9   @ r10 and r3 walk alternate rows, r9 = 2 * stride
++        vld1.32   { d0[1]}, [r3],  r9
++        vld1.32   { d1[0]}, [r10], r9
++        vld1.32   { d1[1]}, [r3],  r9
++        vld1.32   { d2[0]}, [r10], r9
++        vld1.32   { d2[1]}, [r3],  r9
++        vld1.32   { d3[0]}, [r10], r9
++        vld1.32   { d3[1]}, [r3],  r9
++        vld1.32   { d4[0]}, [r10], r9
++        vld1.32   { d4[1]}, [r3],  r9
++        vld1.32   { d5[0]}, [r10], r9
++        vld1.32   { d5[1]}, [r3],  r9
++        vld1.32   { d6[0]}, [r10], r9
++        vld1.32   { d6[1]}, [r3],  r9
++        vld1.32   { d7[0]}, [r10]
++        vld1.32   { d7[1]}, [r3]
++        vstm       r0,  { q0-q3 }
++1:
++        bcc        1f
++        ldr        r12, [sp, #dl_size]
++        vdup.32    d16, d30[0]          @ d16[0] = d30[0]
++        add        lr,  r0,  #(pw << log2_s)    @ lr = second half of left (DL)
++        vld1.32   {d16[1]}, [r4],  r9
++        cmp        r12, #4
++        vld1.32   {d17[0]}, [r8],  r9
++        vld1.32   {d17[1]}, [r4],  r9
++        ble        2f                   @ 4 rows valid: pad the remaining 12
++        vld1.32   {d18[0]}, [r8],  r9
++        vld1.32   {d18[1]}, [r4],  r9
++        cmp        r12, #12
++        vld1.32   {d19[0]}, [r8],  r9
++        vld1.32   {d19[1]}, [r4],  r9
++        blt        3f                   @ 8 rows valid: pad the remaining 8
++        vld1.32   {d20[0]}, [r8],  r9
++        vld1.32   {d20[1]}, [r4],  r9
++        vld1.32   {d21[0]}, [r8],  r9
++        vld1.32   {d21[1]}, [r4],  r9
++        ble        4f                   @ 12 rows valid: pad the last 4
++        vld1.32   {d22[0]}, [r8],  r9
++        vld1.32   {d22[1]}, [r4],  r9
++        vld1.32   {d23[0]}, [r8]
++        vld1.32   {d23[1]}, [r4]
++        b          5f
++2:      vdup.32    q9,  d17[1]
++3:      vdup.32    q10, d19[1]
++4:      vdup.32    q11, d21[1]
++5:      vstm       lr,  { q8-q11}
++1:
++        eors       r7,  r2              @ r7 = req ^ (req & avail): wanted but not loaded
++        beq        99f                  @ nothing left to synthesise
++
++        lsls       r12, r7,  #AVAIL_S_UR_N_U_C   @ mi = fill UR, cs = fill U
++        vdup.32    q0,  d31[0]          @ d31[0] = replicated U value from load_pointers
++        vdup.32    q1,  d31[0]
++        vdup.32    q2,  d31[0]
++        vdup.32    q3,  d31[0]
++        add        lr,  r1,  #(pw << log2_s)
++        vdup.32    q8,  d31[1]          @ d31[1] = replicated UR value
++        vdup.32    q9,  d31[1]
++        vdup.32    q10, d31[1]
++        vdup.32    q11, d31[1]
++        it cs
++        vstmcs     r1,  { q0-q3 }
++        it mi
++        vstmmi     lr,  { q8-q11}
++
++        lsls       r7,  #AVAIL_S_L_N_DL_C        @ mi = fill L, cs = fill DL
++        vdup.32    q0,  d30[0]          @ d30[0] = replicated L/DL value
++        vdup.32    q1,  d30[0]
++        vdup.32    q2,  d30[0]
++        vdup.32    q3,  d30[0]
++        add        lr,  r0,  #(pw << log2_s)
++        it mi
++        vstmmi     r0, { q0-q3 }
++        it cs
++        vstmcs     lr, { q0-q3 }
++
++99:
++        pop       {r4-r10, pc}
++endfunc
++
++
++
++
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcpred_intra_hv_neon.S
+@@ -0,0 +1,920 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox, Ben Avison
++*/
++
++/*
++ * Horizontal & Vertical special cases of angular intra pred
++ *
++ * Split out because:
++ *  Vertical, at least, is relatively common
++ *  Much simpler code than the general angular case
++ *  Luma with size < 32 has extra filtering that doesn't happen anywhere else
++ *
++ * *** Currently luma filtering is mandatory where it occurs, but there are
++ *     cases where it should be turned off (rdpcm & an extension sps flag).
++ *     These don't occur in the standard conformance suite for Main Profile
++ */
++
++#include "libavutil/arm/asm.S"
++#include "neon.S"
++
++@ ff_hevc_rpi_pred_vertical_4_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_vertical_4_neon_8, export=1
++        ldrb        ip, [r2, #-1]       @ Top-left
++        vld1.32     {d0[0]}, [r2 :32]   @ Left
++        add         r2, r0, r3          @ r2 = second output row
++        vld1.8      {d1[]}, [r1]        @ d1 = top[0] in every lane
++        lsl         r3, #1              @ r0 and r2 each step two rows
++        vdup.8      d4, ip
++        vmov.i8     d2, #128
++        vhsub.u8    d4, d0, d4          @ (left - top_left) >> 1
++        veor        d1, d2              @ bias by 128 so the signed add saturates at 0 and 255
++        vld1.32     {d0[0]}, [r1 :32]   @ Top
++        vqadd.s8    d1, d4
++        vmov.i64    d3, #0xff           @ mask selecting byte 0 only
++        vmov        d4, d0              @ second copy of top for the odd rows
++        veor        d5, d1, d2          @ remove bias: d5 = d1 = filtered first column
++        veor        d1, d1, d2
++        vbit        d0, d1, d3          @ row 0: top with pel 0 replaced
++        vshr.u64    d5, #8              @ bring the row 1 pel down to byte 0
++        vst1.32     {d0[0]}, [r0], r3
++        vshr.u64    d1, #16             @ row 2 pel
++        vbit        d4, d5, d3
++        vshr.u64    d5, #16             @ row 3 pel
++        vst1.32     {d4[0]}, [r2], r3
++        vbit        d0, d1, d3
++        vst1.32     {d0[0]}, [r0]
++        vbit        d4, d5, d3
++        vst1.32     {d4[0]}, [r2]
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_vertical_8_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_vertical_8_neon_8, export=1
++        ldrb        ip, [r2, #-1]       @ Top-left
++        vld1.8      {d0}, [r2 :64]      @ Left
++        vmov.i8     d1, #128
++        vld1.8      {d2[]}, [r1]        @ d2 = top[0] in every lane
++        vld1.8      {d3}, [r1 :64]      @ Top
++        vdup.8      d4, ip
++        vhsub.u8    d4, d0, d4          @ (left - top_left) >> 1
++        veor        d2, d1              @ bias by 128 so the signed add saturates at 0 and 255
++        vmov.i64    d0, #0xff           @ mask selecting byte 0 only
++        mov         r1, #8              @ rows remaining
++        vqadd.s8    d2, d4, d2
++        veor        d1, d2, d1          @ remove bias: d1 = filtered first column
++1:
++        vbit        d3, d1, d0          @ replace pel 0 of the top row with this row's value
++        vshr.u64    d1, #8              @ advance to the next row's pel
++        vst1.8      {d3}, [r0 :64], r3
++        subs        r1, #2              @ two rows per iteration
++        vbit        d3, d1, d0
++        vshr.u64    d1, #8
++        vst1.8      {d3}, [r0 :64], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_vertical_16_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_vertical_16_neon_8, export=1
++        ldrb        ip, [r2, #-1]       @ Top-left
++        vld1.8      {q0}, [r2 :128]     @ Left
++        vdup.8      q1, ip
++        vld1.8      {d4[],d5[]}, [r1]   @ q2 = top[0] in every lane
++        vhsub.u8    q0, q1              @ (left - top_left) >> 1
++        vmov.i8     q1, #128
++        veor        q2, q1              @ bias by 128 so the signed add saturates at 0 and 255
++        vmov.i64    d16, #0xff          @ mask selecting byte 0 only
++        vqadd.s8    q0, q2
++        vld1.8      {q3}, [r1 :128]     @ Top
++        mov         r1, #16             @ rows remaining
++        veor        q0, q1              @ remove bias: q0 = filtered first column
++        vmov        q1, q3              @ q1 = even-row output, q3 = odd-row output
++        vext.8      q2, q0, q0, #1      @ q2 = column rotated by one, for the odd rows
++1:
++        vbit        d2, d0, d16         @ replace pel 0 with this row's filtered value
++        vbit        d6, d4, d16
++        vext.8      q0, q0, q0, #2      @ rotate both columns on by two rows
++        subs        r1, #2              @ two rows per iteration
++        vst1.8      {q1}, [r0 :128], r3
++        vext.8      q2, q2, q2, #2
++        vst1.8      {q3}, [r0 :128], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_vertical_32_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_vertical_32_neon_8, export=1
++        vld1.8     {q0,  q1 }, [r1  :128]    @ Up
++        add         r2,  r0,  r3        @ r2 = second output row
++        lsl         r3,  #1             @ r0 and r2 each step two rows
++        mov         r1,  #16            @ 16 iterations of 2 rows; no edge filter at this size
++1:
++        vst1.8     {q0,  q1 }, [r0  :128], r3
++        subs        r1,  #1
++        vst1.8     {q0,  q1 }, [r2  :128], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_vertical_c_4_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_vertical_c_4_neon_8, export=1
++        vld1.16    {d0 }, [r1  :64]    @ Up
++        add         r2,  r0,  r3,  lsl #1       @ r2 = r0 + 2 * r3 (second output row)
++        lsl         r3,  #2             @ r0 and r2 each step two rows
++
++        vst1.16    {d0 }, [r0  :64], r3         @ copy the top row into all 4 rows
++        vst1.16    {d0 }, [r2  :64], r3
++        vst1.16    {d0 }, [r0  :64]
++        vst1.16    {d0 }, [r2  :64]
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_vertical_c_8_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_vertical_c_8_neon_8, export=1
++        vld1.16    {q0 }, [r1  :128]    @ Up
++        add         r2,  r0,  r3,  lsl #1       @ r2 = r0 + 2 * r3 (second output row)
++        lsl         r3,  #2             @ r0 and r2 each step two rows
++        mov         r1,  #4             @ counts down by 2: two iterations of 4 rows
++1:
++        vst1.16    {q0 }, [r0  :128], r3
++        subs        r1,  #2
++        vst1.16    {q0 }, [r2  :128], r3
++        vst1.16    {q0 }, [r0  :128], r3
++        vst1.16    {q0 }, [r2  :128], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_vertical_c_16_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_vertical_c_16_neon_8, export=1
++        vld1.16    {q0,  q1 }, [r1  :128]    @ Up
++        add         r2,  r0,  r3,  lsl #1       @ r2 = r0 + 2 * r3 (second output row)
++        lsl         r3,  #2             @ r0 and r2 each step two rows
++        mov         r1,  #8             @ 8 iterations of 2 rows
++1:
++        vst1.16    {q0,  q1 }, [r0  :128], r3
++        subs        r1,  #1
++        vst1.16    {q0,  q1 }, [r2  :128], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_horizontal_4_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Rows 1-3 = left[y] replicated; row 0 = clip_u8(left[0] + ((top[x] - top_left) >> 1)).
++@ ? Might be faster as simple arm
++
++function ff_hevc_rpi_pred_horizontal_4_neon_8, export=1
++        ldrb        ip, [r2, #-1]       @ Top-left
++        vld1.32     {d0[0]}, [r1 :32]   @ Top
++        add         r1, r2, #3          @ r1 -> left[3]
++        vld1.8      {d1[]}, [r2]!       @ d1 = left[0] in every lane
++        vdup.8      d2, ip
++        vmov.i8     d3, #128            @ x ^ 128 converts between u8 and biased s8
++        vhsub.u8    d0, d2              @ (top[x] - top_left) >> 1
++        veor        d1, d3              @ left[0] into signed range
++        vld1.8      {d2[]}, [r2]!       @ d2 = left[1]
++        add         ip, r0, r3          @ ip = row 1
++        vqadd.s8    d0, d0, d1          @ signed saturation performs the 0..255 clip
++        lsl         r3, #1              @ each pointer advances two rows
++        vld1.8      {d1[]}, [r2]        @ d1 = left[2]
++        vld1.8      {d4[]}, [r1]        @ d4 = left[3]
++        veor        d0, d3              @ back to unsigned
++        vst1.32     {d0[0]}, [r0 :32], r3   @ row 0 (filtered)
++        vst1.32     {d2[0]}, [ip :32], r3   @ row 1
++        vst1.32     {d1[0]}, [r0 :32]       @ row 2
++        vst1.32     {d4[0]}, [ip :32]       @ row 3
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_horizontal_8_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Rows 1-7 = left[y] replicated; row 0 = clip_u8(left[0] + ((top[x] - top_left) >> 1)).
++function ff_hevc_rpi_pred_horizontal_8_neon_8, export=1
++        ldrb        ip, [r2, #-1]       @ Top-left
++        vld1.8      {d0}, [r1 :64]      @ Top
++        vmov.i8     d1, #128            @ x ^ 128 converts between u8 and biased s8
++        vld1.8      {d2[]}, [r2]!       @ d2 = left[0] in every lane
++        mov         r1, #8-2            @ loop writes 6 rows; row 0 and row 7 are stored outside it
++        vdup.8      d3, ip
++        vhsub.u8    d0, d3              @ (top[x] - top_left) >> 1
++        veor        d2, d1              @ left[0] into signed range
++        vqadd.s8    d0, d2              @ signed saturation performs the 0..255 clip
++          vld1.8      {d2[]}, [r2]!     @ d2 = left[1]
++        veor        d0, d1              @ back to unsigned
++        vst1.8      {d0}, [r0], r3      @ row 0 (filtered)
++1:
++            vld1.8      {d0[]}, [r2]!           @ next even row value
++        subs        r1, #2
++          vst1.8      {d2}, [r0 :64], r3        @ odd row loaded on the previous pass
++              vld1.8      {d2[]}, [r2]!         @ next odd row value
++            vst1.8      {d0}, [r0 :64], r3      @ even row
++        bne         1b
++
++              vst1.8      {d2}, [r0 :64]        @ last row
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_horizontal_16_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Rows 1-15 = left[y] replicated; row 0 = clip_u8(left[0] + ((top[x] - top_left) >> 1)).
++function ff_hevc_rpi_pred_horizontal_16_neon_8, export=1
++        ldrb        ip, [r2, #-1]       @ Top-left
++        vld1.8      {q0}, [r1 :64]      @ Top
++        mov         r1, #16-2           @ loop writes 14 rows; row 0 and row 15 are stored outside it
++        vld1.8      {d4[],d5[]}, [r2]!  @ q2 = left[0] in every lane
++        vdup.8      q3, ip
++        vhsub.u8    q0, q3              @ (top[x] - top_left) >> 1
++        vmov.i8     q1, #128            @ x ^ 128 converts between u8 and biased s8
++        veor        q2, q1              @ left[0] into signed range
++        vqadd.s8    q0, q2              @ signed saturation performs the 0..255 clip
++          vld1.8      {d4[],d5[]}, [r2]!    @ q2 = left[1]
++        veor        q0, q1              @ back to unsigned
++        vst1.8      {q0}, [r0], r3      @ row 0 (filtered)
++1:
++            vld1.8      {d0[],d1[]}, [r2]!      @ next even row value
++        subs        r1, #2
++          vst1.8      {q2}, [r0 :64], r3        @ odd row loaded on the previous pass
++              vld1.8      {d4[],d5[]}, [r2]!    @ next odd row value
++            vst1.8      {q0}, [r0 :64], r3      @ even row
++        bne         1b
++
++              vst1.8      {q2}, [r0 :64]        @ last row
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_horizontal_32_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Each of the 32 rows is left[y] replicated over 32 bytes; _top is not read.
++function ff_hevc_rpi_pred_horizontal_32_neon_8, export=1
++        vld1.8      {d0[],d1[]}, [r2]!  @ q0 = left[0] in every lane
++        add         ip, r0, #16         @ ip = right-hand 16 bytes of the row
++        mov         r1, #32-2           @ loop writes 30 rows; first and last are stored outside it
++          vld1.8      {d2[],d3[]}, [r2]!    @ q1 = left[1]
++        vst1.8      {q0}, [r0 :128], r3
++        vst1.8      {q0}, [ip :128], r3
++1:
++            vld1.8      {d0[],d1[]}, [r2]!      @ next even row value
++        subs        r1, #2
++          vst1.8      {q1}, [r0 :128], r3       @ odd row, left half
++          vst1.8      {q1}, [ip :128], r3       @ odd row, right half
++              vld1.8      {d2[],d3[]}, [r2]!    @ next odd row value
++            vst1.8      {q0}, [r0 :128], r3     @ even row, left half
++            vst1.8      {q0}, [ip :128], r3     @ even row, right half
++        bne         1b
++
++              vst1.8      {q1}, [r0 :128]       @ last row
++              vst1.8      {q1}, [ip :128]
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_horizontal_c_4_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Each of the 4 rows is the 16-bit element left[y] replicated over 8 bytes; _top is not read.
++function ff_hevc_rpi_pred_horizontal_c_4_neon_8, export=1
++        add         r1, r2, #2          @ r1 walks the odd elements, r2 the even ones
++        vld1.16     {d0[]}, [r2]        @ left[0]
++        add         r2, #4
++        vld1.16     {d1[]}, [r1]        @ left[1]
++        add         r1, #4
++        vld1.16     {d2[]}, [r2]        @ left[2]
++A       add         r2, r0, r3, lsl #1  @ A/T lines: presumably ARM/Thumb variants (asm.S macros)
++T       lsl         r3, #1
++T       add         r2, r0, r3          @ either way r2 = r0 + 2 * r3 (row 1)
++        vld1.16     {d3[]}, [r1]        @ left[3]
++A       lsl         r3, #2              @ either way r3 ends up as 4 * r3 (two rows)
++T       lsl         r3, #1
++        vst1.16     {d0}, [r0 :64], r3  @ row 0
++        vst1.16     {d1}, [r2 :64], r3  @ row 1
++        vst1.16     {d2}, [r0 :64]      @ row 2
++        vst1.16     {d3}, [r2 :64]      @ row 3
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_horizontal_c_8_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Each of the 8 rows is the 16-bit element left[y] replicated over 16 bytes; _top is not read.
++function ff_hevc_rpi_pred_horizontal_c_8_neon_8, export=1
++        vld1.16     {d0[],d1[]}, [r2]!  @ q0 = left[0] in every lane
++        lsl         r3, #1              @ r3 * 2 = byte step of one row
++          vld1.16     {d2[],d3[]}, [r2]!    @ q1 = left[1]
++        mov         r1, #8-2            @ loop writes 6 rows; first and last are stored outside it
++        vst1.16     {q0}, [r0 :64], r3  @ row 0
++1:
++            vld1.16     {d0[],d1[]}, [r2]!      @ next even row value
++        subs        r1, #2
++          vst1.16     {q1}, [r0 :64], r3        @ odd row loaded on the previous pass
++              vld1.16     {d2[],d3[]}, [r2]!    @ next odd row value
++            vst1.16     {q0}, [r0 :64], r3      @ even row
++        bne         1b
++
++              vst1.16     {q1}, [r0 :64]        @ last row
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_horizontal_c_16_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Each of the 16 rows is the 16-bit element left[y] replicated over 32 bytes; _top is not read.
++function ff_hevc_rpi_pred_horizontal_c_16_neon_8, export=1
++        vld1.16     {d0[],d1[]}, [r2]!  @ q0 = left[0] in every lane
++        lsl         r3, #1              @ r3 * 2 = byte step of one row
++        add         ip, r0, #16         @ ip = right-hand 16 bytes of the row
++        mov         r1, #16-2           @ loop writes 14 rows; first and last are stored outside it
++          vld1.16     {d2[],d3[]}, [r2]!    @ q1 = left[1]
++        vst1.16     {q0}, [r0 :128], r3
++        vst1.16     {q0}, [ip :128], r3
++1:
++            vld1.16     {d0[],d1[]}, [r2]!      @ next even row value
++        subs        r1, #2
++          vst1.16     {q1}, [r0 :128], r3       @ odd row, left half
++          vst1.16     {q1}, [ip :128], r3       @ odd row, right half
++              vld1.16     {d2[],d3[]}, [r2]!    @ next odd row value
++            vst1.16     {q0}, [r0 :128], r3     @ even row, left half
++            vst1.16     {q0}, [ip :128], r3     @ even row, right half
++        bne         1b
++
++              vst1.16     {q1}, [r0 :128]       @ last row
++              vst1.16     {q1}, [ip :128]
++        bx          lr
++endfunc
++
++
++@------------------------------------------------------------------------------
++@
++@ 10 Bit
++@ Has clipping constants so 10-bit only but could easily be macroed up to
++@ 14-bit before we run out of bits
++
++
++@ ff_hevc_rpi_pred_vertical_4_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Rows copy top[]; column 0 of row y = clip(top[0] + ((left[y] - top_left) >> 1), 0, 0x3ff).
++function ff_hevc_rpi_pred_vertical_4_neon_10, export=1
++        ldrh        ip, [r2, #-2]       @ Top-left
++        vld1.16     {d0}, [r2 :64]      @ Left
++        vmov.i16    d2, #0              @ lower clip bound
++        vld1.16     {d1[]}, [r1]        @ top[0] in every lane
++T       lsl         r3, #1
++        vdup.16     d4, ip
++        vmov.i16    d3, #0x3ff          @ upper clip bound
++        vld1.16     {d5}, [r1 :64]      @ Top
++        vhsub.u16   d4, d0, d4          @ (left[y] - top_left) >> 1
++        vmov.i64    d0, #0xffff         @ vbit mask selecting lane 0 only
++A       add         r2, r0, r3, lsl #1  @ A/T lines: presumably ARM/Thumb variants; r2 = r0 + 2 * r3
++T       add         r2, r0, r3
++        vadd.i16    d1, d1, d4          @ lane y = column 0 value of row y, before clipping
++        vmov        d6, d5              @ second copy of top for the odd rows
++        vmax.s16    d1, d1, d2
++        vmin.s16    d2, d1, d3          @ d2 and d1 both hold the clipped values;
++        vmin.s16    d1, d1, d3          @ they are shifted independently below
++        vbit        d5, d1, d0          @ row 0: lane 0 <- value[0]
++A       lsl         r3, #2              @ either way r3 ends up as 4 * r3 (two rows)
++T       lsl         r3, #1
++        vshr.u64    d2, #16             @ value[1] into lane 0
++        vshr.u64    d1, #32             @ value[2] into lane 0
++        vbit        d6, d2, d0          @ row 1: lane 0 <- value[1]
++        vst1.16     {d5}, [r0], r3      @ row 0
++        vshr.u64    d2, #32             @ value[3] into lane 0
++        vst1.16     {d6}, [r2], r3      @ row 1
++        vbit        d5, d1, d0
++        vst1.16     {d5}, [r0]          @ row 2
++        vbit        d6, d2, d0
++        vst1.16     {d6}, [r2]          @ row 3
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_vertical_8_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Rows copy top[]; column 0 of row y = clip(top[0] + ((left[y] - top_left) >> 1), 0, 0x3ff).
++function ff_hevc_rpi_pred_vertical_8_neon_10, export=1
++        ldrh        ip, [r2, #-2]       @ Top-left
++        vld1.16     {q0}, [r2 :128]     @ Left
++        lsl         r3, #1              @ r3 * 2 = byte step of one row
++        vdup.16     q1, ip
++        vld1.16     {d4[],d5[]}, [r1]   @ top[0] in every lane
++        vhsub.u16   q0, q0, q1          @ (left[y] - top_left) >> 1
++        vmov.i16    q1, #0              @ lower clip bound
++        vadd.i16    q0, q2
++        vmov.i16    q2, #0x3ff          @ upper clip bound
++        vld1.16     {q3}, [r1 :128]     @ Top
++        mov         r1, #8              @ drops by 2 per pass: 4 passes x 2 rows
++        vmax.s16    q0, q1
++        vmov        q1, q3              @ q1 = even-row image, q3 = odd-row image (both start as top)
++        vmin.s16    q0, q2              @ q0 lane y = column 0 value of row y
++        vmov.i64    d16, #0xffff        @ vbit mask selecting lane 0 only
++        vext.16     q2, q0, q0, #1      @ q2 = q0 rotated by one lane: odd-row values lead
++1:
++        vbit        d2, d0, d16         @ even row: lane 0 <- current q0 lane 0
++        vbit        d6, d4, d16         @ odd row: lane 0 <- current q2 lane 0
++        vext.16     q0, q0, q0, #2      @ rotate to the next even-row value
++        subs        r1, #2
++        vst1.16     {q1}, [r0 :128], r3
++        vext.16     q2, q2, q2, #2      @ rotate to the next odd-row value
++        vst1.16     {q3}, [r0 :128], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_vertical_16_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Rows copy top[]; column 0 of row y = clip(top[0] + ((left[y] - top_left) >> 1), 0, 0x3ff).
++function ff_hevc_rpi_pred_vertical_16_neon_10, export=1
++        ldrh        ip, [r2, #-2]       @ Top-left
++        vld1.16     {q0-q1}, [r2 :128]  @ Left
++T       lsl         r3, #1
++        vdup.16     q2, ip
++A       add         r2, r0, r3, lsl #1  @ A/T lines: presumably ARM/Thumb variants; r2 = r0 + 2 * r3
++T       add         r2, r0, r3
++        vld1.16     {d6[],d7[]}, [r1]   @ top[0] in every lane
++A       lsl         r3, #2              @ either way r3 ends up as 4 * r3 (two rows)
++T       lsl         r3, #1
++        vhsub.u16   q0, q2              @ (left[y] - top_left) >> 1, rows 0-7
++        vhsub.u16   q1, q2              @ same for rows 8-15
++        vadd.i16    q0, q3
++        vadd.i16    q1, q3
++        vmov.i16    q2, #0              @ lower clip bound
++        vld1.16     {q8-q9}, [r1 :128]  @ Top
++        mov         r1, #0              @ counter: 4 subtractions of 1<<30 wrap back to zero
++        vmov.i16    q3, #0x3ff          @ upper clip bound
++        vmax.s16    q0, q2
++        vmax.s16    q1, q2
++        vmin.s16    q0, q3
++        vmin.s16    q1, q3
++        vmov        q10, q8             @ q10-q11 = odd-row image, q8-q9 = even-row image
++        vmov        q11, q9
++        vext.16     q2, q0, q1, #1      @ values 1..8: odd-row values lead
++        vext.16     q3, q1, q1, #1      @ values 9..15 (then 8)
++        vmov.i64    d24, #0xffff        @ vbit mask selecting lane 0 only
++1:
++        vbit        d16, d0, d24        @ rows 0-7: even row lane 0
++        vbit        d20, d4, d24        @ odd row lane 0
++        vext.16     q0, q0, q0, #2      @ rotate to the next even-row value
++        subs        r1, #1<<30
++        vst1.16     {q8-q9}, [r0 :128], r3
++        vext.16     q2, q2, q2, #2      @ rotate to the next odd-row value
++        vst1.16     {q10-q11}, [r2 :128], r3
++        bne         1b
++1:
++        vbit        d16, d2, d24        @ rows 8-15: same scheme using q1 / q3
++        vbit        d20, d6, d24
++        vext.16     q1, q1, q1, #2
++        subs        r1, #1<<30
++        vst1.16     {q8-q9}, [r0 :128], r3
++        vext.16     q3, q3, q3, #2
++        vst1.16     {q10-q11}, [r2 :128], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_vertical_32_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Writes the 64 bytes loaded from _top to each of 32 rows; _left is not read.
++function ff_hevc_rpi_pred_vertical_32_neon_10, export=1
++        vldm        r1, { q0-q3 }    @ Up
++        lsl         r3, #1              @ r3 * 2 = byte step of one row
++        mov         r1, #32             @ one row per pass
++        add         r2, r0, #32         @ r2 = right-hand 32 bytes of the row
++1:
++        vst1.16     {q0-q1}, [r0 :128], r3
++        subs        r1, #1
++        vst1.16     {q2-q3}, [r2 :128], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_vertical_c_4_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Writes the 16 bytes loaded from _top to each of 4 rows; _left is not read.
++function ff_hevc_rpi_pred_vertical_c_4_neon_10, export=1
++        vld1.16    {q0 }, [r1  :128]    @ Up
++        add         r2,  r0,  r3,  lsl #2   @ r2 = r0 + 4 * r3 (second row)
++        lsl         r3,  #3                 @ r3 * 8 = byte step over two rows
++
++        vst1.16    {q0 }, [r0  :128], r3    @ row 0
++        vst1.16    {q0 }, [r2  :128], r3    @ row 1
++        vst1.16    {q0 }, [r0  :128]        @ row 2
++        vst1.16    {q0 }, [r2  :128]        @ row 3
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_vertical_c_8_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Writes the 32 bytes loaded from _top to each of 8 rows; _left is not read.
++function ff_hevc_rpi_pred_vertical_c_8_neon_10, export=1
++        vld1.16    {q0,  q1 }, [r1  :128]    @ Up
++        add         r2,  r0,  r3,  lsl #2   @ r2 = r0 + 4 * r3 (odd rows)
++        lsl         r3,  #3                 @ r3 * 8 = byte step over two rows
++        mov         r1,  #4                 @ 4 passes x 2 rows = 8 rows
++1:
++        vst1.16    {q0,  q1 }, [r0  :128], r3    @ even row
++        subs        r1,  #1
++        vst1.16    {q0,  q1 }, [r2  :128], r3    @ odd row
++        bne         1b
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_vertical_c_16_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Writes the 64 bytes loaded from _top to each of 16 rows; _left is not read.
++function ff_hevc_rpi_pred_vertical_c_16_neon_10, export=1
++        vldm        r1, { q0-q3 }    @ Up
++        lsl         r3, #2              @ r3 * 4 = byte step of one row
++        mov         r1, #16             @ one row per pass
++        add         r2, r0, #32         @ r2 = right-hand 32 bytes of the row
++1:
++        vst1.16     {q0-q1}, [r0 :128], r3
++        subs        r1, #1
++        vst1.16     {q2-q3}, [r2 :128], r3
++        bne         1b
++
++        bx          lr
++endfunc
++
++@ ff_hevc_rpi_pred_horizontal_4_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Rows 1-3 = left[y] replicated; row 0 = clip(left[0] + ((top[x] - top_left) >> 1), 0, 0x3ff).
++function ff_hevc_rpi_pred_horizontal_4_neon_10, export=1
++        ldrh        ip, [r2, #-2]       @ Top-left
++        vld1.16     {d0}, [r1 :64]      @ Top
++        vmov.i16    d1, #0              @ lower clip bound
++        vld1.16     {d2[]}, [r2]!       @ d2 = left[0] in every lane
++T       lsl         r3, #1
++        vdup.16     d3, ip
++        vmov.i16    d4, #0x3ff          @ upper clip bound
++        vhsub.u16   d0, d3              @ (top[x] - top_left) >> 1
++A       add         ip, r0, r3, lsl #1  @ A/T lines: presumably ARM/Thumb variants; ip = r0 + 2 * r3
++T       add         ip, r0, r3
++        vld1.16     {d3[]}, [r2]!       @ d3 = left[1]
++A       lsl         r3, #2              @ either way r3 ends up as 4 * r3 (two rows)
++T       lsl         r3, #1
++        vadd.i16    d0, d2
++        vld1.16     {d2[]}, [r2]!       @ d2 = left[2]
++        vmax.s16    d0, d1
++        vld1.16     {d1[]}, [r2]        @ d1 = left[3] (clip bound no longer needed)
++        vmin.s16    d0, d4
++        vst1.16     {d0}, [r0 :64], r3  @ row 0 (filtered)
++        vst1.16     {d3}, [ip :64], r3  @ row 1
++        vst1.16     {d2}, [r0 :64]      @ row 2
++        vst1.16     {d1}, [ip :64]      @ row 3
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_horizontal_8_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Rows 1-7 = left[y] replicated; row 0 = clip(left[0] + ((top[x] - top_left) >> 1), 0, 0x3ff).
++function ff_hevc_rpi_pred_horizontal_8_neon_10, export=1
++        ldrh        ip, [r2, #-2]       @ Top-left
++        vld1.16     {q0}, [r1 :128]     @ Top
++        lsl         r3, #1              @ r3 * 2 = byte step of one row
++        vdup.16     q1, ip
++        mov         r1, #8-2            @ loop writes 6 rows; row 0 and row 7 are stored outside it
++        vhsub.u16   q0, q1              @ (top[x] - top_left) >> 1
++        vld1.16     {d2[],d3[]}, [r2]!  @ q1 = left[0] in every lane
++        vmov.i16    q2, #0              @ lower clip bound
++        vadd.i16    q0, q1
++        vmov.i16    q1, #0x3ff          @ upper clip bound
++        vmax.s16    q0, q2
++          vld1.16     {d4[],d5[]}, [r2]!    @ q2 = left[1]
++        vmin.s16    q0, q1
++        vst1.16     {q0}, [r0 :128], r3 @ row 0 (filtered)
++1:
++            vld1.16     {d0[],d1[]}, [r2]!      @ next even row value
++        subs        r1, #2
++          vst1.16     {q2}, [r0 :128], r3       @ odd row loaded on the previous pass
++              vld1.16     {d4[],d5[]}, [r2]!    @ next odd row value
++            vst1.16     {q0}, [r0 :128], r3     @ even row
++        bne         1b
++
++              vst1.16     {q2}, [r0 :128]       @ last row
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_horizontal_16_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Rows 1-15 = left[y] replicated; row 0 = clip(left[0] + ((top[x] - top_left) >> 1), 0, 0x3ff).
++function ff_hevc_rpi_pred_horizontal_16_neon_10, export=1
++        ldrh        ip, [r2, #-2]       @ Top-left
++        vld1.16     {q0-q1}, [r1 :128]  @ Top
++        lsl         r3, #1              @ r3 * 2 = byte step of one row
++        vdup.16     q2, ip
++        add         ip, r0, r3
++        vhsub.u16   q0, q2              @ (top[x] - top_left) >> 1, left half
++        add         ip, #16             @ ip = right-hand 16 bytes of row 1 (row 0 is one 32-byte store)
++        vhsub.u16   q1, q2              @ same for the right half
++        mov         r1, #16-2           @ loop writes 14 rows; row 0 and row 15 are stored outside it
++        vld1.16     {d4[],d5[]}, [r2]!  @ q2 = left[0] in every lane
++        vmov.i16    q3, #0              @ lower clip bound
++        vadd.u16    q0, q2
++        vadd.i16    q1, q2
++        vmov.i16    q2, #0x3ff          @ upper clip bound
++        vmax.s16    q0, q3
++        vmax.s16    q1, q3
++          vld1.16     {d6[],d7[]}, [r2]!    @ q3 = left[1]
++        vmin.s16    q0, q2
++        vmin.s16    q1, q2
++        vst1.16     {q0-q1}, [r0 :128], r3  @ row 0 (filtered)
++1:
++            vld1.16     {d0[],d1[]}, [r2]!      @ next even row value
++        subs        r1, #2
++          vst1.16     {q3}, [r0 :128], r3       @ odd row, left half
++          vst1.16     {q3}, [ip :128], r3       @ odd row, right half
++              vld1.16     {d6[],d7[]}, [r2]!    @ next odd row value
++            vst1.16     {q0}, [r0 :128], r3     @ even row, left half
++            vst1.16     {q0}, [ip :128], r3     @ even row, right half
++        bne         1b
++
++              vst1.16     {q3}, [r0 :128]       @ last row
++              vst1.16     {q3}, [ip :128]
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_horizontal_32_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Each of the 32 rows is the 16-bit value left[y] replicated over 64 bytes; _top is not read.
++function ff_hevc_rpi_pred_horizontal_32_neon_10, export=1
++        vld1.16     {d0[],d1[]}, [r2]!  @ q0 = left[0] in every lane
++        add         ip, r0, #16         @ r0 covers byte offsets 0 and 32, ip covers 16 and 48
++        push        {lr}                @ lr is used as a scratch register below
++        mov         lr, #32             @ step between the two stores each pointer makes per row
++          vld1.16     {d2[],d3[]}, [r2]!    @ q1 = left[1]
++        lsl         r3, #1
++        vst1.16     {q0}, [r0 :128], lr
++        sub         r3, #32             @ r3 = row step in bytes minus the 32 already advanced
++        vst1.16     {q0}, [ip :128], lr
++        mov         r1, #32-2           @ loop writes 30 rows; first and last are stored outside it
++        vst1.16     {q0}, [r0 :128], r3
++        vst1.16     {q0}, [ip :128], r3
++1:
++            vld1.16     {d0[],d1[]}, [r2]!      @ next even row value
++        subs        r1, #2
++          vst1.16     {q1}, [r0 :128], lr       @ odd row, four 16-byte stores
++          vst1.16     {q1}, [ip :128], lr
++          vst1.16     {q1}, [r0 :128], r3
++          vst1.16     {q1}, [ip :128], r3
++              vld1.16     {d2[],d3[]}, [r2]!    @ next odd row value
++            vst1.16     {q0}, [r0 :128], lr     @ even row, four 16-byte stores
++            vst1.16     {q0}, [ip :128], lr
++            vst1.16     {q0}, [r0 :128], r3
++            vst1.16     {q0}, [ip :128], r3
++        bne         1b
++
++              vst1.16     {q1}, [r0 :128], lr   @ last row
++              vst1.16     {q1}, [ip :128], lr
++              vst1.16     {q1}, [r0 :128]
++              vst1.16     {q1}, [ip :128]
++        pop         {pc}                @ return via the saved lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_horizontal_c_4_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Each of the 4 rows is the 32-bit element left[y] replicated over 16 bytes; _top is not read.
++function ff_hevc_rpi_pred_horizontal_c_4_neon_10, export=1
++        add         r1, r2, #4          @ r1 walks the odd elements, r2 the even ones
++        vld1.32     {d0[],d1[]}, [r2]   @ left[0]
++        add         r2, #8
++        vld1.32     {d2[],d3[]}, [r1]   @ left[1]
++        add         r1, #8
++        vld1.32     {d4[],d5[]}, [r2]   @ left[2]
++A       add         r2, r0, r3, lsl #2  @ A/T lines: presumably ARM/Thumb variants (asm.S macros)
++T       lsl         r3, #2
++T       add         r2, r0, r3          @ either way r2 = r0 + 4 * r3 (row 1)
++        vld1.32     {d6[],d7[]}, [r1]   @ left[3]
++A       lsl         r3, #3              @ either way r3 ends up as 8 * r3 (two rows)
++T       lsl         r3, #1
++        vst1.32     {q0}, [r0 :128], r3 @ row 0
++        vst1.32     {q1}, [r2 :128], r3 @ row 1
++        vst1.32     {q2}, [r0 :128]     @ row 2
++        vst1.32     {q3}, [r2 :128]     @ row 3
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_horizontal_c_8_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Each of the 8 rows is the 32-bit element left[y] replicated over 32 bytes; _top is not read.
++function ff_hevc_rpi_pred_horizontal_c_8_neon_10, export=1
++        vld1.32     {d0[],d1[]}, [r2]!  @ q0 = left[0] in every lane
++        lsl         r3, #2              @ r3 * 4 = byte step of one row
++        add         ip, r0, #16         @ ip = right-hand 16 bytes of the row
++        mov         r1, #8-2            @ loop writes 6 rows; first and last are stored outside it
++          vld1.32     {d2[],d3[]}, [r2]!    @ q1 = left[1]
++        vst1.32     {q0}, [r0 :128], r3
++        vst1.32     {q0}, [ip :128], r3
++1:
++            vld1.32     {d0[],d1[]}, [r2]!      @ next even row value
++        subs        r1, #2
++          vst1.32     {q1}, [r0 :128], r3       @ odd row, left half
++          vst1.32     {q1}, [ip :128], r3       @ odd row, right half
++              vld1.32     {d2[],d3[]}, [r2]!    @ next odd row value
++            vst1.32     {q0}, [r0 :128], r3     @ even row, left half
++            vst1.32     {q0}, [ip :128], r3     @ even row, right half
++        bne         1b
++
++              vst1.32     {q1}, [r0 :128]       @ last row
++              vst1.32     {q1}, [ip :128]
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_horizontal_c_16_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ Each of the 16 rows is the 32-bit element left[y] replicated over 64 bytes; _top is not read.
++function ff_hevc_rpi_pred_horizontal_c_16_neon_10, export=1
++        vld1.32     {d0[],d1[]}, [r2]!  @ q0 = left[0] in every lane
++        add         ip, r0, #16         @ r0 covers byte offsets 0 and 32, ip covers 16 and 48
++        push        {lr}                @ lr is used as a scratch register below
++        mov         lr, #32             @ step between the two stores each pointer makes per row
++          vld1.32     {d2[],d3[]}, [r2]!    @ q1 = left[1]
++        lsl         r3, #2
++        vst1.32     {q0}, [r0 :128], lr
++        sub         r3, #32             @ r3 = row step in bytes minus the 32 already advanced
++        vst1.32     {q0}, [ip :128], lr
++        mov         r1, #16-2           @ loop writes 14 rows; first and last are stored outside it
++        vst1.32     {q0}, [r0 :128], r3
++        vst1.32     {q0}, [ip :128], r3
++1:
++            vld1.32     {d0[],d1[]}, [r2]!      @ next even row value
++        subs        r1, #2
++          vst1.32     {q1}, [r0 :128], lr       @ odd row, four 16-byte stores
++          vst1.32     {q1}, [ip :128], lr
++          vst1.32     {q1}, [r0 :128], r3
++          vst1.32     {q1}, [ip :128], r3
++              vld1.32     {d2[],d3[]}, [r2]!    @ next odd row value
++            vst1.32     {q0}, [r0 :128], lr     @ even row, four 16-byte stores
++            vst1.32     {q0}, [ip :128], lr
++            vst1.32     {q0}, [r0 :128], r3
++            vst1.32     {q0}, [ip :128], r3
++        bne         1b
++
++              vst1.32     {q1}, [r0 :128], lr   @ last row
++              vst1.32     {q1}, [ip :128], lr
++              vst1.32     {q1}, [r0 :128]
++              vst1.32     {q1}, [ip :128]
++        pop         {pc}                @ return via the saved lr
++endfunc
++
++
++
+--- /dev/null
++++ b/libavcodec/arm/rpi_hevcpred_intra_planar_neon.S
+@@ -0,0 +1,1043 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox, Ben Avison
++*/
++
++#include "libavutil/arm/asm.S"
++#include "neon.S"
++
++@ Planar intra pred (8.4.4.2.4)
++@
++@ predSamples[ x ][ y ] =
++@ ( ( nTbS - 1 - x ) * p[ -1 ][ y ] +
++@   ( x + 1 ) * p[ nTbS ][ -1 ] +
++@   ( nTbS - 1 - y ) * p[ x ][ -1 ] +
++@   ( y + 1 ) * p[ -1 ][ nTbS ] + nTbS ) >> ( Log2( nTbS ) + 1 )
++
++@ All 10-bit functions would work with 9
++
++
++@ ff_hevc_rpi_pred_planar_4_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ 4x4 planar prediction, 8-bit: rows 0-1 are accumulated in q8 and rows 2-3 in q2.
++function ff_hevc_rpi_pred_planar_4_neon_8, export=1
++
++        vld1.8      {d0}, [r1]          @ Top
++        adr         ip, nb_3_0_1_4
++        vld1.8      {d1}, [r2]          @ Left
++        vmov.i64    d2, #0xffffffff     @ mask: low 4 bytes set, selects the first row of each pair
++        vldr        d3, [ip, #8]        @ {1,2,3,4,1,2,3,4}
++        add         r1, r0, r3          @ r1 = row 1
++        vdup.32     d4, d0[0]           @ {t0,t1,t2,t3,t0,t1,t2,t3}
++        vdup.8      d0, d0[4]           @ {t4,t4,t4,t4,t4,t4,t4,t4}
++        vdup.8      d5, d1[4]           @ {l4,l4,l4,l4,l4,l4,l4,l4}
++        vdup.8      d6, d1[0]           @ {l0,l0,l0,l0,l0,l0,l0,l0}
++        vshll.u8    q8, d4, #2          @ 4 * top[x]
++        lsl         r3, #1              @ each pointer advances two rows
++        vsubl.u8    q2, d5, d4          @ per-row increment: l4 - top[x]
++        vmlal.u8    q8, d0, d3          @ + (x + 1) * t4
++        vld1.8      {d0}, [ip]          @ {3,2,1,0,3,2,1,0}
++        vdup.8      d7, d1[1]           @ {l1,l1,l1,l1,l1,l1,l1,l1}
++        vshl.s16    q9, q2, #1          @ 2 * increment
++        vbif        d6, d7, d2          @ {l0,l0,l0,l0,l1,l1,l1,l1}
++        vadd.i16    d16, d4             @ row 0 half: + 1 * increment
++        vdup.8      d7, d1[2]           @ {l2,l2,l2,l2,l2,l2,l2,l2}
++        vadd.i16    d17, d18            @ row 1 half: + 2 * increment
++        vdup.8      d1, d1[3]           @ {l3,l3,l3,l3,l3,l3,l3,l3}
++        vadd.i16    q2, q8, q9          @ rows 2-3 = rows 0-1 + 2 * increment
++        vmlal.u8    q8, d0, d6          @ + (3 - x) * left[y], rows 0-1
++        vbif        d7, d1, d2          @ {l2,l2,l2,l2,l3,l3,l3,l3}
++        vmlal.u8    q2, d0, d7          @ + (3 - x) * left[y], rows 2-3
++        vrshrn.i16  d0, q8, #3          @ rounding narrow: (acc + 4) >> 3
++        vst1.32     d0[0], [r0 :32], r3 @ row 0
++        vst1.32     d0[1], [r1 :32], r3 @ row 1
++        vrshrn.i16  d0, q2, #3
++        vst1.32     d0[0], [r0 :32]     @ row 2
++        vst1.32     d0[1], [r1 :32]     @ row 3
++
++        bx          lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_planar_4_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ 4x4 planar prediction on 16-bit samples: rows 0-1 accumulate in q9, rows 2-3 in q2.
++function ff_hevc_rpi_pred_planar_4_neon_10, export=1
++        @ Weights are read from the 16-bit table nbh_3_0_1_4, so they need no
++        @ widening here: d3 = {1,2,3,4} and d6 = {3,2,1,0}
++        vld1.16     {q0}, [r1 :64]      @ Top
++        adr         ip, nbh_3_0_1_4
++        vldr        d2, [r2, #8]        @ Left (lower)
++        vldr        d3, [ip, #8]        @ {1,2,3,4}
++T       lsl         r3, #1
++        vshl.s16    d4, d0, #2          @ 4 * top[x]
++        vdup.16     d1, d1[0]           @ {t4,t4,t4,t4}
++        vldr        d5, [r2]            @ Left (upper)
++        vdup.16     d2, d2[0]           @ {l4,l4,l4,l4}
++        vldr        d6, [ip]            @ {3,2,1,0}
++        vmla.i16    d4, d3, d1          @ Acc set up
++        vsub.i16    d0, d2, d0          @ Add set up
++        vmov        d7, d6              @ q3 = {3,2,1,0,3,2,1,0}
++        vdup.16     d2, d5[0]           @ l0
++        vdup.16     d3, d5[1]           @ l1
++        vdup.16     d16, d5[2]          @ l2
++        vadd.i16    d18, d0, d4         @ row 0: acc + 1 * add
++        vshl.s16    d0, #1              @ x2
++        vadd.i16    d19, d0, d4         @ row 1: acc + 2 * add
++        vdup.16     d17, d5[3]          @ l3
++        vadd.i16    d4, d0, d18         @ row 2: acc + 3 * add
++A       add         r1, r0, r3, lsl #1  @ A/T lines: presumably ARM/Thumb variants; r1 = r0 + 2 * r3
++T       add         r1, r0, r3
++        vadd.i16    d5, d0, d19         @ row 3: acc + 4 * add
++A       lsl         r3, #2              @ either way r3 ends up as 4 * r3 (two rows)
++T       lsl         r3, #1
++        vmla.i16    q9, q1, q3          @ rows 0-1: + (3 - x) * left[y]
++        vmla.i16    q2, q8, q3          @ rows 2-3: + (3 - x) * left[y]
++        vrshr.u16   q0, q9, #3          @ rounding shift: (acc + 4) >> 3
++        vst1.16     {d0}, [r0], r3      @ row 0
++        vrshr.u16   d2, d4, #3
++        vst1.16     {d1}, [r1], r3      @ row 1
++        vrshr.u16   d3, d5, #3
++        vst1.16     {d2}, [r0]          @ row 2
++        vst1.16     {d3}, [r1]          @ row 3
++
++        bx         lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_planar_8_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ 8x8 planar prediction, 8-bit: two rows per loop pass, with a running per-row accumulator.
++function ff_hevc_rpi_pred_planar_8_neon_8, export=1
++
++        vld1.8      {q0}, [r1]          @ Top
++        adr         ip, nb_7_0_1_8
++        vldr        d2, [r2, #8]        @ Left (lower)
++        mov         r1, #8              @ drops by 2 per pass: 4 passes x 2 rows
++        vldr        d3, [ip, #8]        @ {1,2,3,4,5,6,7,8}
++        vshll.u8    q2, d0, #3          @ 8 * top[x]
++        vdup.8      d1, d1[0]           @ {t8,t8,t8,t8,t8,t8,t8,t8}
++        vdup.8      d2, d2[0]           @ {l8,l8,l8,l8,l8,l8,l8,l8}
++        vldr        d6, [r2]            @ Left (upper)
++        vmlal.u8    q2, d3, d1          @ + (x + 1) * t8
++        vsubl.u8    q0, d2, d0          @ per-row increment: l8 - top[x]
++        vldr        d7, [ip]            @ {7,6,5,4,3,2,1,0}
++
++@ u8   7..0    [1]  d7
++@ u8  left[y]  [1]  d6
++@ u16 acc      [2]  q2 (even rows) or q8 (odd rows) = (x+1)*p[nTbS][-1] + 8*p[x][-1] initially
++@ u16 add      [2]  q0 = p[-1][nTbs] - p[x][-1]
++
++        vdup.8      d2, d6[0]           @ left value for the even row
++        vadd.i16    q2, q0              @ row 0 accumulator
++        vdup.8      d3, d6[1]           @ left value for the odd row
++        vadd.i16    q8, q2, q0          @ row 1 accumulator
++1:
++        vmlal.u8    q2, d7, d2          @ even row: + (7 - x) * left[y]
++        subs        r1, #2
++        vadd.i16    q9, q8, q0          @ accumulator for the next even row
++        vmlal.u8    q8, d7, d3          @ odd row: + (7 - x) * left[y]
++        vdup.8      d2, d6[2]           @ left values for the next pair of rows
++        vdup.8      d3, d6[3]
++        vrshrn.i16  d20, q2, #4         @ rounding narrow: (acc + 8) >> 4
++        vshr.u64    d6, #16             @ drop the two left bytes just consumed
++        vmov        q2, q9
++        vst1.8      {d20}, [r0], r3
++        vrshrn.i16  d20, q8, #4
++        vadd.i16    q8, q2, q0          @ accumulator for the next odd row
++        vst1.8      {d20}, [r0], r3
++        bne         1b
++
++        bx          lr
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_planar_8_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++@ 8x8 planar prediction on 16-bit samples: two rows per pass, last pair finished after the loop.
++function ff_hevc_rpi_pred_planar_8_neon_10, export=1
++
++        adr         ip, nb_7_0_1_8
++        vld1.16     {q0}, [r1 :128]!    @ Top (left)
++        lsl         r3, #1              @ r3 * 2 = byte step of one row
++        vld1.16     {q1}, [ip :128]     @ {7,6,5,4,3,2,1,0,1,2,3,4,5,6,7,8}
++        add         ip, r2, #16         @ ip -> left[8]
++        vld1.16     {d4[],d5[]}, [r1]   @ Top (right)
++        mov         r1, #8-2            @ loop stores 6 rows; the final two are stored after it
++        vshl.s16    q3, q0, #3          @ 8 * top[x]
++        vmovl.u8    q8, d3              @ {1,2,3,4,5,6,7,8}
++        vld1.16     {d18[],d19[]}, [ip] @ Left (lower)
++        vmla.i16    q3, q8, q2          @ Acc set up
++        vsub.i16    q0, q9, q0          @ Add set up
++        vmovl.u8    q1, d2              @ {7,6,5,4,3,2,1,0}
++        vadd.i16    q2, q3, q0          @ row 0 accumulator
++
++@ u16  7..0        [1]  q1
++@ u16 left[y]      [1]  [r2]
++@ u16 acc          [1]  q3 = (x+1)*p[nTbS][-1] + 8*p[x][-1] initially
++@ u16 add          [1]  q0 = p[-1][nTbs] - p[x][-1]
++
++        vld1.16     {d6[],d7[]}, [r2]!      @ left[0] in every lane
++        vadd.i16    q8, q2, q0              @ row 1 accumulator
++        vld1.16     {d18[],d19[]}, [r2]!    @ left[1]
++        vmla.i16    q2, q1, q3              @ row 0: + (7 - x) * left[0]
++        vadd.i16    q3, q8, q0              @ row 2 accumulator
++        vmla.i16    q8, q1, q9              @ row 1: + (7 - x) * left[1]
++1:
++        vrshr.u16   q9, q2, #4          @ rounding shift: (acc + 8) >> 4, even row
++        subs        r1, #2
++        vmov        q2, q3
++        vrshr.u16   q10, q8, #4         @ odd row
++          vld1.16     {d6[],d7[]}, [r2]!        @ left value for the next even row
++        vst1.16     {q9}, [r0 :128], r3
++          vadd.i16    q8, q2, q0                @ next odd row accumulator
++          vld1.16     {d18[],d19[]}, [r2]!      @ left value for the next odd row
++          vmla.i16    q2, q1, q3
++          vadd.i16    q3, q8, q0
++          vmla.i16    q8, q1, q9
++        vst1.16     {q10}, [r0 :128], r3
++        bne         1b
++
++        vrshr.u16   q9, q2, #4
++        add         r3, r0              @ r3 = address of the final row
++        vrshr.u16   q10, q8, #4
++        vst1.16     {q9}, [r0 :128]
++        vst1.16     {q10}, [r3 :128]
++
++        bx         lr
++endfunc
++
++
++@------------------------------------------------------------------------------
++@
++@ Data - has to be in two lumps to ensure we can always reach using adr
++
++        .balign 64
++
++nb_31_0_1_32:                           @ 31..0 then 1..32 (64 bytes); shares storage with nb_15_0_1_16
++        .byte   31, 30, 29, 28, 27, 26, 25, 24
++        .byte   23, 22, 21, 20, 19, 18, 17, 16
++nb_15_0_1_16:                           @ 15..0 then 1..16 (the 1..32 run simply continues)
++        .byte   15, 14, 13, 12, 11, 10,  9,  8
++        .byte    7,  6,  5,  4,  3,  2,  1,  0
++        .byte    1,  2,  3,  4,  5,  6,  7,  8
++        .byte    9, 10, 11, 12, 13, 14, 15, 16
++        .byte   17, 18, 19, 20, 21, 22, 23, 24
++        .byte   25, 26, 27, 28, 29, 30, 31, 32
++
++        @ should be back on a 64-byte boundary here
++
++        @ These could be extracted from the above array, but are kept
++        @ separate for better (16 byte) alignment
++nb_3_0_1_4:                             @ 3..0 twice, then 1..4 twice
++        .byte    3,  2,  1,  0,  3,  2,  1,  0
++        .byte    1,  2,  3,  4,  1,  2,  3,  4
++nb_7_0_1_8:                             @ 7..0 then 1..8
++        .byte    7,  6,  5,  4,  3,  2,  1,  0
++        .byte    1,  2,  3,  4,  5,  6,  7,  8
++nbh_3_0_1_4:                            @ 16-bit entries: 3..0 then 1..4
++        .short   3,  2,  1,  0,  1,  2,  3,  4
++
++@------------------------------------------------------------------------------
++
++
++@ ff_hevc_rpi_pred_planar_16_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_planar_16_neon_8, export=1
++@ Planar prediction of a 16x16 block of 8-bit samples; two rows are produced per loop pass.
++        adr         ip, nb_15_0_1_16 + 16   @ ip -> ascending {1..16} weights
++        vld1.8      {q0}, [r1 :128]!    @ Top (left)
++        add         r2, #16             @ r2 -> left[16]
++        vld1.8      {q1}, [ip: 128]     @ {1,2,3...16}
++        vld1.8      {d4[]}, [r1]        @ Top (right)
++        sub         ip, #16             @ ip -> descending {15..0} weights
++        vshll.u8    q3, d0, #4          @ 16 * top[0..7], widened to u16
++        mov         r1, #16             @ r1 = rows remaining
++        vshll.u8    q8, d1, #4          @ 16 * top[8..15]
++        vld1.8      {d5[]}, [r2]        @ Left (lower)
++        sub         r2, #16             @ r2 -> left[0] again
++        vmlal.u8    q3, d2, d4          @ += (x+1) * top[16]
++        vmlal.u8    q8, d3, d4          @ Acc set up
++        vsubl.u8    q1, d5, d0          @ left[16] - top[0..7]
++        vsubl.u8    q0, d5, d1          @ Add set up
++        vld1.8      {q2}, [ip :128]     @ {15,14,13...0}
++
++@ u8  15..0    [1]  q2
++@ u8  left[y]  [1]  [r2]
++@ u16 acc      [2]  q3,q8 = (x+1)*p[nTbS][-1] + 16*p[x][-1] initially
++@ u16 add      [2]  q1,q0 = p[-1][nTbs] - p[x][-1]
++
++        vadd.i16    q3, q1              @ acc for row 0 (first 'add' step applied)
++        vadd.i16    q8, q0
++1:
++        vadd.i16    q10, q3, q1         @ acc for row y+1, x = 0..7
++        subs        r1, #2              @ two rows per pass; flags consumed by bne below
++        vld1.8      {d18[]}, [r2]!      @ left[y] replicated to all lanes
++        vadd.i16    q11, q8, q0         @ acc for row y+1, x = 8..15
++        vld1.8      {d19[]}, [r2]!      @ left[y+1] replicated to all lanes
++        vmlal.u8    q3, d4, d18         @ row y += (15-x) * left[y]
++        vmlal.u8    q8, d5, d18
++        vadd.i16    q12, q10, q1        @ acc for row y+2, x = 0..7
++        vmlal.u8    q10, d4, d19        @ row y+1 += (15-x) * left[y+1]
++        vadd.i16    q13, q11, q0        @ acc for row y+2, x = 8..15
++        vmlal.u8    q11, d5, d19
++        vrshrn.u16  d18, q3, #5         @ round, >>5, narrow to u8
++        vrshrn.u16  d19, q8, #5
++        vmov        q3, q12             @ carry row y+2 acc into the next pass
++        vst1.8      {q9}, [r0 :128], r3 @ store row y, advance by stride
++        vrshrn.u16  d18, q10, #5
++        vrshrn.u16  d19, q11, #5
++        vmov        q8, q13
++        vst1.8      {q9}, [r0 :128], r3 @ store row y+1
++        bne         1b
++
++        bx          lr
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_planar_16_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_planar_16_neon_10, export=1
++@ Planar prediction of a 16x16 block of 16-bit samples; two rows are produced per loop pass.
++        @ Load from bytes & expand later - at the very least this uses less
++        @ memory than having a short table
++        adr         ip, nb_15_0_1_16 + 16   @ ip -> ascending {1..16} byte weights
++        vld1.16     {q0-q1}, [r1 :128]! @ Top (left)
++        add         r2, #32             @ r2 -> left[16] (2 bytes per sample)
++        vld1.8      {q2}, [ip :128]     @ {1,2,3...16}
++        lsl         r3, #1              @ stride doubled (presumably passed in 16-bit samples - confirm)
++        vld1.16     {d6[],d7[]}, [r1]   @ Top (right)
++        sub         ip, #16             @ ip -> descending {15..0} byte weights
++        vmovl.u8    q8, d4              @ widen {1..8} to u16
++        mov         r1, #16             @ r1 = rows remaining
++        vshl.i16    q9, q0, #4          @ 16 * top[0..7]
++        vmovl.u8    q2, d5              @ widen {9..16} to u16
++        vshl.i16    q10, q1, #4         @ 16 * top[8..15]
++        vld1.16     {d22[],d23[]}, [r2] @ Left (lower)
++        sub         r2, #32             @ r2 -> left[0] again
++        vld1.8      {q12}, [ip]         @ {15,14,13...0}
++        vmla.i16    q9, q8, q3          @ += (x+1) * top[16]
++        vmla.i16    q10, q2, q3         @ Acc set up
++        vsub.i16    q0, q11, q0         @ left[16] - top[0..7]
++        vsub.i16    q1, q11, q1         @ Add set up
++        vadd.i16    q2, q9, q0          @ acc for row 0, x = 0..7
++        vadd.i16    q3, q10, q1         @ acc for row 0, x = 8..15
++        vmovl.u8    q8, d24             @ widen {15..8} to u16
++        vmovl.u8    q9, d25             @ widen {7..0} to u16
++
++@ u16  15..0       [2]  q8,q9
++@ u16 left[y]      [2]  [r2]
++@ u16 acc          [2]  q2,q3 = (x+1)*p[nTbS][-1] + 16*p[x][-1] initially
++@ u16 add          [2]  q0,q1 = p[-1][nTbs] - p[x][-1]
++
++1:
++        vadd.i16    q10, q2, q0         @ acc for row y+1, x = 0..7
++        subs        r1, #2              @ two rows per pass; flags consumed by bne below
++        vld1.16     {d24[],d25[]}, [r2]!    @ q12 = left[y] in every lane
++        vadd.i16    q11, q3, q1         @ acc for row y+1, x = 8..15
++        vld1.16     {d28[],d29[]}, [r2]!    @ q14 = left[y+1] in every lane
++        vmla.i16    q2, q8, q12         @ row y += (15-x) * left[y]
++        vmla.i16    q3, q9, q12
++        vadd.i16    q12, q10, q0        @ acc for row y+2, x = 0..7
++        vmla.i16    q10, q8, q14        @ row y+1 += (15-x) * left[y+1]
++        vadd.i16    q13, q11, q1        @ acc for row y+2, x = 8..15
++        vmla.i16    q11, q9, q14
++        vrshr.u16   q14, q2, #5         @ round, >>5
++        vrshr.u16   q15, q3, #5
++        vmov        q2, q12             @ carry row y+2 acc into the next pass
++        vst1.16     {q14-q15}, [r0 :128], r3    @ store row y, advance by stride
++        vrshr.u16   q14, q10, #5
++        vrshr.u16   q15, q11, #5
++        vmov        q3, q13
++        vst1.16     {q14-q15}, [r0 :128], r3    @ store row y+1
++        bne         1b
++
++        bx         lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_planar_32_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_planar_32_neon_8, export=1
++@ Planar prediction of a 32x32 block of 8-bit samples; software-pipelined, two rows per loop pass.
++        vld1.8      {q0-q1}, [r1 :128]! @ Top (left)
++        adr         ip, nb_31_0_1_32 + 32   @ ip -> ascending {1..32} weights
++        vpush       {d8-d12}            @ d8-d12 are used below; restored by the vpop before return
++        vld1.8      {q2-q3}, [ip :128]  @ {1,2,3...32}
++        add         r2, #32             @ r2 -> left[32]
++        vld1.8      {d8[]}, [r1]        @ Top (right)
++        sub         ip, #32             @ ip -> descending {31..0} weights
++        vshll.u8    q8, d0, #5          @ 32 * top[0..7], widened to u16
++        mov         r1, #32             @ r1 = rows remaining
++        vld1.8      {d9[]}, [r2]        @ Left (lower)
++        sub         r2, #32             @ r2 -> left[0] again
++        vshll.u8    q9, d1, #5          @ 32 * top[8..15]
++        vshll.u8    q10, d2, #5         @ 32 * top[16..23]
++        vshll.u8    q11, d3, #5         @ 32 * top[24..31]
++        vmlal.u8    q8, d4, d8          @ += (x+1) * top[32]
++        vsubl.u8    q12, d9, d0         @ left[32] - top[x]
++        vmlal.u8    q9, d5, d8
++        vsubl.u8    q13, d9, d1
++        vmlal.u8    q10, d6, d8
++        vsubl.u8    q14, d9, d2
++        vmlal.u8    q11, d7, d8         @ Acc set up
++        vsubl.u8    q15, d9, d3         @ Add set up
++        vadd.i16    q8, q12             @ acc for row 0 (first 'add' step applied)
++        vadd.i16    q9, q13
++        vadd.i16    q10, q14
++        vadd.i16    q11, q15
++        vld1.8      {q4-q5}, [ip :128]  @ {31,30,29...0}
++
++@ u8  31..0    [2]  q4,q5
++@ u8  left[y]  [2]  [r2]
++@ u16 acc      [4]  q8-q11  = (x+1)*p[nTbS][-1] + 32*p[x][-1] initially
++@ u16 add      [4]  q12-q15 = p[-1][nTbs] - p[x][-1]
++
++        vld1.8      {d12[]}, [r2]!      @ d12 = left[0] in every lane
++        vadd.i16    q0, q8, q12         @ start acc for row 1, x = 0..7
++        b           2f                  @ enter the loop mid-way: no finished odd row to store yet
++1:
++          vld1.8      {d12[]}, [r2]!    @ left[y] for the next even row
++        vrshrn.u16  d3, q1, #6          @ finish previous odd row: round, >>6, narrow (x = 8..15)
++        vrshrn.u16  d2, q0, #6          @ x = 0..7; q1 now holds bytes 0..15 of the row
++          vadd.i16    q0, q8, q12       @ q0 is free again: acc for the next odd row, x = 0..7
++        vrshrn.u16  d4, q2, #6          @ x = 16..23
++        vrshrn.u16  d5, q3, #6          @ x = 24..31
++        vst1.8      {q1-q2}, [r0 :128], r3  @ store previous odd row
++2:        vadd.i16    q1, q9, q13       @ acc for the odd row, x = 8..31
++          subs        r1, #2            @ two rows per pass; flags consumed by bne below
++          vadd.i16    q2, q10, q14
++          vadd.i16    q3, q11, q15
++          vmlal.u8    q8, d8, d12       @ even row += (31-x) * left[y]
++          vmlal.u8    q9, d9, d12
++          vmlal.u8    q10, d10, d12
++          vmlal.u8    q11, d11, d12
++            vld1.8      {d12[]}, [r2]!  @ left[y+1] for the odd row
++          vrshrn.u16  d19, q9, #6       @ even row: round, >>6, narrow (x = 8..15)
++          vrshrn.u16  d18, q8, #6       @ x = 0..7; q9 now holds bytes 0..15 of the row
++            vadd.i16    q8, q0, q12     @ acc for the following even row, x = 0..7
++          vrshrn.u16  d20, q10, #6
++          vrshrn.u16  d21, q11, #6
++          vst1.8      {q9-q10}, [r0 :128], r3   @ store even row
++            vadd.i16    q9, q1, q13     @ acc for the following even row, x = 8..31
++            vadd.i16    q10, q2, q14
++            vadd.i16    q11, q3, q15
++            vmlal.u8    q0, d8, d12     @ odd row += (31-x) * left[y+1]
++            vmlal.u8    q1, d9, d12
++            vmlal.u8    q2, d10, d12
++            vmlal.u8    q3, d11, d12
++
++        bne         1b
++
++        vpop        {d8-d12}            @ restore the registers saved at entry
++
++        vrshrn.u16  d3, q1, #6          @ drain the pipeline: finish and store the last row
++        vrshrn.u16  d2, q0, #6
++        vrshrn.u16  d4, q2, #6
++        vrshrn.u16  d5, q3, #6
++        vst1.8      {q1-q2}, [r0 :128]
++
++        bx          lr
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_planar_32_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_planar_32_neon_10, export=1
++@ Planar prediction of a 32x32 block of 16-bit samples; every q register is in use, so add[0] is parked in ip.
++        @ Load from bytes & expand later - at the very least this uses less
++        @ memory than having a short table
++        vld1.16     {q0-q1}, [r1 :128]!  @ Top (left)
++        adr         ip, nb_31_0_1_32 + 32    @ ip -> ascending {1..32} byte weights
++        vpush       {q4-q7}              @ q4-q7 are used below; restored by the vpop before return
++        vld1.16     {q2-q3}, [r1 :128]!  @ Top (centre)
++        add         r2, #64              @ r2 -> left[32] (2 bytes per sample)
++        vld1.8      {q14-q15}, [ip :128] @ {1,2,3...32}
++T       lsl         r3, #1               @ T lines: double r3 once here; A lines shift it at each use instead
++        vld1.16     {d8[],d9[]}, [r1]    @ Top (right)
++        sub         ip, #32              @ ip -> descending {31..0} byte weights
++        vmovl.u8    q12, d28             @ widen {1..32} to u16 in q12-q15
++        mov         r1, #32              @ r1 = rows remaining
++        vmovl.u8    q13, d29
++        vld1.8      {q6-q7}, [ip :128]   @ {31,30,29...0}
++        vmovl.u8    q14, d30
++        vmovl.u8    q15, d31
++        vld1.16     {d10[],d11[]}, [r2]  @ Left (lower)
++        sub         r2, #64              @ r2 -> left[0] again
++        vshl.i16    q8, q0, #5           @ 32 * top[x]
++        vshl.i16    q9, q1, #5
++        vshl.i16    q10, q2, #5
++        vshl.i16    q11, q3, #5
++        vmla.i16    q8, q12, q4          @ += (x+1) * top[32]
++        vsub.i16    q0, q5, q0           @ left[32] - top[x]
++        vmla.i16    q9, q13, q4
++        vsub.i16    q1, q5, q1
++        vmla.i16    q10, q14, q4
++        vmov.u16    ip, d0[0]            @ keep add[0] in ip: lane d0[0] is reused for left[y] in the loop
++        vsub.i16    q2, q5, q2
++        vmla.i16    q11, q15, q4         @ Acc set up
++        vsub.i16    q3, q5, q3           @ Add set up
++        vadd.i16    q8, q0               @ acc for row 0 (first 'add' step applied)
++        vadd.i16    q9, q1
++        vadd.i16    q10, q2
++        vadd.i16    q11, q3
++        vmovl.u8    q4, d12              @ widen {31..0} to u16 in q4-q7
++        vmovl.u8    q5, d13
++        vmovl.u8    q6, d14
++        vmovl.u8    q7, d15
++
++@ u16 31..0    [4]  q4-q7
++@ u16 left[y]  [4]  [r2]
++@ u16 acc      [4]  q8-q11 = (x+1)*p[nTbS][-1] + 32*p[x][-1] initially
++@ u16 add      [4]  q0-q3  = p[-1][nTbs] - p[x][-1]
++
++        vadd.i16    q12, q8, q0          @ acc for row 1, x = 0..7 (needs the intact add[0])
++A       sub         r0, r0, r3, lsl #1   @ pre-bias r0 by one row: the loop adds the stride before each store
++T       sub         r0, r3
++1:
++        vld1.16     {d0[0]}, [r2]!       @ d0[0] = left[y], temporarily replacing add[0]
++A       add         r0, r0, r3, lsl #1   @ r0 -> row y
++T       add         r0, r3
++        vadd.i16    q13, q9, q1          @ acc for row y+1, x = 8..31
++        subs        r1, #2               @ two rows per pass; flags consumed by bne below
++        vadd.i16    q14, q10, q2
++        vadd.i16    q15, q11, q3
++        vmla.i16    q8, q4, d0[0]        @ row y += (31-x) * left[y]
++        vmla.i16    q9, q5, d0[0]
++        vmla.i16    q10, q6, d0[0]
++        vmla.i16    q11, q7, d0[0]
++        vmov.16     d0[0], ip            @ restore add[0]
++        vrshr.u16   q8, #6               @ round, >>6
++        vrshr.u16   q9, #6
++        vrshr.u16   q10, #6
++        vrshr.u16   q11, #6
++        vstm        r0, {q8-q11}         @ store row y (64 bytes, r0 not written back)
++        vadd.i16    q8, q12, q0          @ acc for row y+2, x = 0..7
++A       add         r0, r0, r3, lsl #1   @ r0 -> row y+1
++T       add         r0, r3
++        vld1.16     {d0[0]}, [r2]!       @ d0[0] = left[y+1]
++        vadd.i16    q9, q13, q1          @ acc for row y+2, x = 8..31
++        vadd.i16    q10, q14, q2
++        vadd.i16    q11, q15, q3
++        vmla.i16    q12, q4, d0[0]       @ row y+1 += (31-x) * left[y+1]
++        vmla.i16    q13, q5, d0[0]
++        vmla.i16    q14, q6, d0[0]
++        vmla.i16    q15, q7, d0[0]
++        vmov.16     d0[0], ip            @ restore add[0]
++        vrshr.u16   q12, #6
++        vrshr.u16   q13, #6
++        vrshr.u16   q14, #6
++        vrshr.u16   q15, #6
++        vstm        r0, {q12-q15}        @ store row y+1
++        vadd.i16    q12, q8, q0          @ acc for row y+3, x = 0..7
++        bne         1b
++
++        vpop        {q4-q7}              @ restore the registers saved at entry
++        bx          lr
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_planar_c_4_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_planar_c_4_neon_8, export=1
++@ Planar prediction, 4 rows of 4 two-byte pels (8 bytes per row); two rows are produced per loop pass.
++        vld1.8      {q0}, [r1]          @ Top
++        adr         ip, nbx2_3_0_1_4    @ ip -> {3,3,2,2,1,1,0,0, 1,1,2,2,3,3,4,4}
++        vldr        d2, [r2, #8]        @ Left (lower)
++        mov         r1, #4              @ r1 = rows remaining
++        vldr        d3, [ip, #8]        @ {1,1,2,2,3,3,4,4}
++        lsl         r3, #1              @ stride doubled (presumably passed in two-byte pels - confirm)
++        vshll.u8    q2, d0, #2          @ 4 * top[x], widened to u16
++        vdup.16     d1, d1[0]           @ {t4,t4,t4,t4,t4,t4,t4,t4}
++        vdup.16     d2, d2[0]           @ {l4,l4,l4,l4,l4,l4,l4,l4}
++        vldr        d6, [r2]            @ Left (upper)
++        vmlal.u8    q2, d3, d1          @ += (x+1) * t4
++        vsubl.u8    q0, d2, d0          @ add = l4 - top[x]
++        vldr        d7, [ip]            @ {3,3,2,2,1,1,0,0}
++
++@ u8   3..0    [1]  d7
++@ u8  left[y]  [1]  d6
++@ u16 acc      [2]  q2 (even rows) or q8 (odd rows) = (x+1)*p[nTbS][-1] + 4*p[x][-1] initially
++@ u16 add      [2]  q0 = p[-1][nTbs] - p[x][-1]
++
++        vdup.16     d2, d6[0]           @ left[0] pel in every 16-bit lane
++        vadd.i16    q2, q0              @ acc for row 0
++        vdup.16     d3, d6[1]           @ left[1] pel in every 16-bit lane
++        vadd.i16    q8, q2, q0          @ acc for row 1
++1:
++        vmlal.u8    q2, d7, d2          @ even row += (3-x) * left[y]
++        subs        r1, #2              @ two rows per pass; flags consumed by bne below
++        vadd.i16    q9, q8, q0          @ acc for the next even row
++        vmlal.u8    q8, d7, d3          @ odd row += (3-x) * left[y+1]
++        vdup.16     d2, d6[2]           @ left[2] and left[3] for the second (last) pass
++        vdup.16     d3, d6[3]
++        vrshrn.i16  d20, q2, #3         @ round, >>3, narrow to u8
++        vmov        q2, q9
++        vst1.8      {d20}, [r0], r3     @ store even row, advance by stride
++        vrshrn.i16  d20, q8, #3
++        vadd.i16    q8, q2, q0          @ acc for the next odd row
++        vst1.8      {d20}, [r0], r3     @ store odd row
++        bne         1b
++
++        bx          lr
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_planar_c_4_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_planar_c_4_neon_10, export=1
++@ Planar prediction, 4 rows of 4 pels of two 16-bit samples each (16 bytes per row); fully unrolled.
++        adr         ip, nbx2_3_0_1_4    @ ip -> byte weight table
++        vld1.16     {q0}, [r1 :128]!    @ Top (left)
++        lsl         r3, #2              @ stride * 4 (presumably passed in 4-byte pels - confirm)
++        vld1.16     {q1}, [ip :128]     @ {3,3,2,2,1,1,0,0,1,1,2,2,3,3,4,4}
++        add         ip, r2, #16         @ ip -> left[4] (4 bytes per pel)
++        vld1.32     {d4[],d5[]}, [r1]   @ Top (right)
++        vshl.s16    q3, q0, #2          @ 4 * top[x]
++        vmovl.u8    q8, d3              @ {1,1,2,2,3,3,4,4}
++        vld1.32     {d18[],d19[]}, [ip] @ Left (lower)
++        vmla.i16    q3, q8, q2          @ Acc set up
++        vsub.i16    q0, q9, q0          @ Add set up
++        vmovl.u8    q1, d2              @ {3,3,2,2,1,1,0,0}
++        vadd.i16    q2, q3, q0          @ acc for row 0
++
++@ u16  3..0        [1]  q1
++@ u32 left[y]      [1]  [r2]
++@ u16 acc          [1]  q3 = (x+1)*p[nTbS][-1] + 4*p[x][-1] initially
++@ u16 add          [1]  q0 = p[-1][nTbs] - p[x][-1]
++
++        vld1.32     {d6[],d7[]}, [r2]!      @ q3 = left[0] pel in every 32-bit lane
++        vadd.i16    q8, q2, q0              @ acc for row 1
++        vld1.32     {d18[],d19[]}, [r2]!    @ q9 = left[1]
++        vmla.i16    q2, q1, q3              @ row 0 += (3-x) * left[0]
++        vadd.i16    q3, q8, q0              @ acc for row 2
++        vmla.i16    q8, q1, q9              @ row 1 += (3-x) * left[1]
++
++        vrshr.u16   q9, q2, #3              @ row 0: round, >>3
++        vmov        q2, q3                  @ q2 = acc for row 2
++        vrshr.u16   q10, q8, #3             @ row 1: round, >>3
++          vld1.32     {d6[],d7[]}, [r2]!    @ q3 = left[2]
++        vst1.16     {q9}, [r0 :128], r3     @ store row 0
++          vadd.i16    q8, q2, q0            @ acc for row 3
++          vld1.32     {d18[],d19[]}, [r2]!  @ q9 = left[3]
++          vmla.i16    q2, q1, q3            @ row 2 += (3-x) * left[2]
++          vadd.i16    q3, q8, q0            @ result is not read again in this function
++          vmla.i16    q8, q1, q9            @ row 3 += (3-x) * left[3]
++        vst1.16     {q10}, [r0 :128], r3    @ store row 1
++
++          vrshr.u16   q9, q2, #3            @ row 2: round, >>3
++          add         r3, r0                @ r3 = address of row 3
++          vrshr.u16   q10, q8, #3           @ row 3: round, >>3
++          vst1.16     {q9}, [r0 :128]       @ store row 2
++          vst1.16     {q10}, [r3 :128]      @ store row 3
++
++          bx         lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_planar_c_8_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_planar_c_8_neon_8, export=1
++@ Planar prediction, 8 rows of 8 two-byte pels (16 bytes per row); two rows are produced per loop pass.
++        adr         ip, nbx2_7_0_1_8 + 16   @ ip -> ascending {1,1..8,8} weights
++        vld1.8      {q0}, [r1 :128]!    @ Top (left)
++        add         r2, #16             @ r2 -> left[8] (2 bytes per pel)
++        vld1.8      {q1}, [ip: 128]     @ {1,1,2,2,3,3...8,8}
++        lsl         r3, #1              @ stride doubled (presumably passed in two-byte pels - confirm)
++        vld1.16     {d4[]}, [r1]        @ Top (right)
++        sub         ip, #16             @ ip -> descending {7,7..0,0} weights
++        vshll.u8    q3, d0, #3          @ 8 * top[0..3], widened to u16
++        mov         r1, #8              @ r1 = rows remaining
++        vshll.u8    q8, d1, #3          @ 8 * top[4..7]
++        vld1.16     {d5[]}, [r2]        @ Left (lower)
++        sub         r2, #16             @ r2 -> left[0] again
++        vmlal.u8    q3, d2, d4          @ += (x+1) * top[8]
++        vmlal.u8    q8, d3, d4          @ Acc set up
++        vsubl.u8    q1, d5, d0          @ left[8] - top[0..3]
++        vsubl.u8    q0, d5, d1          @ Add set up
++        vld1.8      {q2}, [ip :128]     @ {7,7,6,6,5,5...0,0}
++
++@ u8  7..0     [1]  q2
++@ u8  left[y]  [1]  [r2]
++@ u16 acc      [2]  q3,q8 = (x+1)*p[nTbS][-1] + 8*p[x][-1] initially
++@ u16 add      [2]  q1,q0 = p[-1][nTbs] - p[x][-1]
++
++        vadd.i16    q3, q1              @ acc for row 0 (first 'add' step applied)
++        vadd.i16    q8, q0
++1:
++        vadd.i16    q10, q3, q1         @ acc for row y+1, pels 0..3
++        subs        r1, #2              @ two rows per pass; flags consumed by bne below
++        vld1.16     {d18[]}, [r2]!      @ left[y] pel in every 16-bit lane
++        vadd.i16    q11, q8, q0         @ acc for row y+1, pels 4..7
++        vld1.16     {d19[]}, [r2]!      @ left[y+1] pel in every 16-bit lane
++        vmlal.u8    q3, d4, d18         @ row y += (7-x) * left[y]
++        vmlal.u8    q8, d5, d18
++        vadd.i16    q12, q10, q1        @ acc for row y+2, pels 0..3
++        vmlal.u8    q10, d4, d19        @ row y+1 += (7-x) * left[y+1]
++        vadd.i16    q13, q11, q0        @ acc for row y+2, pels 4..7
++        vmlal.u8    q11, d5, d19
++        vrshrn.u16  d18, q3, #4         @ round, >>4, narrow to u8
++        vrshrn.u16  d19, q8, #4
++        vmov        q3, q12             @ carry row y+2 acc into the next pass
++        vst1.8      {q9}, [r0 :128], r3 @ store row y, advance by stride
++        vrshrn.u16  d18, q10, #4
++        vrshrn.u16  d19, q11, #4
++        vmov        q8, q13
++        vst1.8      {q9}, [r0 :128], r3 @ store row y+1
++        bne         1b
++
++        bx          lr
++
++endfunc
++
++
++@------------------------------------------------------------------------------
++@
++@ Data - has to be in two lumps to ensure we can always reach using adr
++
++        .balign 64
++
++nbx2_15_0_1_16:                         @ each value twice: 15..0 descending; ascending 1..16 starts at +32
++        .byte   15, 15, 14, 14, 13, 13, 12, 12
++        .byte   11, 11, 10, 10,  9,  9,  8,  8
++nbx2_7_0_1_8:                           @ each value twice: 7..0 descending; ascending 1..8 starts at +16
++        .byte    7,  7,  6,  6,  5,  5,  4,  4
++        .byte    3,  3,  2,  2,  1,  1,  0,  0
++        .byte    1,  1,  2,  2,  3,  3,  4,  4
++        .byte    5,  5,  6,  6,  7,  7,  8,  8
++        .byte    9,  9, 10, 10, 11, 11, 12, 12
++        .byte   13, 13, 14, 14, 15, 15, 16, 16
++
++        @ should be back on a 64-byte boundary here (16 + 16 + 32 table bytes above)
++
++nbx2_3_0_1_4:                           @ each value twice: 3..0, then 1..4 (16 bytes)
++        .byte    3,  3,  2,  2,  1,  1,  0,  0
++        .byte    1,  1,  2,  2,  3,  3,  4,  4
++
++@------------------------------------------------------------------------------
++
++
++@ ff_hevc_rpi_pred_planar_c_8_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_planar_c_8_neon_10, export=1
++@ Planar prediction, 8 rows of 8 pels of two 16-bit samples each (32 bytes per row); two rows per loop pass.
++        @ Load from bytes & expand later - at the very least this uses less
++        @ memory than having a short table
++        adr         ip, nbx2_7_0_1_8 + 16   @ ip -> ascending {1,1..8,8} byte weights
++        vld1.16     {q0-q1}, [r1 :128]! @ Top (left)
++        add         r2, #32             @ r2 -> left[8] (4 bytes per pel)
++        vld1.8      {q2}, [ip :128]     @ {1,1,2,2,3,3...8,8}
++        lsl         r3, #2              @ stride * 4 (presumably passed in 4-byte pels - confirm)
++        vld1.32     {d6[],d7[]}, [r1]   @ Top (right)
++        sub         ip, #16             @ ip -> descending {7,7..0,0} byte weights
++        vmovl.u8    q8, d4              @ widen {1,1..4,4} to u16
++        mov         r1, #8              @ r1 = rows remaining
++        vshl.i16    q9, q0, #3          @ 8 * top[0..3]
++        vmovl.u8    q2, d5              @ widen {5,5..8,8} to u16
++        vshl.i16    q10, q1, #3         @ 8 * top[4..7]
++        vld1.32     {d22[],d23[]}, [r2] @ Left (lower)
++        sub         r2, #32             @ r2 -> left[0] again
++        vld1.8      {q12}, [ip]         @ {7,7,6,6,5,5...0,0}
++        vmla.i16    q9, q8, q3          @ += (x+1) * top[8]
++        vmla.i16    q10, q2, q3         @ Acc set up
++        vsub.i16    q0, q11, q0         @ left[8] - top[0..3]
++        vsub.i16    q1, q11, q1         @ Add set up
++        vadd.i16    q2, q9, q0          @ acc for row 0, pels 0..3
++        vadd.i16    q3, q10, q1         @ acc for row 0, pels 4..7
++        vmovl.u8    q8, d24             @ widen {7,7..4,4} to u16
++        vmovl.u8    q9, d25             @ widen {3,3..0,0} to u16
++
++@ u16  7..0        [2]  q8,q9
++@ u32 left[y]      [2]  [r2]
++@ u16 acc          [2]  q2,q3 = (x+1)*p[nTbS][-1] + 8*p[x][-1] initially
++@ u16 add          [2]  q0,q1 = p[-1][nTbs] - p[x][-1]
++
++1:
++        vadd.i16    q10, q2, q0         @ acc for row y+1, pels 0..3
++        subs        r1, #2              @ two rows per pass; flags consumed by bne below
++        vld1.32     {d24[],d25[]}, [r2]!    @ q12 = left[y] pel in every 32-bit lane
++        vadd.i16    q11, q3, q1         @ acc for row y+1, pels 4..7
++        vld1.32     {d28[],d29[]}, [r2]!    @ q14 = left[y+1] pel in every 32-bit lane
++        vmla.i16    q2, q8, q12         @ row y += (7-x) * left[y]
++        vmla.i16    q3, q9, q12
++        vadd.i16    q12, q10, q0        @ acc for row y+2, pels 0..3
++        vmla.i16    q10, q8, q14        @ row y+1 += (7-x) * left[y+1]
++        vadd.i16    q13, q11, q1        @ acc for row y+2, pels 4..7
++        vmla.i16    q11, q9, q14
++        vrshr.u16   q14, q2, #4         @ round, >>4
++        vrshr.u16   q15, q3, #4
++        vmov        q2, q12             @ carry row y+2 acc into the next pass
++        vst1.16     {q14-q15}, [r0 :128], r3    @ store row y, advance by stride
++        vrshr.u16   q14, q10, #4
++        vrshr.u16   q15, q11, #4
++        vmov        q3, q13
++        vst1.16     {q14-q15}, [r0 :128], r3    @ store row y+1
++        bne         1b
++
++        bx         lr
++endfunc
++
++
++@ ff_hevc_rpi_pred_planar_c_16_neon_8
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_planar_c_16_neon_8, export=1
++@ Planar prediction, 16 rows of 16 two-byte pels (32 bytes per row); software-pipelined, two rows per pass.
++        vld1.8      {q0-q1}, [r1 :128]! @ Top (left)
++        adr         ip, nbx2_15_0_1_16 + 32 @ ip -> ascending {1,1..16,16} weights
++        vpush       {d8-d12}            @ d8-d12 are used below; restored by the vpop before return
++        vld1.8      {q2-q3}, [ip :128]  @ {1,1,2,2,3,3...16,16}
++        add         r2, #32             @ r2 -> left[16] (2 bytes per pel)
++        vld1.16     {d8[]}, [r1]        @ Top (right)
++        sub         ip, #32             @ ip -> descending {15,15..0,0} weights
++        vshll.u8    q8, d0, #4          @ 16 * top[x], widened to u16
++        mov         r1, #16             @ r1 = rows remaining
++        vld1.16     {d9[]}, [r2]        @ Left (lower)
++        sub         r2, #32             @ r2 -> left[0] again
++        vshll.u8    q9, d1, #4
++        lsl         r3, #1              @ stride doubled (presumably passed in two-byte pels - confirm)
++        vshll.u8    q10, d2, #4
++        vshll.u8    q11, d3, #4
++        vmlal.u8    q8, d4, d8          @ += (x+1) * top[16]
++        vsubl.u8    q12, d9, d0         @ left[16] - top[x]
++        vmlal.u8    q9, d5, d8
++        vsubl.u8    q13, d9, d1
++        vmlal.u8    q10, d6, d8
++        vsubl.u8    q14, d9, d2
++        vmlal.u8    q11, d7, d8         @ Acc set up
++        vsubl.u8    q15, d9, d3         @ Add set up
++        vadd.i16    q8, q12             @ acc for row 0 (first 'add' step applied)
++        vadd.i16    q9, q13
++        vadd.i16    q10, q14
++        vadd.i16    q11, q15
++        vld1.8      {q4-q5}, [ip :128]  @ {15,15,14,14,13,13...0,0}
++
++@ u8  15..0    [2]  q4,q5
++@ u8  left[y]  [2]  [r2]
++@ u16 acc      [4]  q8-q11  = (x+1)*p[nTbS][-1] + 16*p[x][-1] initially
++@ u16 add      [4]  q12-q15 = p[-1][nTbs] - p[x][-1]
++
++        vld1.16     {d12[]}, [r2]!      @ d12 = left[0] pel in every 16-bit lane
++        vadd.i16    q0, q8, q12         @ start acc for row 1
++        b           2f                  @ enter the loop mid-way: no finished odd row to store yet
++1:
++          vld1.16     {d12[]}, [r2]!    @ left[y] for the next even row
++        vrshrn.u16  d3, q1, #5          @ finish previous odd row: round, >>5, narrow to u8
++        vrshrn.u16  d2, q0, #5          @ q1 now holds bytes 0..15 of the row
++          vadd.i16    q0, q8, q12       @ q0 is free again: start acc for the next odd row
++        vrshrn.u16  d4, q2, #5
++        vrshrn.u16  d5, q3, #5
++        vst1.8      {q1-q2}, [r0 :128], r3  @ store previous odd row
++2:        vadd.i16    q1, q9, q13       @ rest of the odd row acc
++          subs        r1, #2            @ two rows per pass; flags consumed by bne below
++          vadd.i16    q2, q10, q14
++          vadd.i16    q3, q11, q15
++          vmlal.u8    q8, d8, d12       @ even row += (15-x) * left[y]
++          vmlal.u8    q9, d9, d12
++          vmlal.u8    q10, d10, d12
++          vmlal.u8    q11, d11, d12
++            vld1.16     {d12[]}, [r2]!  @ left[y+1] for the odd row
++          vrshrn.u16  d19, q9, #5       @ even row: round, >>5, narrow to u8
++          vrshrn.u16  d18, q8, #5       @ q9 now holds bytes 0..15 of the row
++            vadd.i16    q8, q0, q12     @ start acc for the following even row
++          vrshrn.u16  d20, q10, #5
++          vrshrn.u16  d21, q11, #5
++          vst1.8      {q9-q10}, [r0 :128], r3   @ store even row
++            vadd.i16    q9, q1, q13     @ rest of the following even row acc
++            vadd.i16    q10, q2, q14
++            vadd.i16    q11, q3, q15
++            vmlal.u8    q0, d8, d12     @ odd row += (15-x) * left[y+1]
++            vmlal.u8    q1, d9, d12
++            vmlal.u8    q2, d10, d12
++            vmlal.u8    q3, d11, d12
++
++        bne         1b
++
++        vpop        {d8-d12}            @ restore the registers saved at entry
++
++        vrshrn.u16  d3, q1, #5          @ drain the pipeline: finish and store the last row
++        vrshrn.u16  d2, q0, #5
++        vrshrn.u16  d4, q2, #5
++        vrshrn.u16  d5, q3, #5
++        vst1.8      {q1-q2}, [r0 :128]
++
++        bx          lr
++
++endfunc
++
++
++@ ff_hevc_rpi_pred_planar_c_16_neon_10
++@       uint8_t *_src,          [r0]
++@       const uint8_t *_top,    [r1]
++@       const uint8_t *_left,   [r2]
++@       ptrdiff_t stride)       [r3]
++
++function ff_hevc_rpi_pred_planar_c_16_neon_10, export=1
++@ Planar prediction, 16 rows of 16 pels of two 16-bit samples each (64 bytes per row); add[0] is spilled to the stack.
++        @ Load from bytes & expand later - at the very least this uses less
++        @ memory than having a short table
++        vld1.16     {q0-q1}, [r1 :128]!  @ Top (left)
++        adr         ip, nbx2_15_0_1_16 + 32  @ ip -> ascending {1,1..16,16} byte weights
++        vpush       {q4-q7}              @ q4-q7 are used below; restored by the final vpop
++        vld1.16     {q2-q3}, [r1 :128]!  @ Top (centre)
++        add         r2, #64              @ r2 -> left[16] (4 bytes per pel)
++        vld1.8      {q14-q15}, [ip :128] @ {1,1,2,2,3,3...16,16}
++T       lsl         r3, #2               @ T lines: scale r3 once here; A lines shift it at each use instead
++        vld1.32     {d8[],d9[]}, [r1]    @ Top (right)
++        sub         ip, #32              @ ip -> descending {15,15..0,0} byte weights
++        vmovl.u8    q12, d28             @ widen the ascending weights to u16 in q12-q15
++        mov         r1, #16              @ r1 = rows remaining
++        vmovl.u8    q13, d29
++        vld1.8      {q6-q7}, [ip :128]   @ {15,15,14,14,13,13...0,0}
++        vmovl.u8    q14, d30
++        vmovl.u8    q15, d31
++        vld1.32     {d10[],d11[]}, [r2]  @ Left (lower)
++        sub         r2, #64              @ r2 -> left[0] again
++        vshl.i16    q8, q0, #4           @ 16 * top[x]
++        vshl.i16    q9, q1, #4
++        vshl.i16    q10, q2, #4
++        vshl.i16    q11, q3, #4
++        vmla.i16    q8, q12, q4          @ += (x+1) * top[16]
++        vsub.i16    q0, q5, q0           @ left[16] - top[x]
++        vmla.i16    q9, q13, q4
++        vpush       {q0}                 @ stash add[0] on the stack: q0 is reused for left[y] in the loop
++        vsub.i16    q1, q5, q1
++        vmla.i16    q10, q14, q4
++        vsub.i16    q2, q5, q2
++        vmla.i16    q11, q15, q4         @ Acc set up
++        vsub.i16    q3, q5, q3           @ Add set up
++        vadd.i16    q8, q0               @ acc for row 0 (first 'add' step applied)
++        vadd.i16    q9, q1
++        vadd.i16    q10, q2
++        vadd.i16    q11, q3
++        vmovl.u8    q4, d12              @ widen the descending weights to u16 in q4-q7
++        vmovl.u8    q5, d13
++        vmovl.u8    q6, d14
++        vmovl.u8    q7, d15
++
++@ u16 15..0 (each twice)  [4]  q4-q7
++@ u32 left[y]  [4]  [r2]
++@ u16 acc      [4]  q8-q11 = (x+1)*p[nTbS][-1] + 16*p[x][-1] initially
++@ u16 add      [4]  q0-q3  = p[-1][nTbs] - p[x][-1]
++
++        vadd.i16    q12, q8, q0          @ acc for row 1, first q (needs the intact add[0])
++A       sub         r0, r0, r3, lsl #2   @ pre-bias r0 by one row: the loop adds the stride before each store
++T       sub         r0, r3
++1:
++        vld1.32     {d0[],d1[]}, [r2]!   @ q0 = left[y] pel in every 32-bit lane (overwrites add[0])
++A       add         r0, r0, r3, lsl #2   @ r0 -> row y
++T       add         r0, r3
++        vadd.i16    q13, q9, q1          @ rest of the acc for row y+1
++        subs        r1, #2               @ two rows per pass; flags consumed by bne below
++        vadd.i16    q14, q10, q2
++        vadd.i16    q15, q11, q3
++        vmla.i16    q8, q4, q0           @ row y += (15-x) * left[y]
++        vmla.i16    q9, q5, q0
++        vmla.i16    q10, q6, q0
++        vmla.i16    q11, q7, q0
++        vld1.16     {q0}, [sp]           @ reload add[0] from the stack slot
++        vrshr.u16   q8, #5               @ round, >>5
++        vrshr.u16   q9, #5
++        vrshr.u16   q10, #5
++        vrshr.u16   q11, #5
++        vstm        r0, {q8-q11}         @ store row y (64 bytes, r0 not written back)
++        vadd.i16    q8, q12, q0          @ acc for row y+2, first q
++A       add         r0, r0, r3, lsl #2   @ r0 -> row y+1
++T       add         r0, r3
++        vld1.32     {d0[],d1[]}, [r2]!   @ q0 = left[y+1]
++        vadd.i16    q9, q13, q1          @ rest of the acc for row y+2
++        vadd.i16    q10, q14, q2
++        vadd.i16    q11, q15, q3
++        vmla.i16    q12, q4, q0          @ row y+1 += (15-x) * left[y+1]
++        vmla.i16    q13, q5, q0
++        vmla.i16    q14, q6, q0
++        vmla.i16    q15, q7, q0
++        vld1.16     {q0}, [sp]           @ reload add[0] from the stack slot
++        vrshr.u16   q12, #5
++        vrshr.u16   q13, #5
++        vrshr.u16   q14, #5
++        vrshr.u16   q15, #5
++        vstm        r0, {q12-q15}        @ store row y+1
++        vadd.i16    q12, q8, q0          @ acc for row y+3, first q
++        bne         1b
++
++        vpop        {q3-q7}              @ q3 receives the stashed q0 slot (value discarded); q4-q7 are restored
++        bx          lr
++
++endfunc
+--- a/libavcodec/avcodec.h
++++ b/libavcodec/avcodec.h
+@@ -2567,6 +2567,17 @@ typedef struct AVHWAccel {
+      * that avctx->hwaccel_priv_data is invalid.
+      */
+     int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
++
++    /**
++     * Called if parsing fails
++     *
++     * An error has occurred, end_frame will not be called
++     * start_frame & decode_slice may or may not have been called
++     * Optional
++     *
++     * @param avctx the codec context
++     */
++    void (*abort_frame)(AVCodecContext *avctx);
+ } AVHWAccel;
+
+ /**
+--- a/libavcodec/cabac.h
++++ b/libavcodec/cabac.h
+@@ -43,7 +43,14 @@ extern const uint8_t ff_h264_cabac_table
+ typedef struct CABACContext{
+     int low;
+     int range;
+-    int outstanding_count;
++    union
++    {
++        int outstanding_count;
++        struct {
++            uint16_t bits;
++            uint16_t range;
++        } by22;
++    };
+     const uint8_t *bytestream_start;
+     const uint8_t *bytestream;
+     const uint8_t *bytestream_end;
+--- a/libavcodec/codec.h
++++ b/libavcodec/codec.h
+@@ -350,6 +350,17 @@ const AVCodec *av_codec_iterate(void **o
+ AVCodec *avcodec_find_decoder(enum AVCodecID id);
+
+ /**
++ * Find a registered decoder with a matching codec ID and pix_fmt.
++ * A decoder with pix_fmt set to NULL will match any fmt.
++ * A fmt of AV_PIX_FMT_NONE will only match a decoder with pix_fmt NULL.
++ *
++ * @param id AVCodecID of the requested decoder
++ * @param fmt AVPixelFormat that must be supported by decoder
++ * @return A decoder if one was found, NULL otherwise.
++ */
++AVCodec *avcodec_find_decoder_by_id_and_fmt(enum AVCodecID id, enum AVPixelFormat fmt);
++
++/**
+  * Find a registered decoder with the specified name.
+  *
+  * @param name name of the requested decoder
+--- /dev/null
++++ b/libavcodec/hevc-ctrls-v1.h
+@@ -0,0 +1,229 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * These are the HEVC state controls for use with stateless HEVC
++ * codec drivers.
++ *
++ * It turns out that these structs are not stable yet and will undergo
++ * more changes. So keep them private until they are stable and ready to
++ * become part of the official public API.
++ */
++
++#ifndef _HEVC_CTRLS_H_
++#define _HEVC_CTRLS_H_
++
++#include <linux/videodev2.h>
++
++/* The pixel format isn't stable at the moment and will likely be renamed. */
++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
++
++#define V4L2_CID_MPEG_VIDEO_HEVC_SPS		(V4L2_CID_MPEG_BASE + 1008)
++#define V4L2_CID_MPEG_VIDEO_HEVC_PPS		(V4L2_CID_MPEG_BASE + 1009)
++#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS	(V4L2_CID_MPEG_BASE + 1010)
++#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX	(V4L2_CID_MPEG_BASE + 1011)
++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE	(V4L2_CID_MPEG_BASE + 1015)
++#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE	(V4L2_CID_MPEG_BASE + 1016)
++
++/* enum v4l2_ctrl_type type values */
++#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120
++#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121
++#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122
++#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123
++
++enum v4l2_mpeg_video_hevc_decode_mode {
++	V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
++	V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED,
++};
++
++enum v4l2_mpeg_video_hevc_start_code {
++	V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
++	V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B,
++};
++
++#define V4L2_HEVC_SLICE_TYPE_B	0
++#define V4L2_HEVC_SLICE_TYPE_P	1
++#define V4L2_HEVC_SLICE_TYPE_I	2
++
++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE		(1ULL << 0)
++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED			(1ULL << 1)
++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED				(1ULL << 2)
++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET		(1ULL << 3)
++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED				(1ULL << 4)
++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED		(1ULL << 5)
++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT		(1ULL << 6)
++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED		(1ULL << 7)
++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED	(1ULL << 8)
++
++/* The controls are not stable at the moment and will likely be reworked. */
++struct v4l2_ctrl_hevc_sps {
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
++	__u16	pic_width_in_luma_samples;
++	__u16	pic_height_in_luma_samples;
++	__u8	bit_depth_luma_minus8;
++	__u8	bit_depth_chroma_minus8;
++	__u8	log2_max_pic_order_cnt_lsb_minus4;
++	__u8	sps_max_dec_pic_buffering_minus1;
++	__u8	sps_max_num_reorder_pics;
++	__u8	sps_max_latency_increase_plus1;
++	__u8	log2_min_luma_coding_block_size_minus3;
++	__u8	log2_diff_max_min_luma_coding_block_size;
++	__u8	log2_min_luma_transform_block_size_minus2;
++	__u8	log2_diff_max_min_luma_transform_block_size;
++	__u8	max_transform_hierarchy_depth_inter;
++	__u8	max_transform_hierarchy_depth_intra;
++	__u8	pcm_sample_bit_depth_luma_minus1;
++	__u8	pcm_sample_bit_depth_chroma_minus1;
++	__u8	log2_min_pcm_luma_coding_block_size_minus3;
++	__u8	log2_diff_max_min_pcm_luma_coding_block_size;
++	__u8	num_short_term_ref_pic_sets;
++	__u8	num_long_term_ref_pics_sps;
++	__u8	chroma_format_idc;
++	__u8	sps_max_sub_layers_minus1;
++
++	__u64	flags;
++};
++
++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT		(1ULL << 0)
++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT			(1ULL << 1)
++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED		(1ULL << 2)
++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT			(1ULL << 3)
++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED		(1ULL << 4)
++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED		(1ULL << 5)
++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED			(1ULL << 6)
++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT	(1ULL << 7)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED			(1ULL << 8)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED			(1ULL << 9)
++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED		(1ULL << 10)
++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED			(1ULL << 11)
++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED		(1ULL << 12)
++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED	(1ULL << 13)
++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14)
++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED	(1ULL << 15)
++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER	(1ULL << 16)
++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT		(1ULL << 17)
++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18)
++
++struct v4l2_ctrl_hevc_pps {
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
++	__u8	num_extra_slice_header_bits;
++	__s8	init_qp_minus26;
++	__u8	diff_cu_qp_delta_depth;
++	__s8	pps_cb_qp_offset;
++	__s8	pps_cr_qp_offset;
++	__u8	num_tile_columns_minus1;
++	__u8	num_tile_rows_minus1;
++	__u8	column_width_minus1[20];
++	__u8	row_height_minus1[22];
++	__s8	pps_beta_offset_div2;
++	__s8	pps_tc_offset_div2;
++	__u8	log2_parallel_merge_level_minus2;
++
++	__u8	padding[4];
++	__u64	flags;
++};
++
++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE	0x01
++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER	0x02
++#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR		0x03
++
++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX		16
++
++struct v4l2_hevc_dpb_entry {
++	__u64	timestamp;
++	__u8	rps;
++	__u8	field_pic;
++	__u16	pic_order_cnt[2];
++	__u8	padding[2];
++};
++
++struct v4l2_hevc_pred_weight_table {
++	__s8	delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__s8	luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__s8	delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++	__s8	chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++	__s8	delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__s8	luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__s8	delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++	__s8	chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++	__u8	padding[6];
++
++	__u8	luma_log2_weight_denom;
++	__s8	delta_chroma_log2_weight_denom;
++};
++
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA		(1ULL << 0)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA		(1ULL << 1)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED	(1ULL << 2)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO			(1ULL << 3)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT			(1ULL << 4)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0		(1ULL << 5)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV		(1ULL << 6)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT		(1ULL << 9)
++
++struct v4l2_ctrl_hevc_slice_params {
++	__u32	bit_size;
++	__u32	data_bit_offset;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++	__u32	slice_segment_addr;
++	__u32	num_entry_point_offsets;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
++	__u8	nal_unit_type;
++	__u8	nuh_temporal_id_plus1;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++	__u8	slice_type;
++	__u8	colour_plane_id;
++	__u16	slice_pic_order_cnt;
++	__u8	num_ref_idx_l0_active_minus1;
++	__u8	num_ref_idx_l1_active_minus1;
++	__u8	collocated_ref_idx;
++	__u8	five_minus_max_num_merge_cand;
++	__s8	slice_qp_delta;
++	__s8	slice_cb_qp_offset;
++	__s8	slice_cr_qp_offset;
++	__s8	slice_act_y_qp_offset;
++	__s8	slice_act_cb_qp_offset;
++	__s8	slice_act_cr_qp_offset;
++	__s8	slice_beta_offset_div2;
++	__s8	slice_tc_offset_div2;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
++	__u8	pic_struct;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++	__u8	num_active_dpb_entries;
++	__u8	ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__u8	ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++
++	__u8	num_rps_poc_st_curr_before;
++	__u8	num_rps_poc_st_curr_after;
++	__u8	num_rps_poc_lt_curr;
++
++	__u8	padding;
++
++	__u32	entry_point_offset_minus1[256];
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++	struct v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
++	struct v4l2_hevc_pred_weight_table pred_weight_table;
++
++	__u64	flags;
++};
++
++struct v4l2_ctrl_hevc_scaling_matrix {
++	__u8	scaling_list_4x4[6][16];
++	__u8	scaling_list_8x8[6][64];
++	__u8	scaling_list_16x16[6][64];
++	__u8	scaling_list_32x32[2][64];
++	__u8	scaling_list_dc_coef_16x16[6];
++	__u8	scaling_list_dc_coef_32x32[2];
++};
++
++#endif
+--- /dev/null
++++ b/libavcodec/hevc-ctrls-v2.h
+@@ -0,0 +1,257 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/*
++ * These are the HEVC state controls for use with stateless HEVC
++ * codec drivers.
++ *
++ * It turns out that these structs are not stable yet and will undergo
++ * more changes. So keep them private until they are stable and ready to
++ * become part of the official public API.
++ */
++
++#ifndef _HEVC_CTRLS_H_
++#define _HEVC_CTRLS_H_
++
++#include <linux/videodev2.h>
++
++/* The pixel format isn't stable at the moment and will likely be renamed. */
++#define V4L2_PIX_FMT_HEVC_SLICE v4l2_fourcc('S', '2', '6', '5') /* HEVC parsed slices */
++
++#define V4L2_CID_MPEG_VIDEO_HEVC_SPS		(V4L2_CID_CODEC_BASE + 1008)
++#define V4L2_CID_MPEG_VIDEO_HEVC_PPS		(V4L2_CID_CODEC_BASE + 1009)
++#define V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS	(V4L2_CID_CODEC_BASE + 1010)
++#define V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX	(V4L2_CID_CODEC_BASE + 1011)
++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS	(V4L2_CID_CODEC_BASE + 1012)
++#define V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE	(V4L2_CID_CODEC_BASE + 1015)
++#define V4L2_CID_MPEG_VIDEO_HEVC_START_CODE	(V4L2_CID_CODEC_BASE + 1016)
++
++/* enum v4l2_ctrl_type type values */
++#define V4L2_CTRL_TYPE_HEVC_SPS 0x0120
++#define V4L2_CTRL_TYPE_HEVC_PPS 0x0121
++#define V4L2_CTRL_TYPE_HEVC_SLICE_PARAMS 0x0122
++#define V4L2_CTRL_TYPE_HEVC_SCALING_MATRIX 0x0123
++#define V4L2_CTRL_TYPE_HEVC_DECODE_PARAMS 0x0124
++
++enum v4l2_mpeg_video_hevc_decode_mode {
++	V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED,
++	V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED,
++};
++
++enum v4l2_mpeg_video_hevc_start_code {
++	V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE,
++	V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B,
++};
++
++#define V4L2_HEVC_SLICE_TYPE_B	0
++#define V4L2_HEVC_SLICE_TYPE_P	1
++#define V4L2_HEVC_SLICE_TYPE_I	2
++
++#define V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE		(1ULL << 0)
++#define V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED			(1ULL << 1)
++#define V4L2_HEVC_SPS_FLAG_AMP_ENABLED				(1ULL << 2)
++#define V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET		(1ULL << 3)
++#define V4L2_HEVC_SPS_FLAG_PCM_ENABLED				(1ULL << 4)
++#define V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED		(1ULL << 5)
++#define V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT		(1ULL << 6)
++#define V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED		(1ULL << 7)
++#define V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED	(1ULL << 8)
++
++/* The controls are not stable at the moment and will likely be reworked. */
++struct v4l2_ctrl_hevc_sps {
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
++	__u16	pic_width_in_luma_samples;
++	__u16	pic_height_in_luma_samples;
++	__u8	bit_depth_luma_minus8;
++	__u8	bit_depth_chroma_minus8;
++	__u8	log2_max_pic_order_cnt_lsb_minus4;
++	__u8	sps_max_dec_pic_buffering_minus1;
++	__u8	sps_max_num_reorder_pics;
++	__u8	sps_max_latency_increase_plus1;
++	__u8	log2_min_luma_coding_block_size_minus3;
++	__u8	log2_diff_max_min_luma_coding_block_size;
++	__u8	log2_min_luma_transform_block_size_minus2;
++	__u8	log2_diff_max_min_luma_transform_block_size;
++	__u8	max_transform_hierarchy_depth_inter;
++	__u8	max_transform_hierarchy_depth_intra;
++	__u8	pcm_sample_bit_depth_luma_minus1;
++	__u8	pcm_sample_bit_depth_chroma_minus1;
++	__u8	log2_min_pcm_luma_coding_block_size_minus3;
++	__u8	log2_diff_max_min_pcm_luma_coding_block_size;
++	__u8	num_short_term_ref_pic_sets;
++	__u8	num_long_term_ref_pics_sps;
++	__u8	chroma_format_idc;
++	__u8	sps_max_sub_layers_minus1;
++
++	__u64	flags;
++};
++
++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED	(1ULL << 0)
++#define V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT			(1ULL << 1)
++#define V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED		(1ULL << 2)
++#define V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT			(1ULL << 3)
++#define V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED		(1ULL << 4)
++#define V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED		(1ULL << 5)
++#define V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED			(1ULL << 6)
++#define V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT	(1ULL << 7)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED			(1ULL << 8)
++#define V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED			(1ULL << 9)
++#define V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED		(1ULL << 10)
++#define V4L2_HEVC_PPS_FLAG_TILES_ENABLED			(1ULL << 11)
++#define V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED		(1ULL << 12)
++#define V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED	(1ULL << 13)
++#define V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 14)
++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED	(1ULL << 15)
++#define V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER	(1ULL << 16)
++#define V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT		(1ULL << 17)
++#define V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT (1ULL << 18)
++#define V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_CONTROL_PRESENT	(1ULL << 19)
++#define V4L2_HEVC_PPS_FLAG_UNIFORM_SPACING			(1ULL << 20)
++
++struct v4l2_ctrl_hevc_pps {
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
++	__u8	num_extra_slice_header_bits;
++	__u8	num_ref_idx_l0_default_active_minus1;
++	__u8	num_ref_idx_l1_default_active_minus1;
++	__s8	init_qp_minus26;
++	__u8	diff_cu_qp_delta_depth;
++	__s8	pps_cb_qp_offset;
++	__s8	pps_cr_qp_offset;
++	__u8	num_tile_columns_minus1;
++	__u8	num_tile_rows_minus1;
++	__u8	column_width_minus1[20];
++	__u8	row_height_minus1[22];
++	__s8	pps_beta_offset_div2;
++	__s8	pps_tc_offset_div2;
++	__u8	log2_parallel_merge_level_minus2;
++
++	__u8	padding[4];
++	__u64	flags;
++};
++
++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE	0x01
++#define V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER	0x02
++#define V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR		0x03
++
++#define V4L2_HEVC_DPB_ENTRIES_NUM_MAX		16
++
++struct v4l2_hevc_dpb_entry {
++	__u64	timestamp;
++	__u8	rps;
++	__u8	field_pic;
++	__u16	pic_order_cnt[2];
++	__u8	padding[2];
++};
++
++struct v4l2_hevc_pred_weight_table {
++	__s8	delta_luma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__s8	luma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__s8	delta_chroma_weight_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++	__s8	chroma_offset_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++	__s8	delta_luma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__s8	luma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__s8	delta_chroma_weight_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++	__s8	chroma_offset_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX][2];
++
++	__u8	padding[6];
++
++	__u8	luma_log2_weight_denom;
++	__s8	delta_chroma_log2_weight_denom;
++};
++
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA		(1ULL << 0)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA		(1ULL << 1)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED	(1ULL << 2)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO			(1ULL << 3)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT			(1ULL << 4)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0		(1ULL << 5)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_USE_INTEGER_MV		(1ULL << 6)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED (1ULL << 7)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED (1ULL << 8)
++#define V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT	(1ULL << 9)
++
++struct v4l2_ctrl_hevc_slice_params {
++	__u32	bit_size;
++	__u32	data_bit_offset;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++	__u32	slice_segment_addr;
++	__u32	num_entry_point_offsets;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
++	__u8	nal_unit_type;
++	__u8	nuh_temporal_id_plus1;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++	__u8	slice_type;
++	__u8	colour_plane_id;
++	__u16	slice_pic_order_cnt;
++	__u8	num_ref_idx_l0_active_minus1;
++	__u8	num_ref_idx_l1_active_minus1;
++	__u8	collocated_ref_idx;
++	__u8	five_minus_max_num_merge_cand;
++	__s8	slice_qp_delta;
++	__s8	slice_cb_qp_offset;
++	__s8	slice_cr_qp_offset;
++	__s8	slice_act_y_qp_offset;
++	__s8	slice_act_cb_qp_offset;
++	__s8	slice_act_cr_qp_offset;
++	__s8	slice_beta_offset_div2;
++	__s8	slice_tc_offset_div2;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
++	__u8	pic_struct;
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++	__u8	ref_idx_l0[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__u8	ref_idx_l1[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++
++	__u8	padding[5];
++
++	__u32	entry_point_offset_minus1[256];
++
++	/* ISO/IEC 23008-2, ITU-T Rec. H.265: Weighted prediction parameter */
++	struct v4l2_hevc_pred_weight_table pred_weight_table;
++
++	__u64	flags;
++};
++
++#define V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC		0x1
++#define V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC		0x2
++#define V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR  0x4
++
++struct v4l2_ctrl_hevc_decode_params {
++	__s32	pic_order_cnt_val;
++	__u8	num_active_dpb_entries;
++	struct	v4l2_hevc_dpb_entry dpb[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__u8	num_poc_st_curr_before;
++	__u8	num_poc_st_curr_after;
++	__u8	num_poc_lt_curr;
++	__u8	poc_st_curr_before[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__u8	poc_st_curr_after[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__u8	poc_lt_curr[V4L2_HEVC_DPB_ENTRIES_NUM_MAX];
++	__u64	flags;
++};
++
++/*  MPEG-class control IDs specific to the Hantro driver as defined by V4L2 */
++#define V4L2_CID_CODEC_HANTRO_BASE				(V4L2_CTRL_CLASS_CODEC | 0x1200)
++/*
++ * V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP -
++ * the amount of data (in bits) to skip in the
++ * slice segment header.
++ * If non-IDR, the bits to be skipped go from syntax element "pic_output_flag"
++ * to before syntax element "slice_temporal_mvp_enabled_flag".
++ * If IDR, the skipped bits are just "pic_output_flag"
++ * (separate_colour_plane_flag is not supported).
++ */
++#define V4L2_CID_HANTRO_HEVC_SLICE_HEADER_SKIP	(V4L2_CID_CODEC_HANTRO_BASE + 0)
++
++struct v4l2_ctrl_hevc_scaling_matrix {
++	__u8	scaling_list_4x4[6][16];
++	__u8	scaling_list_8x8[6][64];
++	__u8	scaling_list_16x16[6][64];
++	__u8	scaling_list_32x32[2][64];
++	__u8	scaling_list_dc_coef_16x16[6];
++	__u8	scaling_list_dc_coef_32x32[2];
++};
++
++#endif
+--- a/libavcodec/hevc_parser.c
++++ b/libavcodec/hevc_parser.c
+@@ -98,6 +98,19 @@ static int hevc_parse_slice_header(AVCod
+     avctx->profile  = ps->sps->ptl.general_ptl.profile_idc;
+     avctx->level    = ps->sps->ptl.general_ptl.level_idc;
+
++    if (ps->sps->chroma_format_idc == 1) { /* 4:2:0 */
++        avctx->chroma_sample_location = ps->sps->vui.chroma_loc_info_present_flag ?
++            ps->sps->vui.chroma_sample_loc_type_top_field + 1 : /* VUI type N -> AVChromaLocation N + 1 */
++            AVCHROMA_LOC_LEFT;
++    }
++    else if (ps->sps->chroma_format_idc == 2 ||
++             ps->sps->chroma_format_idc == 3) { /* 4:2:2 or 4:4:4 */
++        avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;
++    }
++    else { /* monochrome (or unexpected idc): nothing to locate */
++        avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
++    }
++
+     if (ps->vps->vps_timing_info_present_flag) {
+         num = ps->vps->vps_num_units_in_tick;
+         den = ps->vps->vps_time_scale;
+--- a/libavcodec/hevcdec.c
++++ b/libavcodec/hevcdec.c
+@@ -332,6 +332,19 @@ static void export_stream_params(HEVCCon
+
+     ff_set_sar(avctx, sps->vui.sar);
+
++    if (sps->chroma_format_idc == 1) { /* 4:2:0 */
++        avctx->chroma_sample_location = sps->vui.chroma_loc_info_present_flag ?
++            sps->vui.chroma_sample_loc_type_top_field + 1 : /* VUI type N -> AVChromaLocation N + 1 */
++            AVCHROMA_LOC_LEFT;
++    }
++    else if (sps->chroma_format_idc == 2 ||
++             sps->chroma_format_idc == 3) { /* 4:2:2 or 4:4:4 */
++        avctx->chroma_sample_location = AVCHROMA_LOC_TOPLEFT;
++    }
++    else { /* monochrome (or unexpected idc): nothing to locate */
++        avctx->chroma_sample_location = AVCHROMA_LOC_UNSPECIFIED;
++    }
++
+     if (sps->vui.video_signal_type_present_flag)
+         avctx->color_range = sps->vui.video_full_range_flag ? AVCOL_RANGE_JPEG
+                                                             : AVCOL_RANGE_MPEG;
+@@ -372,14 +385,20 @@ static enum AVPixelFormat get_format(HEV
+ #define HWACCEL_MAX (CONFIG_HEVC_DXVA2_HWACCEL + \
+                      CONFIG_HEVC_D3D11VA_HWACCEL * 2 + \
+                      CONFIG_HEVC_NVDEC_HWACCEL + \
++                     CONFIG_HEVC_V4L2REQUEST_HWACCEL + \
+                      CONFIG_HEVC_VAAPI_HWACCEL + \
+                      CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL + \
++                     CONFIG_HEVC_RPI4_8_HWACCEL + \
++                     CONFIG_HEVC_RPI4_10_HWACCEL + \
+                      CONFIG_HEVC_VDPAU_HWACCEL)
+     enum AVPixelFormat pix_fmts[HWACCEL_MAX + 2], *fmt = pix_fmts;
+
+     switch (sps->pix_fmt) {
+     case AV_PIX_FMT_YUV420P:
+     case AV_PIX_FMT_YUVJ420P:
++#if CONFIG_HEVC_RPI4_8_HWACCEL
++        *fmt++ = AV_PIX_FMT_RPI4_8;
++#endif
+ #if CONFIG_HEVC_DXVA2_HWACCEL
+         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
+ #endif
+@@ -399,8 +418,14 @@ static enum AVPixelFormat get_format(HEV
+ #if CONFIG_HEVC_VIDEOTOOLBOX_HWACCEL
+         *fmt++ = AV_PIX_FMT_VIDEOTOOLBOX;
+ #endif
++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
++        *fmt++ = AV_PIX_FMT_DRM_PRIME;
++#endif
+         break;
+     case AV_PIX_FMT_YUV420P10:
++#if CONFIG_HEVC_RPI4_10_HWACCEL
++        *fmt++ = AV_PIX_FMT_RPI4_10;
++#endif
+ #if CONFIG_HEVC_DXVA2_HWACCEL
+         *fmt++ = AV_PIX_FMT_DXVA2_VLD;
+ #endif
+@@ -417,6 +442,9 @@ static enum AVPixelFormat get_format(HEV
+ #if CONFIG_HEVC_NVDEC_HWACCEL
+         *fmt++ = AV_PIX_FMT_CUDA;
+ #endif
++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
++        *fmt++ = AV_PIX_FMT_DRM_PRIME;
++#endif
+         break;
+     case AV_PIX_FMT_YUV444P:
+ #if CONFIG_HEVC_VDPAU_HWACCEL
+@@ -3230,7 +3258,14 @@ static int hevc_decode_frame(AVCodecCont
+     s->ref = NULL;
+     ret    = decode_nal_units(s, avpkt->data, avpkt->size);
+     if (ret < 0)
++    {
++        // Ensure that hwaccel knows this frame is over
++        if (s->avctx->hwaccel && s->avctx->hwaccel->abort_frame) {
++            s->avctx->hwaccel->abort_frame(s->avctx);
++        }
++
+         return ret;
++    }
+
+     if (avctx->hwaccel) {
+         if (s->ref && (ret = avctx->hwaccel->end_frame(avctx)) < 0) {
+@@ -3585,6 +3620,15 @@ AVCodec ff_hevc_decoder = {
+ #if CONFIG_HEVC_NVDEC_HWACCEL
+                                HWACCEL_NVDEC(hevc),
+ #endif
++#if CONFIG_HEVC_RPI4_8_HWACCEL
++                               HWACCEL_RPI4_8(hevc),
++#endif
++#if CONFIG_HEVC_RPI4_10_HWACCEL
++                               HWACCEL_RPI4_10(hevc),
++#endif
++#if CONFIG_HEVC_V4L2REQUEST_HWACCEL
++                               HWACCEL_V4L2REQUEST(hevc),
++#endif
+ #if CONFIG_HEVC_VAAPI_HWACCEL
+                                HWACCEL_VAAPI(hevc),
+ #endif
+--- a/libavcodec/hwaccels.h
++++ b/libavcodec/hwaccels.h
+@@ -34,6 +34,9 @@ extern const AVHWAccel ff_hevc_d3d11va_h
+ extern const AVHWAccel ff_hevc_d3d11va2_hwaccel;
+ extern const AVHWAccel ff_hevc_dxva2_hwaccel;
+ extern const AVHWAccel ff_hevc_nvdec_hwaccel;
++extern const AVHWAccel ff_hevc_rpi4_8_hwaccel;
++extern const AVHWAccel ff_hevc_rpi4_10_hwaccel;
++extern const AVHWAccel ff_hevc_v4l2request_hwaccel;
+ extern const AVHWAccel ff_hevc_vaapi_hwaccel;
+ extern const AVHWAccel ff_hevc_vdpau_hwaccel;
+ extern const AVHWAccel ff_hevc_videotoolbox_hwaccel;
+--- a/libavcodec/hwconfig.h
++++ b/libavcodec/hwconfig.h
+@@ -24,6 +24,7 @@
+
+
+ #define HWACCEL_CAP_ASYNC_SAFE      (1 << 0)
++#define HWACCEL_CAP_MT_SAFE         (1 << 1)
+
+
+ typedef struct AVCodecHWConfigInternal {
+@@ -70,6 +71,12 @@ typedef struct AVCodecHWConfigInternal {
+     HW_CONFIG_HWACCEL(1, 1, 0, D3D11,        D3D11VA,      ff_ ## codec ## _d3d11va2_hwaccel)
+ #define HWACCEL_NVDEC(codec) \
+     HW_CONFIG_HWACCEL(1, 1, 0, CUDA,         CUDA,         ff_ ## codec ## _nvdec_hwaccel)
++#define HWACCEL_RPI4_8(codec) \
++    HW_CONFIG_HWACCEL(0, 0, 1, RPI4_8,       NONE,         ff_ ## codec ## _rpi4_8_hwaccel)
++#define HWACCEL_RPI4_10(codec) \
++    HW_CONFIG_HWACCEL(0, 0, 1, RPI4_10,      NONE,         ff_ ## codec ## _rpi4_10_hwaccel)
++#define HWACCEL_V4L2REQUEST(codec) \
++    HW_CONFIG_HWACCEL(1, 0, 0, DRM_PRIME,    DRM,          ff_ ## codec ## _v4l2request_hwaccel)
+ #define HWACCEL_VAAPI(codec) \
+     HW_CONFIG_HWACCEL(1, 1, 1, VAAPI,        VAAPI,        ff_ ## codec ## _vaapi_hwaccel)
+ #define HWACCEL_VDPAU(codec) \
+--- a/libavcodec/mmaldec.c
++++ b/libavcodec/mmaldec.c
+@@ -24,6 +24,9 @@
+  * MMAL Video Decoder
+  */
+
++#pragma GCC diagnostic push
++// Many many redundant decls in the header files
++#pragma GCC diagnostic ignored "-Wredundant-decls"
+ #include <bcm_host.h>
+ #include <interface/mmal/mmal.h>
+ #include <interface/mmal/mmal_parameters_video.h>
+@@ -31,6 +34,7 @@
+ #include <interface/mmal/util/mmal_util_params.h>
+ #include <interface/mmal/util/mmal_default_components.h>
+ #include <interface/mmal/vc/mmal_vc_api.h>
++#pragma GCC diagnostic pop
+ #include <stdatomic.h>
+
+ #include "avcodec.h"
+--- a/libavcodec/pthread_frame.c
++++ b/libavcodec/pthread_frame.c
+@@ -191,7 +191,8 @@ static attribute_align_arg void *frame_w
+
+         /* if the previous thread uses hwaccel then we take the lock to ensure
+          * the threads don't run concurrently */
+-        if (avctx->hwaccel) {
++        if (avctx->hwaccel &&
++            !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE)) {
+             pthread_mutex_lock(&p->parent->hwaccel_mutex);
+             p->hwaccel_serializing = 1;
+         }
+@@ -614,7 +615,9 @@ void ff_thread_finish_setup(AVCodecConte
+
+     if (!(avctx->active_thread_type&FF_THREAD_FRAME)) return;
+
+-    if (avctx->hwaccel && !p->hwaccel_serializing) {
++    if (avctx->hwaccel &&
++        !(avctx->hwaccel->caps_internal & HWACCEL_CAP_MT_SAFE) &&
++        !p->hwaccel_serializing) {
+         pthread_mutex_lock(&p->parent->hwaccel_mutex);
+         p->hwaccel_serializing = 1;
+     }
+--- a/libavcodec/raw.c
++++ b/libavcodec/raw.c
+@@ -293,6 +293,12 @@ const PixelFormatTag ff_raw_pix_fmt_tags
+     { AV_PIX_FMT_RGB565LE,MKTAG( 3 ,  0 ,  0 ,  0 ) }, /* flipped RGB565LE */
+     { AV_PIX_FMT_YUV444P, MKTAG('Y', 'V', '2', '4') }, /* YUV444P, swapped UV */
+
++    /* RPI (Might as well define for everything) */
++    { AV_PIX_FMT_SAND128,     MKTAG('S', 'A', 'N', 'D') },
++    { AV_PIX_FMT_RPI4_8,      MKTAG('S', 'A', 'N', 'D') },
++    { AV_PIX_FMT_SAND64_10,   MKTAG('S', 'N', 'D', 'A') },
++    { AV_PIX_FMT_RPI4_10,     MKTAG('S', 'N', 'D', 'B') },
++
+     { AV_PIX_FMT_NONE, 0 },
+ };
+
+--- a/libavcodec/rawenc.c
++++ b/libavcodec/rawenc.c
+@@ -24,6 +24,7 @@
+  * Raw Video Encoder
+  */
+
++#include "config.h"
+ #include "avcodec.h"
+ #include "raw.h"
+ #include "internal.h"
+@@ -31,6 +32,10 @@
+ #include "libavutil/intreadwrite.h"
+ #include "libavutil/imgutils.h"
+ #include "libavutil/internal.h"
++#include "libavutil/avassert.h"
++#if CONFIG_SAND
++#include "libavutil/rpi_sand_fns.h"
++#endif
+
+ static av_cold int raw_encode_init(AVCodecContext *avctx)
+ {
+@@ -49,22 +54,114 @@ FF_ENABLE_DEPRECATION_WARNINGS
+     return 0;
+ }
+
++#if CONFIG_SAND
++static int raw_sand8_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
++                      const AVFrame *frame)
++{
++    const int width = av_frame_cropped_width(frame);
++    const int height = av_frame_cropped_height(frame);
++    const int x0 = frame->crop_left;
++    const int y0 = frame->crop_top;
++    const int size = width * height * 3 / 2;
++    uint8_t * dst;
++    int ret;
++
++    if ((ret = ff_alloc_packet2(avctx, pkt, size, size)) < 0)
++        return ret;
++
++    dst = pkt->data;
++
++    av_rpi_sand_to_planar_y8(dst, width, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height);
++    dst += width * height;
++    av_rpi_sand_to_planar_c8(dst, width / 2, dst + width * height / 4, width / 2,
++                          frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0 / 2, y0 / 2, width / 2, height / 2);
++    return 0;
++}
++
++static int raw_sand16_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
++                      const AVFrame *frame)
++{
++    const int width = av_frame_cropped_width(frame);
++    const int height = av_frame_cropped_height(frame);
++    const int x0 = frame->crop_left;
++    const int y0 = frame->crop_top;
++    const int size = width * height * 3;
++    uint8_t * dst;
++    int ret;
++
++    if ((ret = ff_alloc_packet2(avctx, pkt, size, size)) < 0)
++        return ret;
++
++    dst = pkt->data;
++
++    av_rpi_sand_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0 * 2, y0, width * 2, height);
++    dst += width * height * 2;
++    av_rpi_sand_to_planar_c16(dst, width, dst + width * height / 2, width,
++                          frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0, y0 / 2, width, height / 2);
++    return 0;
++}
++
++static int raw_sand30_as_yuv420(AVCodecContext *avctx, AVPacket *pkt,
++                      const AVFrame *frame)
++{
++    const int width = av_frame_cropped_width(frame);
++    const int height = av_frame_cropped_height(frame);
++    const int x0 = frame->crop_left;
++    const int y0 = frame->crop_top;
++    const int size = width * height * 3;
++    uint8_t * dst;
++    int ret;
++
++    if ((ret = ff_alloc_packet2(avctx, pkt, size, size)) < 0)
++        return ret;
++
++    dst = pkt->data;
++
++    av_rpi_sand30_to_planar_y16(dst, width * 2, frame->data[0], frame->linesize[0], frame->linesize[3], x0, y0, width, height);
++    dst += width * height * 2;
++    av_rpi_sand30_to_planar_c16(dst, width, dst + width * height / 2, width,
++                          frame->data[1], frame->linesize[1], av_rpi_sand_frame_stride2(frame), x0/2, y0 / 2, width/2, height / 2);
++    return 0;
++}
++#endif
++
++
+ static int raw_encode(AVCodecContext *avctx, AVPacket *pkt,
+-                      const AVFrame *frame, int *got_packet)
++                      const AVFrame *src_frame, int *got_packet)
+ {
+-    int ret = av_image_get_buffer_size(frame->format,
+-                                       frame->width, frame->height, 1);
++    int ret;
++    AVFrame * frame = NULL;
+
+-    if (ret < 0)
++#if CONFIG_SAND
++    if (av_rpi_is_sand_frame(src_frame)) {
++        ret = av_rpi_is_sand8_frame(src_frame) ? raw_sand8_as_yuv420(avctx, pkt, src_frame) :
++            av_rpi_is_sand16_frame(src_frame) ? raw_sand16_as_yuv420(avctx, pkt, src_frame) :
++            av_rpi_is_sand30_frame(src_frame) ? raw_sand30_as_yuv420(avctx, pkt, src_frame) : -1;
++        *got_packet = (ret == 0);
+         return ret;
++    }
++#endif
++
++    if ((frame = av_frame_clone(src_frame)) == NULL) {
++        ret = AVERROR(ENOMEM);
++        goto fail;
++    }
++
++    if ((ret = av_frame_apply_cropping(frame, AV_FRAME_CROP_UNALIGNED)) < 0)
++        goto fail;
++
++    ret = av_image_get_buffer_size(frame->format,
++                                       frame->width, frame->height, 1);
++    if (ret < 0)
++        goto fail;
+
+     if ((ret = ff_alloc_packet2(avctx, pkt, ret, ret)) < 0)
+-        return ret;
++        goto fail;
+     if ((ret = av_image_copy_to_buffer(pkt->data, pkt->size,
+                                        (const uint8_t **)frame->data, frame->linesize,
+                                        frame->format,
+                                        frame->width, frame->height, 1)) < 0)
+-        return ret;
++        goto fail;
+
+     if(avctx->codec_tag == AV_RL32("yuv2") && ret > 0 &&
+        frame->format   == AV_PIX_FMT_YUYV422) {
+@@ -81,8 +178,14 @@ static int raw_encode(AVCodecContext *av
+         }
+     }
+     pkt->flags |= AV_PKT_FLAG_KEY;
++    av_frame_free(&frame);
+     *got_packet = 1;
+     return 0;
++
++fail:
++    av_frame_free(&frame);
++    *got_packet = 0;
++    return ret;
+ }
+
+ AVCodec ff_rawvideo_encoder = {
+--- /dev/null
++++ b/libavcodec/rpi_hevc_cabac.c
+@@ -0,0 +1,2257 @@
++/*
++ * HEVC CABAC decoding
++ *
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ * Copyright (C) 2012 - 2013 Gildas Cocherel
++ * Copyright (C) 2012 - 2013 Gildas Cocherel
++ * Copyright (C) 2018 John Cox, Ben Avison, Peter de Rivaz for Raspberry Pi (Trading)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#define UNCHECKED_BITSTREAM_READER 1
++
++#include "libavutil/attributes.h"
++#include "libavutil/common.h"
++
++#include "cabac_functions.h"
++#include "rpi_hevc_data.h"
++#include "hevc.h"
++#include "rpi_hevcdec.h"
++#include "rpi_hevc_cabac_fns.h"
++
++#include "libavutil/rpi_sand_fns.h"
++
++// BY22 is probably faster than simple bypass if the processor has
++// either a fast 32-bit divide or a fast 32x32->64[63:32] instruction
++// x86 has fast int divide
++// Arm doesn't have divide or general fast 64 bit, but does have the multiply
++// * Beware: ARCH_xxx isn't set if configure --disable-asm is used
++#define USE_BY22 (HAVE_FAST_64BIT || ARCH_ARM || ARCH_X86)
++// Use native divide if we have a fast one - otherwise use mpy 1/x
++// x86 has a fast integer divide - arm doesn't - unsure about other
++// architectures
++#define USE_BY22_DIV  ARCH_X86
++
++// Special case blocks with a single significant coeff
++// Decreases the complexity of the code for a common case but increases the
++// code size.
++#define USE_N_END_1 1
++
++#if !USE_BY22_DIV
++// * 1/x @ 32 bits gets us 22 bits of accuracy
++#define CABAC_BY22_PEEK_BITS  22
++#else
++// A real 32-bit divide gets us another bit
++// If we have a 64 bit int & a unit time divider then we should get a lot
++// of bits (55)  but that is untested and it is unclear if it would give
++// us a large advantage
++#define CABAC_BY22_PEEK_BITS  23
++#endif
++
++#define CABAC_MAX_BIN 31
++
++
++#if USE_BY22 && !USE_BY22_DIV
++#define I(x) (uint32_t)((0x10000000000ULL / (uint64_t)(x)) + 1ULL)
++
++static const uint32_t cabac_by22_inv_range[256] = {
++                                                    0,      I(257), I(258), I(259),
++    I(260), I(261), I(262), I(263), I(264), I(265), I(266), I(267), I(268), I(269),
++    I(270), I(271), I(272), I(273), I(274), I(275), I(276), I(277), I(278), I(279),
++    I(280), I(281), I(282), I(283), I(284), I(285), I(286), I(287), I(288), I(289),
++    I(290), I(291), I(292), I(293), I(294), I(295), I(296), I(297), I(298), I(299),
++    I(300), I(301), I(302), I(303), I(304), I(305), I(306), I(307), I(308), I(309),
++    I(310), I(311), I(312), I(313), I(314), I(315), I(316), I(317), I(318), I(319),
++    I(320), I(321), I(322), I(323), I(324), I(325), I(326), I(327), I(328), I(329),
++    I(330), I(331), I(332), I(333), I(334), I(335), I(336), I(337), I(338), I(339),
++    I(340), I(341), I(342), I(343), I(344), I(345), I(346), I(347), I(348), I(349),
++    I(350), I(351), I(352), I(353), I(354), I(355), I(356), I(357), I(358), I(359),
++    I(360), I(361), I(362), I(363), I(364), I(365), I(366), I(367), I(368), I(369),
++    I(370), I(371), I(372), I(373), I(374), I(375), I(376), I(377), I(378), I(379),
++    I(380), I(381), I(382), I(383), I(384), I(385), I(386), I(387), I(388), I(389),
++    I(390), I(391), I(392), I(393), I(394), I(395), I(396), I(397), I(398), I(399),
++    I(400), I(401), I(402), I(403), I(404), I(405), I(406), I(407), I(408), I(409),
++    I(410), I(411), I(412), I(413), I(414), I(415), I(416), I(417), I(418), I(419),
++    I(420), I(421), I(422), I(423), I(424), I(425), I(426), I(427), I(428), I(429),
++    I(430), I(431), I(432), I(433), I(434), I(435), I(436), I(437), I(438), I(439),
++    I(440), I(441), I(442), I(443), I(444), I(445), I(446), I(447), I(448), I(449),
++    I(450), I(451), I(452), I(453), I(454), I(455), I(456), I(457), I(458), I(459),
++    I(460), I(461), I(462), I(463), I(464), I(465), I(466), I(467), I(468), I(469),
++    I(470), I(471), I(472), I(473), I(474), I(475), I(476), I(477), I(478), I(479),
++    I(480), I(481), I(482), I(483), I(484), I(485), I(486), I(487), I(488), I(489),
++    I(490), I(491), I(492), I(493), I(494), I(495), I(496), I(497), I(498), I(499),
++    I(500), I(501), I(502), I(503), I(504), I(505), I(506), I(507), I(508), I(509),
++    I(510), I(511)
++};
++#undef I
++#endif  // USE_BY22 && !USE_BY22_DIV
++
++#if ARCH_ARM
++#include "arm/rpi_hevc_cabac.h"
++#endif
++
++/**
++ * Number of bins for each SyntaxElement.
++ */
++static const int8_t num_bins_in_se[] = {
++     1, // sao_merge_flag
++     1, // sao_type_idx
++     0, // sao_eo_class
++     0, // sao_band_position
++     0, // sao_offset_abs
++     0, // sao_offset_sign
++     0, // end_of_slice_flag
++     3, // split_coding_unit_flag
++     1, // cu_transquant_bypass_flag
++     3, // skip_flag
++     3, // cu_qp_delta
++     1, // pred_mode
++     4, // part_mode
++     0, // pcm_flag
++     1, // prev_intra_luma_pred_mode
++     0, // mpm_idx
++     0, // rem_intra_luma_pred_mode
++     2, // intra_chroma_pred_mode
++     1, // merge_flag
++     1, // merge_idx
++     5, // inter_pred_idc
++     2, // ref_idx_l0
++     2, // ref_idx_l1
++     2, // abs_mvd_greater0_flag
++     2, // abs_mvd_greater1_flag
++     0, // abs_mvd_minus2
++     0, // mvd_sign_flag
++     1, // mvp_lx_flag
++     1, // no_residual_data_flag
++     3, // split_transform_flag
++     2, // cbf_luma
++     4, // cbf_cb, cbf_cr
++     2, // transform_skip_flag[][]
++     2, // explicit_rdpcm_flag[][]
++     2, // explicit_rdpcm_dir_flag[][]
++    18, // last_significant_coeff_x_prefix
++    18, // last_significant_coeff_y_prefix
++     0, // last_significant_coeff_x_suffix
++     0, // last_significant_coeff_y_suffix
++     4, // significant_coeff_group_flag
++    44, // significant_coeff_flag
++    24, // coeff_abs_level_greater1_flag
++     6, // coeff_abs_level_greater2_flag
++     0, // coeff_abs_level_remaining
++     0, // coeff_sign_flag
++     8, // log2_res_scale_abs
++     2, // res_scale_sign_flag
++     1, // cu_chroma_qp_offset_flag
++     1, // cu_chroma_qp_offset_idx
++};
++
++/**
++ * Offset to ctxIdx 0 in init_values and states, indexed by SyntaxElement.
++ */
++static const int elem_offset[sizeof(num_bins_in_se)] = {
++    0, // sao_merge_flag
++    1, // sao_type_idx
++    2, // sao_eo_class
++    2, // sao_band_position
++    2, // sao_offset_abs
++    2, // sao_offset_sign
++    2, // end_of_slice_flag
++    2, // split_coding_unit_flag
++    5, // cu_transquant_bypass_flag
++    6, // skip_flag
++    9, // cu_qp_delta
++    12, // pred_mode
++    13, // part_mode
++    17, // pcm_flag
++    17, // prev_intra_luma_pred_mode
++    18, // mpm_idx
++    18, // rem_intra_luma_pred_mode
++    18, // intra_chroma_pred_mode
++    20, // merge_flag
++    21, // merge_idx
++    22, // inter_pred_idc
++    27, // ref_idx_l0
++    29, // ref_idx_l1
++    31, // abs_mvd_greater0_flag
++    33, // abs_mvd_greater1_flag
++    35, // abs_mvd_minus2
++    35, // mvd_sign_flag
++    35, // mvp_lx_flag
++    36, // no_residual_data_flag
++    37, // split_transform_flag
++    40, // cbf_luma
++    42, // cbf_cb, cbf_cr
++    46, // transform_skip_flag[][]
++    48, // explicit_rdpcm_flag[][]
++    50, // explicit_rdpcm_dir_flag[][]
++    52, // last_significant_coeff_x_prefix
++    70, // last_significant_coeff_y_prefix
++    88, // last_significant_coeff_x_suffix
++    88, // last_significant_coeff_y_suffix
++    88, // significant_coeff_group_flag
++    92, // significant_coeff_flag
++    136, // coeff_abs_level_greater1_flag
++    160, // coeff_abs_level_greater2_flag
++    166, // coeff_abs_level_remaining
++    166, // coeff_sign_flag
++    166, // log2_res_scale_abs
++    174, // res_scale_sign_flag
++    176, // cu_chroma_qp_offset_flag
++    177, // cu_chroma_qp_offset_idx
++};
++
++#define CNU 154
++/**
++ * Indexed by init_type
++ */
++static const uint8_t init_values[3][HEVC_CONTEXTS] = {
++    { // sao_merge_flag
++      153,
++      // sao_type_idx
++      200,
++      // split_coding_unit_flag
++      139, 141, 157,
++      // cu_transquant_bypass_flag
++      154,
++      // skip_flag
++      CNU, CNU, CNU,
++      // cu_qp_delta
++      154, 154, 154,
++      // pred_mode
++      CNU,
++      // part_mode
++      184, CNU, CNU, CNU,
++      // prev_intra_luma_pred_mode
++      184,
++      // intra_chroma_pred_mode
++      63, 139,
++      // merge_flag
++      CNU,
++      // merge_idx
++      CNU,
++      // inter_pred_idc
++      CNU, CNU, CNU, CNU, CNU,
++      // ref_idx_l0
++      CNU, CNU,
++      // ref_idx_l1
++      CNU, CNU,
++      // abs_mvd_greater0_flag
++      CNU, CNU,
++      // abs_mvd_greater1_flag
++      CNU, CNU,
++      // mvp_lx_flag
++      CNU,
++      // no_residual_data_flag
++      CNU,
++      // split_transform_flag
++      153, 138, 138,
++      // cbf_luma
++      111, 141,
++      // cbf_cb, cbf_cr
++      94, 138, 182, 154,
++      // transform_skip_flag
++      139, 139,
++      // explicit_rdpcm_flag
++      139, 139,
++      // explicit_rdpcm_dir_flag
++      139, 139,
++      // last_significant_coeff_x_prefix
++      110, 110, 124, 125, 140, 153, 125, 127, 140, 109, 111, 143, 127, 111,
++       79, 108, 123,  63,
++      // last_significant_coeff_y_prefix
++      110, 110, 124, 125, 140, 153, 125, 127, 140, 109, 111, 143, 127, 111,
++       79, 108, 123,  63,
++      // significant_coeff_group_flag
++      91, 171, 134, 141,
++      // significant_coeff_flag
++      111, 111, 125, 110, 110,  94, 124, 108, 124, 107, 125, 141, 179, 153,
++      125, 107, 125, 141, 179, 153, 125, 107, 125, 141, 179, 153, 125, 140,
++      139, 182, 182, 152, 136, 152, 136, 153, 136, 139, 111, 136, 139, 111,
++      141, 111,
++      // coeff_abs_level_greater1_flag
++      140,  92, 137, 138, 140, 152, 138, 139, 153,  74, 149,  92, 139, 107,
++      122, 152, 140, 179, 166, 182, 140, 227, 122, 197,
++      // coeff_abs_level_greater2_flag
++      138, 153, 136, 167, 152, 152,
++      // log2_res_scale_abs
++      154, 154, 154, 154, 154, 154, 154, 154,
++      // res_scale_sign_flag
++      154, 154,
++      // cu_chroma_qp_offset_flag
++      154,
++      // cu_chroma_qp_offset_idx
++      154,
++    },
++    { // sao_merge_flag
++      153,
++      // sao_type_idx
++      185,
++      // split_coding_unit_flag
++      107, 139, 126,
++      // cu_transquant_bypass_flag
++      154,
++      // skip_flag
++      197, 185, 201,
++      // cu_qp_delta
++      154, 154, 154,
++      // pred_mode
++      149,
++      // part_mode
++      154, 139, 154, 154,
++      // prev_intra_luma_pred_mode
++      154,
++      // intra_chroma_pred_mode
++      152, 139,
++      // merge_flag
++      110,
++      // merge_idx
++      122,
++      // inter_pred_idc
++      95, 79, 63, 31, 31,
++      // ref_idx_l0
++      153, 153,
++      // ref_idx_l1
++      153, 153,
++      // abs_mvd_greater0_flag
++      140, 198,
++      // abs_mvd_greater1_flag
++      140, 198,
++      // mvp_lx_flag
++      168,
++      // no_residual_data_flag
++      79,
++      // split_transform_flag
++      124, 138, 94,
++      // cbf_luma
++      153, 111,
++      // cbf_cb, cbf_cr
++      149, 107, 167, 154,
++      // transform_skip_flag
++      139, 139,
++      // explicit_rdpcm_flag
++      139, 139,
++      // explicit_rdpcm_dir_flag
++      139, 139,
++      // last_significant_coeff_x_prefix
++      125, 110,  94, 110,  95,  79, 125, 111, 110,  78, 110, 111, 111,  95,
++       94, 108, 123, 108,
++      // last_significant_coeff_y_prefix
++      125, 110,  94, 110,  95,  79, 125, 111, 110,  78, 110, 111, 111,  95,
++       94, 108, 123, 108,
++      // significant_coeff_group_flag
++      121, 140, 61, 154,
++      // significant_coeff_flag
++      155, 154, 139, 153, 139, 123, 123,  63, 153, 166, 183, 140, 136, 153,
++      154, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 170,
++      153, 123, 123, 107, 121, 107, 121, 167, 151, 183, 140, 151, 183, 140,
++      140, 140,
++      // coeff_abs_level_greater1_flag
++      154, 196, 196, 167, 154, 152, 167, 182, 182, 134, 149, 136, 153, 121,
++      136, 137, 169, 194, 166, 167, 154, 167, 137, 182,
++      // coeff_abs_level_greater2_flag
++      107, 167, 91, 122, 107, 167,
++      // log2_res_scale_abs
++      154, 154, 154, 154, 154, 154, 154, 154,
++      // res_scale_sign_flag
++      154, 154,
++      // cu_chroma_qp_offset_flag
++      154,
++      // cu_chroma_qp_offset_idx
++      154,
++    },
++    { // sao_merge_flag
++      153,
++      // sao_type_idx
++      160,
++      // split_coding_unit_flag
++      107, 139, 126,
++      // cu_transquant_bypass_flag
++      154,
++      // skip_flag
++      197, 185, 201,
++      // cu_qp_delta
++      154, 154, 154,
++      // pred_mode
++      134,
++      // part_mode
++      154, 139, 154, 154,
++      // prev_intra_luma_pred_mode
++      183,
++      // intra_chroma_pred_mode
++      152, 139,
++      // merge_flag
++      154,
++      // merge_idx
++      137,
++      // inter_pred_idc
++      95, 79, 63, 31, 31,
++      // ref_idx_l0
++      153, 153,
++      // ref_idx_l1
++      153, 153,
++      // abs_mvd_greater0_flag
++      169, 198,
++      // abs_mvd_greater1_flag
++      169, 198,
++      // mvp_lx_flag
++      168,
++      // no_residual_data_flag
++      79,
++      // split_transform_flag
++      224, 167, 122,
++      // cbf_luma
++      153, 111,
++      // cbf_cb, cbf_cr
++      149, 92, 167, 154,
++      // transform_skip_flag
++      139, 139,
++      // explicit_rdpcm_flag
++      139, 139,
++      // explicit_rdpcm_dir_flag
++      139, 139,
++      // last_significant_coeff_x_prefix
++      125, 110, 124, 110,  95,  94, 125, 111, 111,  79, 125, 126, 111, 111,
++       79, 108, 123,  93,
++      // last_significant_coeff_y_prefix
++      125, 110, 124, 110,  95,  94, 125, 111, 111,  79, 125, 126, 111, 111,
++       79, 108, 123,  93,
++      // significant_coeff_group_flag
++      121, 140, 61, 154,
++      // significant_coeff_flag
++      170, 154, 139, 153, 139, 123, 123,  63, 124, 166, 183, 140, 136, 153,
++      154, 166, 183, 140, 136, 153, 154, 166, 183, 140, 136, 153, 154, 170,
++      153, 138, 138, 122, 121, 122, 121, 167, 151, 183, 140, 151, 183, 140,
++      140, 140,
++      // coeff_abs_level_greater1_flag
++      154, 196, 167, 167, 154, 152, 167, 182, 182, 134, 149, 136, 153, 121,
++      136, 122, 169, 208, 166, 167, 154, 152, 167, 182,
++      // coeff_abs_level_greater2_flag
++      107, 167, 91, 107, 107, 167,
++      // log2_res_scale_abs
++      154, 154, 154, 154, 154, 154, 154, 154,
++      // res_scale_sign_flag
++      154, 154,
++      // cu_chroma_qp_offset_flag
++      154,
++      // cu_chroma_qp_offset_idx
++      154,
++    },
++};
++
++static const uint8_t scan_1x1[1] = {
++    0,
++};
++
++static const uint8_t horiz_scan2x2_x[4] = {
++    0, 1, 0, 1,
++};
++
++static const uint8_t horiz_scan2x2_y[4] = {
++    0, 0, 1, 1
++};
++
++static const uint8_t horiz_scan4x4_x[16] = {
++    0, 1, 2, 3,
++    0, 1, 2, 3,
++    0, 1, 2, 3,
++    0, 1, 2, 3,
++};
++
++static const uint8_t horiz_scan4x4_y[16] = {
++    0, 0, 0, 0,
++    1, 1, 1, 1,
++    2, 2, 2, 2,
++    3, 3, 3, 3,
++};
++
++static const uint8_t horiz_scan8x8_inv[8][8] = {
++    {  0,  1,  2,  3, 16, 17, 18, 19, },
++    {  4,  5,  6,  7, 20, 21, 22, 23, },
++    {  8,  9, 10, 11, 24, 25, 26, 27, },
++    { 12, 13, 14, 15, 28, 29, 30, 31, },
++    { 32, 33, 34, 35, 48, 49, 50, 51, },
++    { 36, 37, 38, 39, 52, 53, 54, 55, },
++    { 40, 41, 42, 43, 56, 57, 58, 59, },
++    { 44, 45, 46, 47, 60, 61, 62, 63, },
++};
++
++static const uint8_t diag_scan2x2_x[4] = {
++    0, 0, 1, 1,
++};
++
++static const uint8_t diag_scan2x2_y[4] = {
++    0, 1, 0, 1,
++};
++
++static const uint8_t diag_scan2x2_inv[2][2] = {
++    { 0, 2, },
++    { 1, 3, },
++};
++
++static const uint8_t diag_scan4x4_inv[4][4] = {
++    { 0,  2,  5,  9, },
++    { 1,  4,  8, 12, },
++    { 3,  7, 11, 14, },
++    { 6, 10, 13, 15, },
++};
++
++static const uint8_t diag_scan8x8_inv[8][8] = {
++    {  0,  2,  5,  9, 14, 20, 27, 35, },
++    {  1,  4,  8, 13, 19, 26, 34, 42, },
++    {  3,  7, 12, 18, 25, 33, 41, 48, },
++    {  6, 11, 17, 24, 32, 40, 47, 53, },
++    { 10, 16, 23, 31, 39, 46, 52, 57, },
++    { 15, 22, 30, 38, 45, 51, 56, 60, },
++    { 21, 29, 37, 44, 50, 55, 59, 62, },
++    { 28, 36, 43, 49, 54, 58, 61, 63, },
++};
++
++
++typedef struct  // One scan-position entry of off_xys, as produced by the XYT(x,y,t) initialiser
++{
++    uint16_t coeff;  // XYT_C(x,y,t): x + (y << t)
++    uint16_t scale;  // XYT_S(x,y,t): x and y each shifted right by SCALE_SHR(t), row shift capped at 3
++} xy_off_t;
++
++#define XYT_C(x,y,t) ((x) + ((y) << (t)))
++#define SCALE_TRAFO(t) ((t) > 3 ? 3 : (t))
++#define SCALE_SHR(t) ((t) - SCALE_TRAFO(t))
++#define XYT_S(x,y,t) (((x) >> SCALE_SHR(t)) + (((y) >> SCALE_SHR(t)) << SCALE_TRAFO(t)))
++
++#define XYT(x,y,t) {XYT_C(x,y,t), XYT_S(x,y,t)}
++
++#define OFF_DIAG(t) {\
++    XYT(0,0,t), XYT(0,1,t), XYT(1,0,t), XYT(0,2,t),\
++    XYT(1,1,t), XYT(2,0,t), XYT(0,3,t), XYT(1,2,t),\
++    XYT(2,1,t), XYT(3,0,t), XYT(1,3,t), XYT(2,2,t),\
++    XYT(3,1,t), XYT(2,3,t), XYT(3,2,t), XYT(3,3,t)\
++}
++
++#define OFF_HORIZ(t) {\
++    XYT(0,0,t), XYT(1,0,t), XYT(2,0,t), XYT(3,0,t),\
++    XYT(0,1,t), XYT(1,1,t), XYT(2,1,t), XYT(3,1,t),\
++    XYT(0,2,t), XYT(1,2,t), XYT(2,2,t), XYT(3,2,t),\
++    XYT(0,3,t), XYT(1,3,t), XYT(2,3,t), XYT(3,3,t)\
++}
++
++#define OFF_VERT(t) {\
++    XYT(0,0,t), XYT(0,1,t), XYT(0,2,t), XYT(0,3,t),\
++    XYT(1,0,t), XYT(1,1,t), XYT(1,2,t), XYT(1,3,t),\
++    XYT(2,0,t), XYT(2,1,t), XYT(2,2,t), XYT(2,3,t),\
++    XYT(3,0,t), XYT(3,1,t), XYT(3,2,t), XYT(3,3,t)\
++}
++
++static const xy_off_t off_xys[3][4][16] =
++{
++    {OFF_DIAG(2), OFF_DIAG(3), OFF_DIAG(4), OFF_DIAG(5)},
++    {OFF_HORIZ(2), OFF_HORIZ(3), OFF_HORIZ(4), OFF_HORIZ(5)},
++    {OFF_VERT(2), OFF_VERT(3), OFF_VERT(4), OFF_VERT(5)}
++};
++
++
++// Helper fns
++#ifndef hevc_mem_bits32
++static av_always_inline uint32_t hevc_mem_bits32(const void * buf, const unsigned int offset)  // 32-bit window of buf starting 'offset' bits in
++{
++    return AV_RB32((const uint8_t *)buf + (offset >> 3)) << (offset & 7);  // 32-bit read at the containing byte, then shift out offset % 8 bits (those low result bits are zero)
++}
++#endif
++
++#if AV_GCC_VERSION_AT_LEAST(3,4) && !defined(hevc_clz32)
++#define hevc_clz32 hevc_clz32_builtin
++static av_always_inline unsigned int hevc_clz32_builtin(const uint32_t x)  // Leading-zero count of a 32-bit value; x == 0 is undefined for __builtin_clz
++{
++    // __builtin_clz works on unsigned int - so subtract the extra leading bits if int is >32 bits long
++    return __builtin_clz(x) - (sizeof(int) * 8 - 32);
++}
++#endif
++
++// It is unlikely that we will ever need this but include for completeness
++#ifndef hevc_clz32
++static inline unsigned int hevc_clz32(unsigned int x)  // Portable leading-zero count by binary search; yields 31 for x == 0 as well as for x == 1
++{
++    unsigned int n = 1;  // starts at 1; the final top-bit test subtracts it back when bit 31 is set
++    if ((x & 0xffff0000) == 0) {  // top 16 bits clear
++        n += 16;
++        x <<= 16;
++    }
++    if ((x & 0xff000000) == 0) {  // top 8 bits clear
++        n += 8;
++        x <<= 8;
++    }
++    if ((x & 0xf0000000) == 0) {  // top 4 bits clear
++        n += 4;
++        x <<= 4;
++    }
++    if ((x & 0xc0000000) == 0) {  // top 2 bits clear
++        n += 2;
++        x <<= 2;
++    }
++    return n - ((x >> 31) & 1);  // NOTE(review): the masks and shifts assume unsigned int is 32 bits wide - confirm for other targets
++}
++#endif
++
++static inline int cabac_overflow(const CABACContext * const cc)  // Non-zero once the read pointer is 4 or more bytes past bytestream_end
++{
++    av_assert0(cc->bytestream >= cc->bytestream_start);  // the read pointer must never be before the start of the buffer
++    return cc->bytestream >= cc->bytestream_end + 4;  // NOTE(review): the 4-byte slack presumably allows for decoder read-ahead - confirm
++}
++
++int ff_hevc_rpi_cabac_overflow(const HEVCRpiLocalContext * const lc)  // Exported wrapper: overflow check on lc's CABAC context
++{
++    return cabac_overflow(&lc->cc);  // same result as cabac_overflow(): non-zero on overrun
++}
++
++#if !USE_BY22
++// If no by22 then _by22 functions will revert to normal and so _peek/_flush
++// will no longer be called but the setup calls will still exist and we want
++// to null them out
++#define bypass_start(s)
++#define bypass_finish(s)
++#else
++// Use BY22 for residual bypass block
++
++#define bypass_start(cc) get_cabac_by22_start(cc)
++#define bypass_finish(cc) get_cabac_by22_finish(cc)
++
++// BY22 notes that bypass is simply a divide into the bitstream and so we
++// can peek out large quantities of bits at once and treat the result as if
++// it was VLC.  In many cases this will lead to O(1) processing rather than
++// O(n) though the setup and teardown is sufficiently expensive that it is
++// only worth using if we expect to be dealing with more than a few bits
++// The definition of "a few bits" will vary from platform to platform but
++// tests on ARM show that it probably isn't worth it for a single coded
++// residual, but is for >1 - it also seems likely that if there are
++// more residuals then they are likely to be bigger and this will make the
++// O(1) nature of the code more worthwhile.
++
++
++// Bypass block start
++// Must be called before _by22_peek is used as it sets the CABAC environment
++// into the correct state.  _by22_finish must be called to return to 'normal'
++// (i.e. non-bypass) cabac decoding
++#ifndef get_cabac_by22_start
++static inline void get_cabac_by22_start(CABACContext * const c)  // Switch c from normal CABAC state into the by22 bypass-peek state
++{
++    const unsigned int bits = __builtin_ctz(c->low);  // index of the lowest set bit of low; undefined if low == 0
++    const uint32_t m = hevc_mem_bits32(c->bytestream, 0);  // the next 32 bits at the current read pointer
++    uint32_t x = (c->low << (22 - CABAC_BITS)) ^ ((m ^ 0x80000000U) >> (9 + CABAC_BITS - bits));  // rescaled low merged with fresh stream bits; NOTE(review): the 0x80000000 XOR presumably cancels low's lowest set bit - confirm
++#if !USE_BY22_DIV
++    const uint32_t inv = cabac_by22_inv_range[c->range & 0xff];  // table entry i is built from I(256 + i); entry 0 is 0
++#endif
++
++    c->bytestream -= (CABAC_BITS / 8);  // rewind; get_cabac_by22_finish adds this back
++    c->by22.bits = bits;  // running bit count, advanced by get_cabac_by22_flush
++#if !USE_BY22_DIV
++    c->by22.range = c->range;  // keep the real range; restored in get_cabac_by22_finish
++    c->range = inv;  // while in by22 mode c->range holds the reciprocal used by get_cabac_by22_peek
++#endif
++    c->low = x;
++}
++#endif
++
++// Bypass block finish
++// Must be called at the end of the bypass block to return to normal operation
++static inline void get_cabac_by22_finish(CABACContext * const c)  // Leave by22 mode: advance the read pointer and rebuild low (and range) for normal decoding
++{
++    unsigned int used = c->by22.bits;  // initial bit count from _start plus everything added by _flush
++    unsigned int bytes_used = (used / CABAC_BITS) * (CABAC_BITS / 8);  // whole CABAC_BITS-sized units consumed, in bytes
++    unsigned int bits_used = used & (CABAC_BITS == 16 ? 15 : 7);  // remainder bits (mask is 15 when CABAC_BITS is 16, otherwise 7)
++
++    c->bytestream += bytes_used + (CABAC_BITS / 8);  // also undoes the rewind done in get_cabac_by22_start
++    c->low = (((uint32_t)c->low >> (22 - CABAC_BITS + bits_used)) | 1) << bits_used;  // rescale low back and set a single marker bit at position bits_used
++#if !USE_BY22_DIV
++    c->range = c->by22.range;  // put back the real range saved by get_cabac_by22_start
++#endif
++}
++
++// Peek bypass bits
++// _by22_start must be called before _by22_peek is called and _by22_flush
++// must be called afterwards to flush any used bits
++// The actual number of valid bits returned is
++// min(<coded bypass block length>, CABAC_BY22_PEEK_BITS). CABAC_BY22_PEEK_BITS
++// will be at least 22 which should be long enough for any prefix or suffix
++// though probably not long enough for the worst case combination
++#ifndef get_cabac_by22_peek
++static inline uint32_t get_cabac_by22_peek(const CABACContext * const c)  // Return upcoming bypass bits left-justified in a 32-bit word without consuming them
++{
++#if USE_BY22_DIV
++    return ((unsigned int)c->low / (unsigned int)c->range) << 9;  // native divide path: quotient moved to the top of the word
++#else
++    uint32_t x = c->low & ~1U;  // clear bit 0 before scaling
++    const uint32_t inv = c->range;  // in by22 mode c->range holds the reciprocal stored by get_cabac_by22_start
++
++    if (inv != 0)  // inv == 0 is table entry 0 ((range & 0xff) == 0): x is used unscaled
++        x = (uint32_t)(((uint64_t)x * (uint64_t)inv) >> 32);  // multiply by the reciprocal, keep the high 32 bits of the 64-bit product
++
++    return x << 1;
++#endif
++}
++#endif
++
++// Flush bypass bits peeked by _by22_peek
++// Flush n bypass bits. n must be >= 1 to guarantee correct operation
++// val is an unmodified copy of whatever _by22_peek returned
++#ifndef get_cabac_by22_flush
++static inline void get_cabac_by22_flush(CABACContext * c, const unsigned int n, const uint32_t val)  // Consume the top n bits of a peeked value 'val' and refill low from the stream
++{
++    // Subtract the bits used (top n bits of val, times the range) & reshift up to the top of the word
++#if USE_BY22_DIV
++    const uint32_t low = (((unsigned int)c->low << n) - (((val >> (32 - n)) * (unsigned int)c->range) << 23));
++#else
++    const uint32_t low = (((uint32_t)c->low << n) - (((val >> (32 - n)) * c->by22.range) << 23));
++#endif
++
++    // and refill lower bits
++    // We will probably OR over some existing bits but that doesn't matter
++    c->by22.bits += n;  // by22.bits is the bit offset from c->bytestream of the next unread stream bit
++    c->low = low | (hevc_mem_bits32(c->bytestream, c->by22.bits) >> 9);  // fresh stream bits go below the top 9 bits
++}
++#endif
++
++#endif  // USE_BY22
++
++
++void ff_hevc_rpi_save_states(HEVCRpiContext *s, const HEVCRpiLocalContext * const lc)  // Snapshot lc's stat_coeff and CABAC context states into s->cabac_save
++{
++    memcpy(s->cabac_save->rice, lc->stat_coeff, 4);  // 4 bytes; NOTE(review): presumably four one-byte stat_coeff entries - confirm
++    memcpy(s->cabac_save->state, lc->cabac_state, HEVC_CONTEXTS);  // HEVC_CONTEXTS bytes of context state
++}
++
++static void load_states(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc)  // Inverse of ff_hevc_rpi_save_states: copy s->cabac_save back into lc
++{
++    memcpy(lc->stat_coeff, s->cabac_save->rice, 4);  // same 4-byte size as used when saving
++    memcpy(lc->cabac_state, s->cabac_save->state, HEVC_CONTEXTS);  // HEVC_CONTEXTS bytes of context state
++}
++
++int ff_hevc_rpi_cabac_init_decoder(HEVCRpiLocalContext * const lc)  // Start CABAC decoding at the next byte-aligned position of lc->gb; returns ff_init_cabac_decoder()'s result
++{
++    GetBitContext * const gb = &lc->gb;
++    skip_bits(gb, 1);  // drop one bit before aligning
++    align_get_bits(gb);
++    return ff_init_cabac_decoder(&lc->cc,
++                          gb->buffer + get_bits_count(gb) / 8,  // byte address of the current bit position
++                          (get_bits_left(gb) + 7) / 8);  // remaining length in bytes, rounded up
++}
++
++static void cabac_init_state(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc)  // Fill lc->cabac_state from init_values for the current slice and zero stat_coeff
++{
++    int init_type = 2 - s->sh.slice_type;  // selects the row of init_values
++    int i;
++
++    if (s->sh.cabac_init_flag && s->sh.slice_type != HEVC_SLICE_I)
++        init_type ^= 3;  // swaps rows 1 and 2
++
++    for (i = 0; i < HEVC_CONTEXTS; i++) {
++        int init_value = init_values[init_type][i];
++        int m = (init_value >> 4) * 5 - 45;  // from the high nibble
++        int n = ((init_value & 15) << 3) - 16;  // from the low nibble
++        int pre = 2 * (((m * av_clip(s->sh.slice_qp, 0, 51)) >> 4) + n) - 127;  // slice_qp is clipped to 0..51 before scaling
++
++        pre ^= pre >> 31;  // NOTE(review): with an arithmetic shift on 32-bit int this maps negative pre to ~pre - confirm
++        if (pre > 124)
++            pre = 124 + (pre & 1);  // cap at 124/125, keeping the low bit
++        lc->cabac_state[i] = pre;
++    }
++
++    for (i = 0; i < 4; i++)
++        lc->stat_coeff[i] = 0;  // reset all four stat_coeff entries
++}
++
++void ff_hevc_rpi_cabac_init(const HEVCRpiContext * const s, HEVCRpiLocalContext *const lc, const unsigned int ctb_flags)  // Initialise or reload lc's CABAC contexts as requested, then clear the pending request
++{
++    if (lc->cabac_init_req == 1 || (ctb_flags & CTB_TS_FLAGS_CIREQ) != 0)  // a full init takes priority over a load
++    {
++        lc->qPy_pred = s->sh.slice_qp;
++        cabac_init_state(s, lc);  // fresh contexts derived from init_values
++    }
++    else if ((ctb_flags & CTB_TS_FLAGS_CLOAD) != 0)
++    {
++        lc->qPy_pred = s->sh.slice_qp;
++        load_states(s, lc);  // contexts previously stored by ff_hevc_rpi_save_states
++    }
++    lc->cabac_init_req = 0;  // cleared on every call, whether or not anything was done
++}
++
++#define GET_CABAC_LC(ctx) get_cabac(&lc->cc, lc->cabac_state + (ctx))
++
++int ff_hevc_rpi_get_cabac(CABACContext * const c, uint8_t * const state)  // Exported, non-inline entry point for decoding one context-coded bin
++{
++    return get_cabac_inline(c, state);  // forwards to the inline implementation unchanged
++}
++
++int ff_hevc_rpi_get_cabac_terminate(CABACContext * const c)  // Exported wrapper around get_cabac_terminate()
++{
++    return get_cabac_terminate(c);  // result is passed through unchanged
++}
++
++int ff_hevc_rpi_sao_type_idx_decode(HEVCRpiLocalContext * const lc)
++{
++    // First bin is context coded: a zero bin yields 0 with nothing more read
++    if (GET_CABAC_LC(elem_offset[SAO_TYPE_IDX]) == 0)
++        return 0;
++
++    // Second bin is bypass coded: set -> SAO_EDGE, clear -> SAO_BAND
++    return get_cabac_bypass(&lc->cc) ? SAO_EDGE : SAO_BAND;
++}
++
++int ff_hevc_rpi_sao_band_position_decode(HEVCRpiLocalContext * const lc)
++{
++    int value = 0;
++    int remaining;
++
++    for (remaining = 5; remaining > 0; remaining--)  // five bypass bins, most significant first
++        value = (value << 1) | get_cabac_bypass(&lc->cc);
++    return value;
++}
++
++int ff_hevc_rpi_sao_offset_abs_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc)
++{
++    const int c_max = (1 << (FFMIN(s->ps.sps->bit_depth, 10) - 5)) - 1;  // longest allowed run of set bins
++    int count;
++
++    for (count = 0; count < c_max && get_cabac_bypass(&lc->cc); count++)
++        ;  // count set bypass bins; stop at the first clear bin or at c_max
++    return count;
++}
++
++int ff_hevc_rpi_sao_offset_sign_decode(HEVCRpiLocalContext * const lc)  // Read the SAO offset sign as a single bypass bin
++{
++    return get_cabac_bypass(&lc->cc);  // the raw bin value is returned unchanged
++}
++
++int ff_hevc_rpi_sao_eo_class_decode(HEVCRpiLocalContext * const lc)  // Read a 2-bit value from two bypass bins, most significant first
++{
++    int ret = get_cabac_bypass(&lc->cc) << 1;  // first bin read becomes bit 1
++    ret    |= get_cabac_bypass(&lc->cc);  // second bin read becomes bit 0; kept as a separate statement so the read order is fixed
++    return ret;
++}
++
++int ff_hevc_rpi_cu_qp_delta(HEVCRpiLocalContext * const lc)  // Decode a signed cu_qp_delta: context-coded prefix, optional bypass suffix, then a sign bin
++{
++    int val = 1;
++
++    if (get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_CU_QP_DELTA) == 0)  // first bin has its own context
++        return 0;  // zero delta: no sign bin is read
++
++    while (val < 5 &&
++           get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_CU_QP_DELTA + 1) != 0)  // remaining prefix bins share the next context
++        val++;
++
++    if (val >= 5) {  // prefix saturated: a bypass-coded suffix follows
++        unsigned int k = 0;
++        while (k < CABAC_MAX_BIN && get_cabac_bypass(&lc->cc)) {  // unary part: each set bin adds 1 << k
++            val += 1 << k;
++            k++;
++        }
++//        if (k == CABAC_MAX_BIN)
++//            av_log(s->avctx, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", k);
++
++        while (k--)  // then k further bypass bins, most significant first
++            val += get_cabac_bypass(&lc->cc) << k;
++    }
++    return get_cabac_bypass(&lc->cc) ? -val : val;  // final bypass bin is the sign (set -> negative)
++}
++
++int ff_hevc_rpi_cu_chroma_qp_offset_idx(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc)  // Count set context-coded bins, up to c_max, using the single CU_CHROMA_QP_OFFSET_IDX context
++{
++    int c_max= FFMAX(5, s->ps.pps->chroma_qp_offset_list_len_minus1);  // NOTE(review): FFMAX makes c_max at least 5 whatever the list length; if the list length is meant to cap the bin count this would need FFMIN - confirm against the H.265 binarization
++    int i = 0;
++
++    while (i < c_max && GET_CABAC_LC(elem_offset[CU_CHROMA_QP_OFFSET_IDX]))  // stops at the first zero bin or at c_max
++        i++;
++
++    return i;
++}
++
++int ff_hevc_rpi_part_mode_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, const int log2_cb_size)  // Decode part_mode; the trailing comments below give the bin string consumed on each path
++{
++    if (GET_CABAC_LC(elem_offset[PART_MODE])) // 1
++        return PART_2Nx2N;
++    if (log2_cb_size == s->ps.sps->log2_min_cb_size) {  // minimum-size coding block
++        if (lc->cu.pred_mode == MODE_INTRA) // 0
++            return PART_NxN;
++        if (GET_CABAC_LC(elem_offset[PART_MODE] + 1)) // 01
++            return PART_2NxN;
++        if (log2_cb_size == 3) // 00
++            return PART_Nx2N;
++        if (GET_CABAC_LC(elem_offset[PART_MODE] + 2)) // 001
++            return PART_Nx2N;
++        return PART_NxN; // 000
++    }
++
++    if (!s->ps.sps->amp_enabled_flag) {  // without AMP only 2NxN / Nx2N remain
++        if (GET_CABAC_LC(elem_offset[PART_MODE] + 1)) // 01
++            return PART_2NxN;
++        return PART_Nx2N;
++    }
++
++    if (GET_CABAC_LC(elem_offset[PART_MODE] + 1)) { // 01X, 01XX
++        if (GET_CABAC_LC(elem_offset[PART_MODE] + 3)) // 011
++            return PART_2NxN;
++        if (get_cabac_bypass(&lc->cc)) // 0101
++            return PART_2NxnD;
++        return PART_2NxnU; // 0100
++    }
++
++    if (GET_CABAC_LC(elem_offset[PART_MODE] + 3)) // 001
++        return PART_Nx2N;
++    if (get_cabac_bypass(&lc->cc)) // 0001
++        return PART_nRx2N;
++    return PART_nLx2N;  // 0000
++}
++
++int ff_hevc_rpi_mpm_idx_decode(HEVCRpiLocalContext * const lc)
++{
++    int idx;  // number of set bypass bins seen so far
++    for (idx = 0; idx < 2 && get_cabac_bypass(&lc->cc); idx++)
++        ;  // at most two bins are read; a clear bin ends the run early
++    return idx;
++}
++
++int ff_hevc_rpi_rem_intra_luma_pred_mode_decode(HEVCRpiLocalContext * const lc)
++{
++    int mode = 0;
++    int remaining;
++
++    for (remaining = 5; remaining > 0; remaining--)  // five bypass bins, most significant first
++        mode = (mode << 1) | get_cabac_bypass(&lc->cc);
++    return mode;
++}
++
++int ff_hevc_rpi_intra_chroma_pred_mode_decode(HEVCRpiLocalContext * const lc)  // Returns 4 when the context-coded flag is clear, otherwise a 2-bit bypass-coded value
++{
++    int ret;
++    if (!GET_CABAC_LC(elem_offset[INTRA_CHROMA_PRED_MODE]))
++        return 4;  // flag clear: no further bins are read
++
++    ret  = get_cabac_bypass(&lc->cc) << 1;  // first bypass bin becomes bit 1
++    ret |= get_cabac_bypass(&lc->cc);  // second bypass bin becomes bit 0
++    return ret;
++}
++
++int ff_hevc_rpi_merge_idx_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc)  // Decode merge_idx: one context-coded bin, then bypass bins up to max_num_merge_cand - 1
++{
++    int i = GET_CABAC_LC(elem_offset[MERGE_IDX]);  // first bin is context coded
++
++    if (i != 0) {  // only continue when the first bin was set
++        while (i < s->sh.max_num_merge_cand-1 && get_cabac_bypass(&lc->cc))  // each further set bypass bin increments the index
++            i++;
++    }
++    return i;
++}
++
++int ff_hevc_rpi_inter_pred_idc_decode(HEVCRpiLocalContext * const lc, int nPbW, int nPbH)  // Decode inter_pred_idc for a prediction block of nPbW x nPbH
++{
++    if (nPbW + nPbH == 12)  // for this size sum only the bin at context +4 is read, so PRED_BI is never returned
++        return GET_CABAC_LC(elem_offset[INTER_PRED_IDC] + 4);
++    if (GET_CABAC_LC(elem_offset[INTER_PRED_IDC] + lc->ct_depth))  // context chosen by lc->ct_depth
++        return PRED_BI;
++
++    return GET_CABAC_LC(elem_offset[INTER_PRED_IDC] + 4);  // raw bin value from context +4
++}
++
++int ff_hevc_rpi_ref_idx_lx_decode(HEVCRpiLocalContext * const lc, const int num_ref_idx_lx)  // Decode a reference index in 0 .. num_ref_idx_lx - 1
++{
++    int i = 0;
++    int max = num_ref_idx_lx - 1;  // largest index that can be returned
++    int max_ctx = FFMIN(max, 2);  // at most two bins are context coded
++
++    while (i < max_ctx && GET_CABAC_LC(elem_offset[REF_IDX_L0] + i))  // the REF_IDX_L0 contexts are used here whichever list is being decoded
++        i++;
++    if (i == 2) {  // both context bins were set: continue with bypass bins
++        while (i < max && get_cabac_bypass(&lc->cc))
++            i++;
++    }
++
++    return i;
++}
++
++static av_always_inline int abs_mvd_greater0_flag_decode(HEVCRpiLocalContext * const lc)  // Decode one bin with the first ABS_MVD_GREATER0_FLAG context
++{
++    return GET_CABAC_LC(elem_offset[ABS_MVD_GREATER0_FLAG]);  // bin value is returned unchanged
++}
++
++static av_always_inline int abs_mvd_greater1_flag_decode(HEVCRpiLocalContext * const lc)  // Decode one bin with the second ABS_MVD_GREATER1_FLAG context
++{
++    return GET_CABAC_LC(elem_offset[ABS_MVD_GREATER1_FLAG] + 1);  // note the +1: the first context of this element is not used here
++}
++
++#if !USE_BY22
++static av_always_inline int mvd_decode(HEVCRpiLocalContext * const lc)  // Non-BY22 decode of a bypass-coded motion vector difference; returns 0 on an over-long prefix
++{
++    int ret = 2;  // magnitude starts at 2
++    int k = 1;  // suffix length starts at 1 bit
++
++    while (k < CABAC_MAX_BIN && get_cabac_bypass(&lc->cc)) {  // unary prefix: each set bin adds 1 << k and lengthens the suffix
++        ret += 1U << k;
++        k++;
++    }
++    if (k == CABAC_MAX_BIN) {  // prefix hit the bin limit: log and give up
++        av_log(NULL, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", k);
++        return 0;
++    }
++
++    while (k--)  // k suffix bins, most significant first
++        ret += get_cabac_bypass(&lc->cc) << k;
++    return get_cabac_bypass_sign(&lc->cc, -ret);  // NOTE(review): presumably yields +ret or -ret depending on one more bypass bin - confirm in cabac_functions.h
++}
++#endif
++
++static av_always_inline int mvd_sign_flag_decode(HEVCRpiLocalContext * const lc)  // Decode a bypass-coded sign via get_cabac_bypass_sign()
++{
++    return get_cabac_bypass_sign(&lc->cc, -1);  // NOTE(review): presumably returns +1 or -1 depending on the bypass bin - confirm in cabac_functions.h
++}
++
++static int hevc_transform_skip_flag_decode(HEVCRpiLocalContext * const lc, int c_idx_nz)  // Decode one context-coded bin for transform_skip_flag
++{
++    return GET_CABAC_LC(elem_offset[TRANSFORM_SKIP_FLAG] + c_idx_nz);  // c_idx_nz selects between the element's contexts
++}
++
++static int explicit_rdpcm_flag_decode(HEVCRpiLocalContext * const lc, int c_idx_nz)  // Decode one context-coded bin for explicit_rdpcm_flag
++{
++    return GET_CABAC_LC(elem_offset[EXPLICIT_RDPCM_FLAG] + c_idx_nz);  // c_idx_nz selects between the element's contexts
++}
++
++static int explicit_rdpcm_dir_flag_decode(HEVCRpiLocalContext * const lc, int c_idx_nz)  // Decode one context-coded bin for explicit_rdpcm_dir_flag
++{
++    return GET_CABAC_LC(elem_offset[EXPLICIT_RDPCM_DIR_FLAG] + c_idx_nz);  // c_idx_nz selects between the element's contexts
++}
++
++
++int ff_hevc_rpi_log2_res_scale_abs(HEVCRpiLocalContext * const lc, const int idx) {
++    const int ctx_base = elem_offset[LOG2_RES_SCALE_ABS] + 4 * idx;  // each idx has its own group of four contexts
++    int count;
++
++    for (count = 0; count < 4 && GET_CABAC_LC(ctx_base + count); count++)
++        ;  // count set bins, one context per bin position, up to four
++    return count;
++}
++
++static av_always_inline void last_significant_coeff_xy_prefix_decode(HEVCRpiLocalContext * const lc, int c_idx_nz,  // Decode the X then Y last-significant-coefficient prefixes into the two out params
++                                                   int log2_size, int *last_scx_prefix, int *last_scy_prefix)
++{
++    int i = 0;
++    int max = (log2_size << 1) - 1;  // upper bound on each prefix
++    int ctx_offset, ctx_shift;
++
++    if (!c_idx_nz) {  // c_idx_nz == 0: context offset and shift depend on the block size
++        ctx_offset = 3 * (log2_size - 2)  + ((log2_size - 1) >> 2);
++        ctx_shift = (log2_size + 1) >> 2;
++    } else {  // c_idx_nz != 0: fixed offset of 15
++        ctx_offset = 15;
++        ctx_shift = log2_size - 2;
++    }
++    while (i < max &&
++           GET_CABAC_LC(elem_offset[LAST_SIGNIFICANT_COEFF_X_PREFIX] + (i >> ctx_shift) + ctx_offset))  // several bin positions share one context via the shift
++        i++;
++    *last_scx_prefix = i;
++
++    i = 0;  // restart the count for the Y prefix
++    while (i < max &&
++           GET_CABAC_LC(elem_offset[LAST_SIGNIFICANT_COEFF_Y_PREFIX] + (i >> ctx_shift) + ctx_offset))  // same offset and shift, Y context set
++        i++;
++    *last_scy_prefix = i;
++}
++
++static av_always_inline int last_significant_coeff_suffix_decode(HEVCRpiLocalContext * const lc,  // Read the bypass-coded suffix that belongs to a given prefix value
++                                                 int last_significant_coeff_prefix)
++{
++    int i;
++    int length = (last_significant_coeff_prefix >> 1) - 1;  // suffix length in bits
++    int value = get_cabac_bypass(&lc->cc);  // one bin is always read, even if length < 1; presumably callers only call this when a suffix exists - verify against caller
++
++    for (i = 1; i < length; i++)  // remaining length - 1 bins, most significant first
++        value = (value << 1) | get_cabac_bypass(&lc->cc);
++    return value;
++}
++
++static av_always_inline int significant_coeff_group_flag_decode(HEVCRpiLocalContext * const lc, int c_idx_nz, int ctx_cg)
++{
++    // Context increment: c_idx_nz shifted up one bit, plus one more
++    // whenever ctx_cg is non-zero
++    const int ctx_inc = (c_idx_nz << 1) + (ctx_cg != 0);
++
++    return GET_CABAC_LC(elem_offset[SIGNIFICANT_COEFF_GROUP_FLAG] + ctx_inc);
++}
++
++static av_always_inline int significant_coeff_flag_decode_0(HEVCRpiLocalContext * const lc, int offset)  // Decode one significant_coeff_flag bin with a caller-supplied context offset
++{
++    return GET_CABAC_LC(elem_offset[SIGNIFICANT_COEFF_FLAG] + offset);  // offset is relative to the first SIGNIFICANT_COEFF_FLAG context
++}
++
++#if !USE_BY22  // without by22 the "bypass" variant is simply the generic bin-by-bin decoder
++#define coeff_abs_level_remaining_decode_bypass(s,r) coeff_abs_level_remaining_decode(s, r)
++#endif
++
++// by22 implementation: decodes prefix + suffix from a peeked 32-bit word instead of bin by bin
++#ifndef coeff_abs_level_remaining_decode_bypass
++static int coeff_abs_level_remaining_decode_bypass(CABACContext * const c, const unsigned int rice_param)
++{
++    uint32_t y;
++    unsigned int prefix;
++    unsigned int last_coeff_abs_level_remaining;
++    unsigned int n;  // number of bits to consume with the final flush
++
++    y = get_cabac_by22_peek(c);
++    prefix = hevc_clz32(~y);  // presumably the number of leading 1 bits in y - confirm against hevc_clz32
++    // y << prefix will always have top bit 0
++
++    if (prefix < 3) {
++        const unsigned int suffix = (y << prefix) >> (31 - rice_param);
++        last_coeff_abs_level_remaining = (prefix << rice_param) + suffix;
++        n = prefix + 1 + rice_param;
++    }
++    else if (prefix * 2 + rice_param <= CABAC_BY22_PEEK_BITS + 2)  // prefix and suffix both fit in one peek
++    {
++        const uint32_t suffix = ((y << prefix) | 0x80000000) >> (34 - (prefix + rice_param));
++
++        last_coeff_abs_level_remaining = (2 << rice_param) + suffix;
++        n = prefix * 2 + rice_param - 2;
++    }
++    else {
++        unsigned int suffix;
++        // Too long for a single peek: consume the prefix first, then peek again for the suffix
++        get_cabac_by22_flush(c, prefix, y);
++        y = get_cabac_by22_peek(c);
++
++        suffix = (y | 0x80000000) >> (34 - (prefix + rice_param));
++        last_coeff_abs_level_remaining = (2 << rice_param) + suffix;
++        n = prefix + rice_param - 2;
++    }
++
++    get_cabac_by22_flush(c, n, y);
++
++    return last_coeff_abs_level_remaining;
++}
++#endif
++
++static int coeff_abs_level_remaining_decode(CABACContext * const c, int rc_rice_param)
++{   // Bin-by-bin bypass decode: unary prefix followed by a suffix whose length depends on the prefix
++    int prefix = 0;
++    int suffix = 0;
++    int last_coeff_abs_level_remaining;
++    int i;
++    // Unary prefix: count 1 bins, capped at CABAC_MAX_BIN
++    while (prefix < CABAC_MAX_BIN && get_cabac_bypass(c))
++        prefix++;
++    if (prefix == CABAC_MAX_BIN) {  // cap reached without a terminating 0 bin: give up and return 0
++//        av_log(s->avctx, AV_LOG_ERROR, "CABAC_MAX_BIN : %d\n", prefix);
++        return 0;
++    }
++
++    if (prefix < 3) {  // short prefix: suffix is rc_rice_param bits
++        for (i = 0; i < rc_rice_param; i++)
++            suffix = (suffix << 1) | get_cabac_bypass(c);
++        last_coeff_abs_level_remaining = (prefix << rc_rice_param) + suffix;
++    } else {  // long prefix: suffix is (prefix - 3 + rc_rice_param) bits
++        int prefix_minus3 = prefix - 3;
++        for (i = 0; i < prefix_minus3 + rc_rice_param; i++)
++            suffix = (suffix << 1) | get_cabac_bypass(c);
++        last_coeff_abs_level_remaining = (((1 << prefix_minus3) + 3 - 1)
++                                              << rc_rice_param) + suffix;
++    }
++
++    return last_coeff_abs_level_remaining;
++}
++
++#if !USE_BY22  // without by22 the "bypass" name maps onto this bin-by-bin version
++#define coeff_sign_flag_decode_bypass coeff_sign_flag_decode
++static inline uint32_t coeff_sign_flag_decode(CABACContext * const c, const unsigned int nb)
++{   // Reads nb bypass bins and returns them left-justified in 32 bits (first bin in bit 31)
++    unsigned int i;
++    uint32_t ret = 0;
++
++    for (i = 0; i < nb; i++)
++        ret = (ret << 1) | get_cabac_bypass(c);
++
++    return ret << (32 - nb);  // NOTE(review): nb == 0 would shift by 32 - callers appear to pass nb >= 1, confirm
++}
++#endif
++
++#ifndef coeff_sign_flag_decode_bypass
++static inline uint32_t coeff_sign_flag_decode_bypass(CABACContext * const c, const unsigned int nb)
++{   // by22 version: takes nb sign bits from one peeked word
++    uint32_t y;
++    y = get_cabac_by22_peek(c);
++    get_cabac_by22_flush(c, nb, y);  // presumably consumes nb bits of the peeked word - confirm against helper
++    return y & ~(0xffffffffU >> nb);  // keep only the top nb bits of y
++}
++#endif
++
++
++#ifndef get_cabac_greater1_bits
++static inline unsigned int get_cabac_greater1_bits(CABACContext * const c, const unsigned int n,
++    uint8_t * const state0)
++{   // Decodes n bins and returns them packed into the low n bits, first bin most significant
++    unsigned int i;
++    unsigned int rv = 0;
++    for (i = 0; i != n; ++i) {
++        const unsigned int idx = rv != 0 ? 0 : i < 3 ? i + 1 : 3;  // context 0 once any 1 bin has been seen, else min(i + 1, 3)
++        const unsigned int b = get_cabac(c, state0 + idx);
++        rv = (rv << 1) | b;
++    }
++    return rv;
++}
++#endif
++
++
++// N.B. levels returned are the values assuming coeff_abs_level_remaining
++// is uncoded, so 1 must be added if it is coded.  sum_abs also reflects
++// this version of events.
++static inline uint32_t get_greaterx_bits(HEVCRpiLocalContext * const lc, const unsigned int n_end, int * const levels,
++    int * const pprev_subset_coded, int * const psum,
++    const unsigned int idx0_gt1, const unsigned int idx_gt2)
++{   // Returns a left-justified bit mask (bit 31 = levels[0]) of entries still flagged after the gt1/gt2 decode
++    CABACContext * const c = &lc->cc;
++    uint8_t * const state0 = lc->cabac_state + idx0_gt1;
++    uint8_t * const state_gt2 = lc->cabac_state + idx_gt2;
++    unsigned int rv;
++    unsigned int i;
++    const unsigned int n = FFMIN(n_end, 8);  // at most 8 greater1 bins are decoded
++
++    // Really this is i != n but the simple unconditional loop is cheaper
++    // and faster
++    for (i = 0; i != 8; ++i)
++        levels[i] = 1;
++
++    rv = get_cabac_greater1_bits(c, n, state0);
++
++    *pprev_subset_coded = 0;
++    *psum = n;
++
++    rv <<= (32 - n);  // left-justify: first decoded bin moves to bit 31 (n must be >= 1)
++    if (rv != 0)
++    {
++        *pprev_subset_coded = 1;
++        *psum = n + 1;
++        i = hevc_clz32(rv);  // presumably index of the first set bin - confirm against hevc_clz32
++        levels[i] = 2;
++        if (get_cabac(c, state_gt2) == 0)  // a single greater2 bin is decoded, for that first entry only
++        {
++            // Unset first coded bit
++            rv &= ~(0x80000000U >> i);
++        }
++    }
++
++    if (n_end > 8) {  // entries past the first 8 get no greater1 bin: flag them all and give them level 0
++        const unsigned int g8 = n_end - 8;
++        rv |= ((1 << g8) - 1) << (24 - g8);
++        for (i = 0; i != g8; ++i) {
++            levels[i + 8] = 0;
++        }
++    }
++
++    return rv;
++}
++
++// extended_precision_processing_flag must be false given we are
++// putting the result into a 16-bit array
++// So trans_coeff_level must fit in 16 bits too (7.4.9.1 definition of coeff_abs_level_remaining)
++// scale_m is uint8_t
++//
++// scale is [40 - 72] << [0..12] based on qp - worst case is (45 << 12)
++//   or it can be 2 (if we have transquant_bypass)
++// shift is set to one less than we really want but would normally be
++//   s->ps.sps->bit_depth (max 16, min 8) + log2_trafo_size (max 5, min 2?) - 5 = max 16 min 5?
++// however the scale shift is subtracted from shift to a min 0 so scale_m worst = 45 << 6
++// This can still theoretically lead to overflow but the coding would have to be very odd (& inefficient)
++// to achieve it
++
++#ifndef trans_scale_sat
++static inline int trans_scale_sat(const int level, const unsigned int scale, const unsigned int scale_m, const unsigned int shift)
++{
++    return av_clip_int16((((level * (int)(scale * scale_m)) >> shift) + 1) >> 1);  // scale, >> shift, then +1 and >> 1 (the extra shift noted above), passed to av_clip_int16()
++}
++#endif
++
++
++#ifndef update_rice
++static inline void update_rice(uint8_t * const stat_coeff,
++    const unsigned int last_coeff_abs_level_remaining,
++    const unsigned int c_rice_param)
++{   // Adapts *stat_coeff up or down by one based on the value just decoded relative to the Rice parameter used
++    const unsigned int x = (last_coeff_abs_level_remaining << 1) >> c_rice_param;
++    if (x >= 6)  // i.e. value >= 3 << c_rice_param
++        (*stat_coeff)++;
++    else if (x == 0 && *stat_coeff > 0)  // i.e. 2 * value < 1 << c_rice_param; never goes below 0
++        (*stat_coeff)--;
++}
++#endif
++
++
++// Decodes one flag per position n..1 (n must be > 0 on entry); returns pointer just past the last index stored
++#ifndef get_cabac_sig_coeff_flag_idxs
++static inline uint8_t * get_cabac_sig_coeff_flag_idxs(CABACContext * const c, uint8_t * const state0,
++    unsigned int n,
++    const uint8_t * const ctx_map,  // read-only map from position to context offset (was a duplicated 'const' qualifier)
++    uint8_t * p)                    // output cursor: receives each position whose flag decoded as 1
++{
++    do {
++        if (get_cabac(c, state0 + ctx_map[n]))
++            *p++ = n;
++    } while (--n != 0);  // position 0 is not decoded here
++    return p;
++}
++#endif
++
++
++static int get_sig_coeff_flag_idxs(CABACContext * const c, uint8_t * const state0,
++    unsigned int n,
++    const uint8_t * ctx_map,  // const ptr here but not in asm
++    uint8_t * const flag_idx)
++{   // Wrapper that converts the end pointer returned by the worker into a count of indices written
++    int rv;
++
++    rv = get_cabac_sig_coeff_flag_idxs(c, state0, n, ctx_map, flag_idx) - flag_idx;
++
++    return rv;  // number of entries stored into flag_idx[]
++}
++
++#define H4x4(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) {\
++     x0,  x1,  x2,  x3,\
++     x4,  x5,  x6,  x7,\
++    x12, x13, x14, x15}
++// H4x4 (above) emits its 16 arguments in the order given; V4x4 (below) emits them transposed as a 4x4 matrix
++#define V4x4(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) {\
++     x0,  x4,  x8, x12,\
++     x1,  x5,  x9, x13,\
++     x2,  x6, x10, x14,\
++     x3,  x7, x11, x15}
++// D4x4 emits the arguments in the order x0,x4,x1,x8,x5,x2,x12,x9,x6,x3,x13,x10,x7,x14,x11,x15
++#define D4x4(x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15) {\
++     x0,  x4,  x1,  x8,\
++     x5,  x2, x12,  x9,\
++     x6,  x3, x13, x10,\
++     x7, x14, x11, x15}
++
++
++static inline int next_subset(HEVCRpiLocalContext * const lc, int i, const int c_idx_nz,
++    uint8_t * const significant_coeff_group_flag,
++    const uint8_t * const scan_x_cg, const uint8_t * const scan_y_cg,
++    int * const pPrev_sig)
++{   // Steps i downwards to the next subset that is coded (or to subset 0); returns that index, negative if none
++    while (--i >= 0) {
++        uint8_t * const gf_y = scan_y_cg[i] + significant_coeff_group_flag;  // flag byte for this subset's row
++        const unsigned int x_cg = scan_x_cg[i];
++
++        // For the flag decode we only care about Z/NZ but
++        // we use the full Right * 2 + Down when calculating
++        // significant coeff flags so we obtain it here.
++        //
++        // The group flag array is one longer than it needs to
++        // be so we don't need to check for y_cg limits
++        const unsigned int prev_sig = ((gf_y[0] >> x_cg) & 2) | ((gf_y[1] >> x_cg) & 1);
++
++        if (i == 0 ||  // subset 0 is taken without decoding a group flag
++            significant_coeff_group_flag_decode(lc, c_idx_nz, prev_sig))
++        {
++            gf_y[0] |= (1 << x_cg);  // record this subset as significant
++            *pPrev_sig = prev_sig;   // only written when a subset is selected
++            break;
++        }
++    }
++
++    return i;
++}
++
++static void rpi_add_residual(const HEVCRpiContext *const s, HEVCRpiJob * const jb,
++    const unsigned int log2_trafo_size, const unsigned int c_idx,
++    const unsigned int x0, const unsigned int y0, const int16_t * const coeffs)
++{   // Queues an add-residual command in jb->intra, merging with the preceding command when c_idx == 2 hits the same dst
++    const AVFrame * const frame = s->frame;
++    const unsigned int stride = frame_stride1(s->frame, c_idx);
++    const unsigned int x = x0 >> ctx_hshift(s, c_idx);
++    const unsigned int y = y0 >> ctx_vshift(s, c_idx);
++    const int is_sliced = 1;  // av_rpi_is_sand_frame(frame);
++    uint8_t * const dst = !is_sliced ?
++            s->frame->data[c_idx] + y * stride + (x << s->ps.sps->pixel_shift) :
++        c_idx == 0 ?
++            av_rpi_sand_frame_pos_y(frame, x, y) :
++            av_rpi_sand_frame_pos_c(frame, x, y);
++    // is_sliced is hard-wired to 1 above, so the first (!is_sliced) arm is never taken
++    const unsigned int i = jb->intra.n;
++    HEVCPredCmd *const pc = jb->intra.cmds + i - 1;  // last queued command; only dereferenced when i != 0
++
++    if (i != 0 && c_idx == 2 && pc->type == RPI_PRED_ADD_RESIDUAL_U &&
++        pc->ta.dst == dst)
++    {
++        av_assert1(pc->size == log2_trafo_size &&
++                   pc->c_idx == 1 &&
++                   pc->ta.stride == stride);
++        // Previous command is the U residual for this dst: retag it; coeffs is not stored in this branch
++        pc->type = RPI_PRED_ADD_RESIDUAL_C;
++    }
++    else if (i != 0 && c_idx == 2 && pc->type == RPI_PRED_ADD_DC_U &&
++        pc->dc.dst == dst)
++    {
++        const int16_t dc = (int16_t)pc->dc.dc;  // Discard top bits
++        av_assert1(pc->size == log2_trafo_size &&
++                   pc->c_idx == 1 &&
++                   pc->dc.stride == stride);
++
++        // Rewrite as add residual - must rewrite all fields as different union member
++        pc->type = RPI_PRED_ADD_RESIDUAL_V;
++        pc->ta.buf = coeffs;
++        pc->ta.dst = dst;
++        pc->ta.stride = stride;
++        pc->ta.dc = dc;  // carries over the DC value from the replaced DC_U command
++    }
++    else
++    {
++        HEVCPredCmd * const cmd = pc + 1;  // next free slot, i.e. cmds + i
++        jb->intra.n = i + 1;
++
++        cmd->type = RPI_PRED_ADD_RESIDUAL + (is_sliced ? c_idx : 0);
++        cmd->size = log2_trafo_size;
++        cmd->ta.buf = coeffs;
++        cmd->ta.dst = dst;
++        cmd->ta.stride = stride;
++        cmd->ta.dc = 0;
++    }
++}
++
++
++static void rpi_add_dc(const HEVCRpiContext * const s, HEVCRpiJob * const jb,
++    const unsigned int log2_trafo_size, const unsigned int c_idx,
++    const unsigned int x0, const unsigned int y0, const int16_t * const coeffs)
++{   // Queues an add-DC command built from coeffs[0], merging with the preceding command when c_idx == 2 hits the same dst
++    const AVFrame * const frame = s->frame;
++    const unsigned int stride = frame_stride1(s->frame, c_idx);
++    const unsigned int x = x0 >> ctx_hshift(s, c_idx);
++    const unsigned int y = y0 >> ctx_vshift(s, c_idx);
++    const int is_sliced = 1;  // hard-wired, so the !is_sliced arm below is never taken
++    uint8_t * const dst = !is_sliced ?
++            s->frame->data[c_idx] + y * stride + (x << s->ps.sps->pixel_shift) :
++        c_idx == 0 ?
++            av_rpi_sand_frame_pos_y(frame, x, y) :
++            av_rpi_sand_frame_pos_c(frame, x, y);
++
++    const unsigned int shift = FFMAX(14 - s->ps.sps->bit_depth, 0);
++    const int coeff = (coeffs[0] + (1 | (1 << shift))) >> (shift + 1);  // only coeffs[0] is read
++
++    const unsigned int i = jb->intra.n;
++    HEVCPredCmd *const pc = jb->intra.cmds + i - 1;  // last queued command; only dereferenced when i != 0
++
++    if (i != 0 && c_idx == 2 && pc->type == RPI_PRED_ADD_RESIDUAL_U &&
++        pc->ta.dst == dst)
++    {
++        av_assert1(pc->size == log2_trafo_size &&
++                   pc->c_idx == 1 &&
++                   pc->ta.stride == stride);
++        // Previous command is a U residual for this dst: attach this DC value to it instead of queuing a new command
++        pc->ta.dc = (int16_t)coeff;
++    }
++    else if (i != 0 && c_idx == 2 && pc->type == RPI_PRED_ADD_DC_U &&
++        pc->dc.dst == dst)
++    {
++        av_assert1(pc->size == log2_trafo_size &&
++                   pc->c_idx == 1 &&
++                   pc->dc.stride == stride &&
++                   (pc->dc.dc & ~0xffff) == 0);
++        // Previous command is a U DC for this dst: pack this value into the upper 16 bits of its dc word
++        pc->dc.dc |= (coeff << 16);
++    }
++    else
++    {
++        HEVCPredCmd * const cmd = pc + 1;  // next free slot, i.e. cmds + i
++        jb->intra.n = i + 1;
++
++        cmd->type = RPI_PRED_ADD_DC + c_idx;
++        cmd->size = log2_trafo_size;
++        cmd->dc.dst = dst;
++        cmd->dc.stride = stride;
++        cmd->dc.dc = c_idx == 0 ? coeff : c_idx == 2 ? coeff << 16 : coeff & 0xffff;  // c_idx 1 -> low 16 bits, c_idx 2 -> high 16 bits
++    }
++}
++
++
++void ff_hevc_rpi_hls_residual_coding(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                                const int x0, const int y0,
++                                const int log2_trafo_size, const enum ScanType scan_idx,
++                                const int c_idx)
++{
++    int trans_skip_or_bypass = lc->cu.cu_transquant_bypass_flag;
++
++    int last_significant_coeff_x, last_significant_coeff_y;
++    int num_coeff = 0;
++    int prev_subset_coded = 0;
++
++    int num_last_subset;
++    int x_cg_last_sig, y_cg_last_sig;
++
++    const uint8_t *scan_x_cg, *scan_y_cg;
++    const xy_off_t * const scan_xy_off = off_xys[scan_idx][log2_trafo_size - 2];
++
++    int use_vpu;
++#if RPI_COMPRESS_COEFFS
++    int num_nonzero = 0;
++    int use_compress = 0;
++    int *coeffs32;
++#endif
++    int use_dc = 0;
++    int16_t *coeffs;
++    uint8_t significant_coeff_group_flag[9] = {0};  // Allow 1 final byte that is always zero
++    int explicit_rdpcm_flag = 0;
++    int explicit_rdpcm_dir_flag;
++
++    int i;
++    int shift,scale;
++    const uint8_t *scale_matrix = NULL;
++    uint8_t dc_scale;
++    const int c_idx_nz = (c_idx != 0);
++    const int pred_mode_intra = c_idx_nz ? lc->tu.intra_pred_mode_c : lc->tu.intra_pred_mode;
++    int prev_sig = 0;
++    int may_hide_sign;
++
++    int16_t dummy_coeffs[16];
++
++    // Derive QP for dequant
++    if (!lc->cu.cu_transquant_bypass_flag) {
++        may_hide_sign = s->ps.pps->sign_data_hiding_flag;
++
++        if (s->ps.pps->transform_skip_enabled_flag &&
++            log2_trafo_size <= s->ps.pps->log2_max_transform_skip_block_size) {
++            int transform_skip_flag = hevc_transform_skip_flag_decode(lc, c_idx_nz);
++            if (transform_skip_flag) {
++                trans_skip_or_bypass = 1;
++                if (lc->cu.pred_mode ==  MODE_INTRA  &&
++                    s->ps.sps->implicit_rdpcm_enabled_flag &&
++                    (pred_mode_intra == 10 || pred_mode_intra == 26)) {
++                    may_hide_sign = 0;
++                }
++            }
++        }
++
++        {
++            static const uint8_t level_scale[8] = {
++                40, 45, 51, 57, 64, 72, 0, 0  // Pad to 8
++            };
++            const int qp6 = (int8_t)lc->tu.qp_divmod6[c_idx][lc->qp_y];
++
++            // Shift is set to one less than will actually occur as the scale
++            // and saturate step adds 1 and then shifts right again
++            scale = level_scale[qp6 & 7];
++//            shift = s->ps.sps->bit_depth + log2_trafo_size - (int)(qp6 >> 3);
++            shift = log2_trafo_size - (qp6 >> 3);
++
++            if (shift < 0) {
++                scale <<= -shift;
++                shift = 0;
++            }
++        }
++
++        if (s->ps.sps->scaling_list_enable_flag && !(trans_skip_or_bypass && log2_trafo_size > 2)) {
++            const ScalingList * const sl = s->ps.pps->scaling_list_data_present_flag ?
++                &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
++            const unsigned int matrix_id =
++                lc->cu.pred_mode != MODE_INTRA ? 3 + c_idx : c_idx;
++
++            scale_matrix = sl->sl[log2_trafo_size - 2][matrix_id];
++            dc_scale = scale_matrix[0];
++            if (log2_trafo_size >= 4)
++                dc_scale = sl->sl_dc[log2_trafo_size - 4][matrix_id];
++        }
++        else
++        {
++            static const uint8_t sixteen_scale[64] = {
++                16, 16, 16, 16, 16, 16, 16, 16,
++                16, 16, 16, 16, 16, 16, 16, 16,
++                16, 16, 16, 16, 16, 16, 16, 16,
++                16, 16, 16, 16, 16, 16, 16, 16,
++                16, 16, 16, 16, 16, 16, 16, 16,
++                16, 16, 16, 16, 16, 16, 16, 16,
++                16, 16, 16, 16, 16, 16, 16, 16,
++                16, 16, 16, 16, 16, 16, 16, 16
++            };
++            scale_matrix = sixteen_scale;
++            dc_scale = 16;
++        }
++    } else {
++        static const uint8_t unit_scale[64] = {
++            1, 1, 1, 1, 1, 1, 1, 1,
++            1, 1, 1, 1, 1, 1, 1, 1,
++            1, 1, 1, 1, 1, 1, 1, 1,
++            1, 1, 1, 1, 1, 1, 1, 1,
++            1, 1, 1, 1, 1, 1, 1, 1,
++            1, 1, 1, 1, 1, 1, 1, 1,
++            1, 1, 1, 1, 1, 1, 1, 1,
++            1, 1, 1, 1, 1, 1, 1, 1,
++        };
++        scale_matrix = unit_scale;
++        shift        = 0;
++        scale        = 2;  // We will shift right to kill this
++        dc_scale     = 1;
++
++        may_hide_sign = 0;
++    }
++
++
++
++
++    if (lc->cu.pred_mode == MODE_INTER && s->ps.sps->explicit_rdpcm_enabled_flag &&
++        trans_skip_or_bypass) {
++        explicit_rdpcm_flag = explicit_rdpcm_flag_decode(lc, c_idx_nz);
++        if (explicit_rdpcm_flag) {
++            may_hide_sign = 0;
++            explicit_rdpcm_dir_flag = explicit_rdpcm_dir_flag_decode(lc, c_idx_nz);
++        }
++    }
++
++    last_significant_coeff_xy_prefix_decode(lc, c_idx_nz, log2_trafo_size,
++                                           &last_significant_coeff_x, &last_significant_coeff_y);
++
++    if (last_significant_coeff_x > 3) {
++        int suffix = last_significant_coeff_suffix_decode(lc, last_significant_coeff_x);
++        last_significant_coeff_x = (1 << ((last_significant_coeff_x >> 1) - 1)) *
++        (2 + (last_significant_coeff_x & 1)) +
++        suffix;
++    }
++
++    if (last_significant_coeff_y > 3) {
++        int suffix = last_significant_coeff_suffix_decode(lc, last_significant_coeff_y);
++        last_significant_coeff_y = (1 << ((last_significant_coeff_y >> 1) - 1)) *
++        (2 + (last_significant_coeff_y & 1)) +
++        suffix;
++    }
++
++    if (scan_idx == SCAN_VERT)
++        FFSWAP(int, last_significant_coeff_x, last_significant_coeff_y);
++
++    x_cg_last_sig = last_significant_coeff_x >> 2;
++    y_cg_last_sig = last_significant_coeff_y >> 2;
++
++    switch (scan_idx) {
++    case SCAN_DIAG: {
++        int last_x_c = last_significant_coeff_x & 3;
++        int last_y_c = last_significant_coeff_y & 3;
++
++        num_coeff = diag_scan4x4_inv[last_y_c][last_x_c];
++
++        switch (log2_trafo_size) {
++        case 2:
++            scan_x_cg = scan_1x1;
++            scan_y_cg = scan_1x1;
++            break;
++        case 3:
++            num_coeff += diag_scan2x2_inv[y_cg_last_sig][x_cg_last_sig] << 4;
++            scan_x_cg = diag_scan2x2_x;
++            scan_y_cg = diag_scan2x2_y;
++            break;
++        case 4:
++            num_coeff += diag_scan4x4_inv[y_cg_last_sig][x_cg_last_sig] << 4;
++            scan_x_cg = ff_hevc_rpi_diag_scan4x4_x;
++            scan_y_cg = ff_hevc_rpi_diag_scan4x4_y;
++            break;
++        case 5:
++        default:
++            num_coeff += diag_scan8x8_inv[y_cg_last_sig][x_cg_last_sig] << 4;
++            scan_x_cg = ff_hevc_rpi_diag_scan8x8_x;
++            scan_y_cg = ff_hevc_rpi_diag_scan8x8_y;
++            break;
++        }
++        break;
++    }
++    case SCAN_HORIZ:
++        scan_x_cg = horiz_scan2x2_x;
++        scan_y_cg = horiz_scan2x2_y;
++        num_coeff = horiz_scan8x8_inv[last_significant_coeff_y][last_significant_coeff_x];
++        break;
++    default: //SCAN_VERT
++        scan_x_cg = horiz_scan2x2_y;
++        scan_y_cg = horiz_scan2x2_x;
++        num_coeff = horiz_scan8x8_inv[last_significant_coeff_x][last_significant_coeff_y];
++        break;
++    }
++    num_coeff++;
++    num_last_subset = (num_coeff - 1) >> 4;
++
++    significant_coeff_group_flag[y_cg_last_sig] = 1 << x_cg_last_sig; // 1st subset always significant
++
++    {
++        const unsigned int ccount = 1 << (log2_trafo_size * 2);
++        const int special = trans_skip_or_bypass /* || lc->tu.cross_pf */;  // These need special processing
++        use_vpu = 0;
++        use_dc = (num_coeff == 1) && !special &&
++            !(lc->cu.pred_mode == MODE_INTRA && c_idx == 0 && log2_trafo_size == 2);
++
++        if (use_dc) {
++            // Just need a little empty space
++            coeffs = dummy_coeffs;
++            // No need to clear
++        }
++        else
++        {
++            use_vpu = !special && log2_trafo_size >= 4;
++#if RPI_COMPRESS_COEFFS
++            use_compress = use_vpu && lc->jb0->coeffs.s[log2_trafo_size - 2].packed;
++#endif
++            coeffs = rpi_alloc_coeff_buf(lc->jb0, !use_vpu ? 0 : log2_trafo_size - 2, ccount);
++#if RPI_COMPRESS_COEFFS
++            coeffs32 = (int*)coeffs;
++            if (!use_compress)
++#endif
++#if HAVE_NEON
++            rpi_zap_coeff_vals_neon(coeffs, log2_trafo_size - 2);
++#else
++            memset(coeffs, 0, ccount * sizeof(int16_t));
++#endif
++        }
++    }
++
++    i = num_last_subset;
++    do {
++        int implicit_non_zero_coeff = 0;
++        int n_end;
++
++        uint8_t significant_coeff_flag_idx[16];
++        unsigned int nb_significant_coeff_flag = 0;
++
++        if (i == num_last_subset) {
++            // First time through
++            int last_scan_pos = num_coeff - (i << 4) - 1;
++            n_end = last_scan_pos - 1;
++            significant_coeff_flag_idx[0] = last_scan_pos;
++            nb_significant_coeff_flag = 1;
++        } else {
++            n_end = 15;
++            implicit_non_zero_coeff = (i != 0);
++        }
++
++        if (n_end >= 0) {
++            static const uint8_t ctx_idx_maps_ts2[3][16] = {
++                D4x4(0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8), // log2_trafo_size == 2
++                H4x4(0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8), // log2_trafo_size == 2
++                V4x4(0, 1, 4, 5, 2, 3, 4, 5, 6, 6, 8, 8, 7, 7, 8, 8)  // log2_trafo_size == 2
++            };
++            // N.B. prev_sig = Right * 2 + Down
++            static const uint8_t ctx_idx_maps[3][4][16] = {
++                {
++                    D4x4(1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 0
++                    D4x4(2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0), // prev_sig == 1
++                    D4x4(2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 2
++                    D4x4(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)  // prev_sig == 3, default
++                },
++                {
++                    H4x4(1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 0
++                    H4x4(2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0), // prev_sig == 1
++                    H4x4(2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 2
++                    H4x4(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)  // prev_sig == 3, default
++                },
++                {
++                    V4x4(1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 0
++                    V4x4(2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0, 2, 1, 0, 0), // prev_sig == 1
++                    V4x4(2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0), // prev_sig == 2
++                    V4x4(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)  // prev_sig == 3, default
++                }
++            };
++            const uint8_t *ctx_idx_map_p;
++            int scf_offset = 0;
++
++            if (s->ps.sps->transform_skip_context_enabled_flag && trans_skip_or_bypass) {
++                ctx_idx_map_p = ctx_idx_maps[0][3];
++                scf_offset = 40 + c_idx_nz;
++            } else {
++                if (c_idx_nz != 0)
++                    scf_offset = 27;
++
++                if (log2_trafo_size == 2) {
++                    ctx_idx_map_p = ctx_idx_maps_ts2[scan_idx];
++                } else {
++                    ctx_idx_map_p = ctx_idx_maps[scan_idx][prev_sig];
++                    if (!c_idx_nz) {
++                        if (i != 0)
++                            scf_offset += 3;
++
++                        if (log2_trafo_size == 3) {
++                            scf_offset += (scan_idx == SCAN_DIAG) ? 9 : 15;
++                        } else {
++                            scf_offset += 21;
++                        }
++                    } else {
++                        if (log2_trafo_size == 3)
++                            scf_offset += 9;
++                        else
++                            scf_offset += 12;
++                    }
++                }
++            }
++
++            if (n_end > 0) {
++                int cnt = get_sig_coeff_flag_idxs(&lc->cc,
++                    lc->cabac_state + elem_offset[SIGNIFICANT_COEFF_FLAG] + scf_offset,
++                    n_end, ctx_idx_map_p,
++                    significant_coeff_flag_idx + nb_significant_coeff_flag);
++
++                nb_significant_coeff_flag += cnt;
++                if (cnt != 0) {
++                    implicit_non_zero_coeff = 0;
++                }
++            }
++
++            if (implicit_non_zero_coeff == 0) {
++                if (s->ps.sps->transform_skip_context_enabled_flag && trans_skip_or_bypass) {
++                    scf_offset = 42 + c_idx_nz;
++                } else {
++                    if (i == 0) {
++                        scf_offset = c_idx_nz ? 27 : 0;
++                    } else {
++                        scf_offset = 2 + scf_offset;
++                    }
++                }
++                if (significant_coeff_flag_decode_0(lc, scf_offset) == 1) {
++                    significant_coeff_flag_idx[nb_significant_coeff_flag] = 0;
++                    nb_significant_coeff_flag++;
++                }
++            } else {
++                significant_coeff_flag_idx[nb_significant_coeff_flag] = 0;
++                nb_significant_coeff_flag++;
++            }
++        }
++#if RPI_COMPRESS_COEFFS
++        if (use_compress && (nb_significant_coeff_flag + num_nonzero + 1 >= (1<<(2*log2_trafo_size-1)))) { // Overflow when half-full!
++          int16_t temp[32*32];
++          const unsigned int ccount = 1 << (log2_trafo_size * 2);
++          lc->jb0->coeffs.s[log2_trafo_size - 2].packed = 0;
++          lc->jb0->coeffs.s[log2_trafo_size - 2].packed_n = lc->jb0->coeffs.s[log2_trafo_size - 2].n - ccount; // Don't want to unpack the last buffer
++          memcpy(temp, coeffs, sizeof(int)*num_nonzero);
++          coeffs32 = (int *)temp;
++          memset(coeffs, 0, ccount * sizeof(int16_t));
++          num_nonzero--;
++          while (num_nonzero >= 0) {
++            const unsigned int res = coeffs32[num_nonzero];
++            const unsigned int offset = res & 0xffff;
++            coeffs[ offset ] = res >> 16;
++            num_nonzero--;
++          }
++          use_compress = 0;
++        }
++#endif
++
++        if (nb_significant_coeff_flag != 0) {
++            const unsigned int gt1_idx_delta = (c_idx_nz << 2) |
++                ((i != 0 && !c_idx_nz) ? 2 : 0) |
++                prev_subset_coded;
++            const unsigned int idx0_gt1 = elem_offset[COEFF_ABS_LEVEL_GREATER1_FLAG] +
++                (gt1_idx_delta << 2);
++            const unsigned int idx_gt2 = elem_offset[COEFF_ABS_LEVEL_GREATER2_FLAG] +
++                gt1_idx_delta;
++
++            const unsigned int x_cg = scan_x_cg[i];
++            const unsigned int y_cg = scan_y_cg[i];
++            int16_t * const blk_coeffs = coeffs +
++                ((x_cg + (y_cg << log2_trafo_size)) << 2);
++            // This calculation is 'wrong' for log2_trafo_size == 2
++            // but that doesn't matter as in this case x_cg & y_cg
++            // are always 0 so result is correct (0) anyway
++            const uint8_t * const blk_scale = scale_matrix +
++                (((x_cg + (y_cg << 3)) << (5 - log2_trafo_size)));
++
++            // * The following code block doesn't deal with these flags:
++            //   (nor did the one it replaces)
++            //
++            // cabac_bypass_alignment_enabled_flag
++            //    This should be easy but I can't find a test case
++            // extended_precision_processing_flag
++            //    This can extend the required precision past 16bits
++            //    so is probably tricky - also no example found yet
++
++#if USE_N_END_1
++            if (nb_significant_coeff_flag == 1) {
++                // There is a small gain to be had from special casing the single
++                // transform coefficient case.  The reduction in complexity
++                // makes up for the code duplication.
++
++                int trans_coeff_level = 1;
++                int coeff_sign_flag;
++                int coded_val = 0;
++
++                // initialize first elem of coeff_abs_level_greater1_flag
++                prev_subset_coded = 0;
++
++                if (get_cabac(&lc->cc, lc->cabac_state + idx0_gt1 + 1)) {
++                    trans_coeff_level = 2;
++                    prev_subset_coded = 1;
++                    coded_val = get_cabac(&lc->cc, lc->cabac_state + idx_gt2);
++                }
++
++                // Probably not worth the overhead of starting by22 for just one value
++                coeff_sign_flag = get_cabac_bypass(&lc->cc);
++
++                if (coded_val)
++                {
++                    if (!s->ps.sps->persistent_rice_adaptation_enabled_flag) {
++                        trans_coeff_level = 3 + coeff_abs_level_remaining_decode(&lc->cc, 0);
++                    } else {
++                        uint8_t * const stat_coeff =
++                            lc->stat_coeff + trans_skip_or_bypass + 2 - ((c_idx_nz) << 1);
++                        const unsigned int c_rice_param = *stat_coeff >> 2;
++                        const int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode(&lc->cc, c_rice_param);
++
++                        trans_coeff_level = 3 + last_coeff_abs_level_remaining;
++                        update_rice(stat_coeff, last_coeff_abs_level_remaining, c_rice_param);
++                    }
++                }
++
++                {
++                    const xy_off_t * const xy_off = scan_xy_off + significant_coeff_flag_idx[0];
++                    const int k = (int32_t)(coeff_sign_flag << 31) >> 31;
++                    const unsigned int scale_m = blk_scale[xy_off->scale];
++                    const int res = trans_scale_sat(
++                        (trans_coeff_level ^ k) - k,  // Apply sign
++                        scale,
++                        i == 0 && xy_off->coeff == 0 ? dc_scale : scale_m,
++                        shift);
++#if RPI_COMPRESS_COEFFS
++                      if (use_compress)
++                        coeffs32[num_nonzero++] = (res<<16) + (&blk_coeffs[xy_off->coeff] - coeffs);
++                      else
++#endif
++                      blk_coeffs[xy_off->coeff] = res;
++                }
++            }
++            else
++#endif
++            {
++                int sign_hidden = may_hide_sign;
++                int levels[16]; // Should be able to get away with int16_t but that fails some tests
++                uint32_t coeff_sign_flags;
++                uint32_t coded_vals = 0;
++                // Sum(abs(level[]))
++                // In fact we only need the bottom bit and in some future
++                // version that may be all we calculate
++                unsigned int sum_abs;
++
++                coded_vals = get_greaterx_bits(lc, nb_significant_coeff_flag, levels,
++                    &prev_subset_coded, &sum_abs, idx0_gt1, idx_gt2);
++
++                if (significant_coeff_flag_idx[0] - significant_coeff_flag_idx[nb_significant_coeff_flag - 1] <= 3)
++                    sign_hidden = 0;
++
++                // -- Start bypass block
++
++                bypass_start(&lc->cc);
++
++                coeff_sign_flags = coeff_sign_flag_decode_bypass(&lc->cc, nb_significant_coeff_flag - sign_hidden);
++
++                if (coded_vals != 0)
++                {
++                    const int rice_adaptation_enabled = s->ps.sps->persistent_rice_adaptation_enabled_flag;
++                    uint8_t * stat_coeff = !rice_adaptation_enabled ? NULL :
++                        lc->stat_coeff + trans_skip_or_bypass + 2 - ((c_idx_nz) << 1);
++                    int c_rice_param = !rice_adaptation_enabled ? 0 : *stat_coeff >> 2;
++                    int * level = levels - 1;
++
++                    do {
++                        {
++                            const unsigned int z = hevc_clz32(coded_vals) + 1;
++                            level += z;
++                            coded_vals <<= z;
++                        }
++
++                        {
++                            const int last_coeff_abs_level_remaining = coeff_abs_level_remaining_decode_bypass(&lc->cc, c_rice_param);
++                            const int trans_coeff_level = *level + last_coeff_abs_level_remaining + 1;
++
++                            sum_abs += last_coeff_abs_level_remaining + 1;
++                            *level = trans_coeff_level;
++
++                            if (stat_coeff != NULL)
++                                update_rice(stat_coeff, last_coeff_abs_level_remaining, c_rice_param);
++                            stat_coeff = NULL;
++
++                            if (trans_coeff_level > (3 << c_rice_param) &&
++                                (c_rice_param < 4 || rice_adaptation_enabled))
++                                ++c_rice_param;
++                        }
++                    } while (coded_vals != 0);
++                }
++
++                // sign_hidden = 0 or 1 so we can combine the tests
++                if ((sign_hidden & sum_abs) != 0) {
++                    levels[nb_significant_coeff_flag - 1] = -levels[nb_significant_coeff_flag - 1];
++                }
++
++                bypass_finish(&lc->cc);
++
++                // -- Finish bypass block
++
++                // Scale loop
++                {
++                    int m = nb_significant_coeff_flag - 1;
++
++                    // Deal with DC component (if any) first
++                    if (i == 0 && significant_coeff_flag_idx[m] == 0)
++                    {
++                        const int k = (int32_t)(coeff_sign_flags << m) >> 31;
++                        const int res = trans_scale_sat(
++                            (levels[m] ^ k) - k, scale, dc_scale, shift);
++#if RPI_COMPRESS_COEFFS
++                        if (use_compress)
++                        {
++                            coeffs32[num_nonzero++] = (res<<16) + (blk_coeffs - coeffs);
++                        }
++                        else
++#endif
++                        {
++                            blk_coeffs[0] = res;
++                        }
++                        --m;
++                    }
++
++#if !USE_N_END_1
++                    // If N_END_1 set then m was at least 1 initially
++                    if (m >= 0)
++#endif
++                    {
++                        do {
++                            const xy_off_t * const xy_off = scan_xy_off +
++                                significant_coeff_flag_idx[m];
++                            const int k = (int32_t)(coeff_sign_flags << m) >> 31;
++                            const int res = trans_scale_sat(
++                                (levels[m] ^ k) - k,
++                                scale,
++                                blk_scale[xy_off->scale],
++                                shift);
++#if RPI_COMPRESS_COEFFS
++                            if (use_compress) {
++                              coeffs32[num_nonzero++] = (res<<16) + (&blk_coeffs[xy_off->coeff] - coeffs);
++                            } else
++#endif
++                              blk_coeffs[xy_off->coeff] = res;
++                        } while (--m >= 0);
++                    }
++                }
++
++            }
++        }
++    } while ((i = next_subset(lc, i, c_idx_nz,
++                              significant_coeff_group_flag, scan_x_cg, scan_y_cg, &prev_sig)) >= 0 &&
++             !cabac_overflow(&lc->cc));
++
++    if (lc->cu.cu_transquant_bypass_flag) {
++        if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled_flag &&
++                                    (pred_mode_intra == 10 || pred_mode_intra == 26))) {
++            int mode = s->ps.sps->implicit_rdpcm_enabled_flag ? (pred_mode_intra == 26) : explicit_rdpcm_dir_flag;
++
++            s->hevcdsp.transform_rdpcm(coeffs, log2_trafo_size, mode);
++        }
++    } else {
++        if (trans_skip_or_bypass) { // Must be trans_skip as we've already dealt with bypass
++            int rot = s->ps.sps->transform_skip_rotation_enabled_flag &&
++                      log2_trafo_size == 2 &&
++                      lc->cu.pred_mode == MODE_INTRA;
++            if (rot) {
++                for (i = 0; i < 8; i++)
++                    FFSWAP(int16_t, coeffs[i], coeffs[16 - i - 1]);
++            }
++
++            s->hevcdsp.dequant(coeffs, log2_trafo_size);
++
++            if (explicit_rdpcm_flag || (s->ps.sps->implicit_rdpcm_enabled_flag &&
++                                        lc->cu.pred_mode == MODE_INTRA &&
++                                        (pred_mode_intra == 10 || pred_mode_intra == 26))) {
++                int mode = explicit_rdpcm_flag ? explicit_rdpcm_dir_flag : (pred_mode_intra == 26);
++
++                s->hevcdsp.transform_rdpcm(coeffs, log2_trafo_size, mode);
++            }
++        } else if (lc->cu.pred_mode == MODE_INTRA && c_idx == 0 && log2_trafo_size == 2) {
++            s->hevcdsp.transform_4x4_luma(coeffs);
++        }
++        else if (!use_vpu)
++        {
++            int max_xy = FFMAX(last_significant_coeff_x, last_significant_coeff_y);
++            if (max_xy == 0)
++            {
++                if (use_dc)
++                    rpi_add_dc(s, lc->jb0, log2_trafo_size, c_idx, x0, y0, coeffs);
++                else
++                    s->hevcdsp.idct_dc[log2_trafo_size - 2](coeffs);
++            }
++            else {
++                int col_limit = last_significant_coeff_x + last_significant_coeff_y + 4;
++                if (max_xy < 4)
++                    col_limit = FFMIN(4, col_limit);
++                else if (max_xy < 8)
++                    col_limit = FFMIN(8, col_limit);
++                else if (max_xy < 12)
++                    col_limit = FFMIN(24, col_limit);
++                s->hevcdsp.idct[log2_trafo_size - 2](coeffs, col_limit);
++            }
++        }
++    }
++
++#if 0
++    // Mildly rotted - we support no mode where cross is valid
++    if (lc->tu.cross_pf) {
++        int16_t * const coeffs_y = (int16_t*)lc->edge_emu_buffer;
++        const int ccount = 1 << (log2_trafo_size * 2);
++
++        for (i = 0; i < ccount; i++) {
++            coeffs[i] = coeffs[i] + ((lc->tu.res_scale_val * coeffs_y[i]) >> 3);
++        }
++    }
++#endif
++
++    if (!use_dc) {
++#if RPI_COMPRESS_COEFFS
++        if (use_compress) {
++          coeffs32[num_nonzero] = 0;
++        }
++#endif
++        rpi_add_residual(s, lc->jb0, log2_trafo_size, c_idx, x0, y0, coeffs);
++    }
++}
++
++#if !USE_BY22
++// Non-BY22 build: decodes one mvd pair and returns it packed by MV_XY(x, y); lc is only handed to the decode helpers
++MvXY ff_hevc_rpi_hls_mvd_coding(HEVCRpiLocalContext * const lc)
++{
++    int x = abs_mvd_greater0_flag_decode(lc);
++    int y = abs_mvd_greater0_flag_decode(lc);
++
++    if (x)
++        x += abs_mvd_greater1_flag_decode(lc);  // x is now greater0 + greater1, i.e. 0, 1 or 2
++    if (y)
++        y += abs_mvd_greater1_flag_decode(lc);  // likewise for y
++
++    switch (x) {  // The flag sum selects how the final value of x is obtained
++    case 2: x = mvd_decode(lc);           break;
++    case 1: x = mvd_sign_flag_decode(lc); break;
++    case 0: x = 0;                       break;
++    }
++
++    switch (y) {  // Same selection for y; all x reads happen before any y value/sign read
++    case 2: y = mvd_decode(lc);           break;
++    case 1: y = mvd_sign_flag_decode(lc); break;
++    case 0: y = 0;                       break;
++    }
++    return MV_XY(x,y);
++}
++#else
++MvXY ff_hevc_rpi_hls_mvd_coding(HEVCRpiLocalContext * const lc)
++{   // USE_BY22 build of the mvd decode: returns the pair packed by MV_XY(x, y)
++    int x = abs_mvd_greater0_flag_decode(lc);
++    int y = abs_mvd_greater0_flag_decode(lc);
++
++    if ((x | y) == 0)  // Both greater0 flags are clear - nothing more is read
++        return 0;      // NOTE(review): presumably identical to MV_XY(0, 0) - confirm against MvXY
++
++    if (x != 0)
++        x += abs_mvd_greater1_flag_decode(lc);
++    if (y != 0)
++        y += abs_mvd_greater1_flag_decode(lc);
++
++    if ((x | y) == 1)  // x and y are each 0 or 1 here, so only sign flags remain
++    {
++        // Not worth starting BY22
++        if (x != 0)
++            x = mvd_sign_flag_decode(lc);
++        if (y != 0)
++            y = mvd_sign_flag_decode(lc);
++    }
++    else
++    {
++        CABACContext * const cc = &lc->cc;
++        uint32_t val;        // Unshifted copy of the current peek, handed back to each flush
++        uint32_t b;          // Working copy of the peek, shifted left as bits are used
++        unsigned int n = 0;  // Number of bits of val used so far (the flush count)
++
++        bypass_start(cc);
++        b = val = get_cabac_by22_peek(cc);
++
++        if (x == 1) {
++            x = ((int32_t)b >> 31) | 1;  // Top bit of b is the sign: set -> -1, clear -> +1
++            n = 1;
++            b <<= 1;
++        }
++        else if (x == 2) {
++            // EG1 so we have (leading one bits + 1) of suffix
++            // This makes prefix & suffix lengths the same
++            const unsigned int k = hevc_clz32(~b) + 1;
++            int s;
++
++            av_assert2(k <= 15);
++
++            b <<= k;
++            n = 2 * k + 1; // Includes suffix & sign
++
++            // We need to have k*2 + 2 (prefix, suffix, sign, y-sign) bits peeked
++            // if we are going to do this without a flush
++            if (k > CABAC_BY22_PEEK_BITS / 2 - 1)
++            {
++                // Need too many bits - flush
++                // Only the k prefix bits are flushed here; suffix & sign come from the new peek
++                get_cabac_by22_flush(cc, k, val);
++                b = val = get_cabac_by22_peek(cc);
++                n = k + 1;
++            }
++
++            x = (b >> (32 - k)) + (1 << k);  // k suffix bits from the top of b, plus 2^k
++            b <<= k;
++            s = (int32_t)b >> 31;            // 0 or -1 depending on the sign bit
++            x = (x ^ s) - s;                 // Negates x when s is -1
++            b <<= 1;
++
++            // Max abs value of an mv is 2^15 - 1 (i.e. a prefix len of 15 bits)
++            if (y > 1 && n > CABAC_BY22_PEEK_BITS - 15)
++            {
++                get_cabac_by22_flush(cc, n, val);
++                b = val = get_cabac_by22_peek(cc);
++                n = 0;
++            }
++        }
++
++        if (y == 1) {
++            y = ((int32_t)b >> 31) | 1;  // Sign only, as for x == 1
++            ++n;
++            // don't care about b anymore
++        }
++        else if (y == 2) {
++            const unsigned int k = hevc_clz32(~b) + 1;
++            int s;
++
++            av_assert2(k <= 15);
++
++            // We need to have k*2 + 1 (prefix, suffix, sign) bits peeked
++            // if we are going to do this without a flush
++            b <<= k;
++            n += 2 * k + 1;
++
++            if (n > CABAC_BY22_PEEK_BITS)
++            {
++                // Need too many bits - flush
++                get_cabac_by22_flush(cc, n - (k + 1), val);  // Everything used so far, up to and including the y prefix
++                b = val = get_cabac_by22_peek(cc);
++                n = k + 1;
++            }
++
++            y = (b >> (32 - k)) + (1 << k);
++            s = (int32_t)(b << k) >> 31;
++            y = (y ^ s) - s;
++            // don't care about b anymore
++        }
++
++        get_cabac_by22_flush(cc, n, val);  // Account for whatever is still outstanding from the last peek
++        bypass_finish(cc);
++    }
++
++    return MV_XY(x, y);
++}
++#endif
+--- /dev/null
++++ b/libavcodec/rpi_hevc_cabac_fns.h
+@@ -0,0 +1,217 @@
++/*
++ * HEVC CABAC decoding
++ *
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ * Copyright (C) 2012 - 2013 Gildas Cocherel
++ * Copyright (C) 2012 - 2013 Gildas Cocherel
++ * Copyright (C) 2018 John Cox
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++
++#ifndef AVCODEC_RPI_HEVC_CABAC_FNS_H
++#define AVCODEC_RPI_HEVC_CABAC_FNS_H
++
++#include "config.h"
++#include "rpi_hevcdec.h"
++
++void ff_hevc_rpi_save_states(HEVCRpiContext *s, const HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_cabac_init_decoder(HEVCRpiLocalContext * const lc);
++void ff_hevc_rpi_cabac_init(const HEVCRpiContext * const s, HEVCRpiLocalContext *const lc, const unsigned int ctb_flags);
++int ff_hevc_rpi_sao_type_idx_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_sao_band_position_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_sao_offset_abs_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_sao_offset_sign_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_sao_eo_class_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_part_mode_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, const int log2_cb_size);
++int ff_hevc_rpi_mpm_idx_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_rem_intra_luma_pred_mode_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_intra_chroma_pred_mode_decode(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_merge_idx_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_inter_pred_idc_decode(HEVCRpiLocalContext * const lc, int nPbW, int nPbH);
++int ff_hevc_rpi_ref_idx_lx_decode(HEVCRpiLocalContext * const lc, const int num_ref_idx_lx);
++int ff_hevc_rpi_log2_res_scale_abs(HEVCRpiLocalContext * const lc, const int idx);
++
++//int ff_hevc_rpi_cu_qp_delta_sign_flag(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_cu_qp_delta(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_cu_chroma_qp_offset_idx(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc);
++void ff_hevc_rpi_hls_residual_coding(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                                const int x0, const int y0,
++                                const int log2_trafo_size, const enum ScanType scan_idx,
++                                const int c_idx);
++
++MvXY ff_hevc_rpi_hls_mvd_coding(HEVCRpiLocalContext * const lc);
++int ff_hevc_rpi_cabac_overflow(const HEVCRpiLocalContext * const lc);
++
++#define HEVC_BIN_SAO_MERGE_FLAG                         0  // HEVC_BIN_*: offsets added to lc->cabac_state by the decode helpers in this header
++#define HEVC_BIN_SAO_TYPE_IDX                           1
++#define HEVC_BIN_SAO_EO_CLASS                           2  // NOTE(review): several names share one offset; presumably those elements own no state bytes - confirm
++#define HEVC_BIN_SAO_BAND_POSITION                      2
++#define HEVC_BIN_SAO_OFFSET_ABS                         2
++#define HEVC_BIN_SAO_OFFSET_SIGN                        2
++#define HEVC_BIN_END_OF_SLICE_FLAG                      2
++#define HEVC_BIN_SPLIT_CODING_UNIT_FLAG                 2
++#define HEVC_BIN_CU_TRANSQUANT_BYPASS_FLAG              5
++#define HEVC_BIN_SKIP_FLAG                              6
++#define HEVC_BIN_CU_QP_DELTA                            9
++#define HEVC_BIN_PRED_MODE                              12
++#define HEVC_BIN_PART_MODE                              13
++#define HEVC_BIN_PCM_FLAG                               17
++#define HEVC_BIN_PREV_INTRA_LUMA_PRED_MODE              17
++#define HEVC_BIN_MPM_IDX                                18
++#define HEVC_BIN_REM_INTRA_LUMA_PRED_MODE               18
++#define HEVC_BIN_INTRA_CHROMA_PRED_MODE                 18
++#define HEVC_BIN_MERGE_FLAG                             20
++#define HEVC_BIN_MERGE_IDX                              21
++#define HEVC_BIN_INTER_PRED_IDC                         22
++#define HEVC_BIN_REF_IDX_L0                             27
++#define HEVC_BIN_REF_IDX_L1                             29
++#define HEVC_BIN_ABS_MVD_GREATER0_FLAG                  31
++#define HEVC_BIN_ABS_MVD_GREATER1_FLAG                  33
++#define HEVC_BIN_ABS_MVD_MINUS2                         35
++#define HEVC_BIN_MVD_SIGN_FLAG                          35
++#define HEVC_BIN_MVP_LX_FLAG                            35
++#define HEVC_BIN_NO_RESIDUAL_DATA_FLAG                  36
++#define HEVC_BIN_SPLIT_TRANSFORM_FLAG                   37
++#define HEVC_BIN_CBF_LUMA                               40
++#define HEVC_BIN_CBF_CB_CR                              42
++#define HEVC_BIN_TRANSFORM_SKIP_FLAG                    46
++#define HEVC_BIN_EXPLICIT_RDPCM_FLAG                    48
++#define HEVC_BIN_EXPLICIT_RDPCM_DIR_FLAG                50
++#define HEVC_BIN_LAST_SIGNIFICANT_COEFF_X_PREFIX        52
++#define HEVC_BIN_LAST_SIGNIFICANT_COEFF_Y_PREFIX        70
++#define HEVC_BIN_LAST_SIGNIFICANT_COEFF_X_SUFFIX        88
++#define HEVC_BIN_LAST_SIGNIFICANT_COEFF_Y_SUFFIX        88
++#define HEVC_BIN_SIGNIFICANT_COEFF_GROUP_FLAG           88
++#define HEVC_BIN_SIGNIFICANT_COEFF_FLAG                 92
++#define HEVC_BIN_COEFF_ABS_LEVEL_GREATER1_FLAG          136
++#define HEVC_BIN_COEFF_ABS_LEVEL_GREATER2_FLAG          160
++#define HEVC_BIN_COEFF_ABS_LEVEL_REMAINING              166
++#define HEVC_BIN_COEFF_SIGN_FLAG                        166
++#define HEVC_BIN_LOG2_RES_SCALE_ABS                     166
++#define HEVC_BIN_RES_SCALE_SIGN_FLAG                    174
++#define HEVC_BIN_CU_CHROMA_QP_OFFSET_FLAG               176
++#define HEVC_BIN_CU_CHROMA_QP_OFFSET_IDX                177
++
++
++int ff_hevc_rpi_get_cabac(CABACContext * const c, uint8_t * const state);
++int ff_hevc_rpi_get_cabac_terminate(CABACContext * const c);
++
++static inline const uint8_t* ff_hevc_rpi_cabac_skip_bytes(CABACContext * const c, int n) {  // Restart the decoder n bytes past the adjusted read position
++    const uint8_t *ptr = c->bytestream;
++
++    if (c->low & 0x1)  // NOTE(review): presumably backs up over a byte already loaded into c->low - confirm
++        ptr--;
++#if CABAC_BITS == 16
++    if (c->low & 0x1FF)
++        ptr--;
++#endif
++    if ((int) (c->bytestream_end - ptr) < n)  // Fewer than n bytes remain after the adjustment
++        return NULL;
++    if (ff_init_cabac_decoder(c, ptr + n, c->bytestream_end - ptr - n) < 0)  // Re-init on the data that follows the skipped bytes
++        return NULL;
++
++    return ptr;  // Adjusted position before the skip, i.e. the start of the n skipped bytes
++}
++
++static inline int ff_hevc_rpi_sao_merge_flag_decode(HEVCRpiLocalContext * const lc)
++{   // Decodes one bin using the state byte at cabac_state[HEVC_BIN_SAO_MERGE_FLAG]
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_SAO_MERGE_FLAG);
++}
++
++static inline int ff_hevc_rpi_cu_transquant_bypass_flag_decode(HEVCRpiLocalContext * const lc)
++{   // Decodes one bin using the state byte at cabac_state[HEVC_BIN_CU_TRANSQUANT_BYPASS_FLAG]
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_CU_TRANSQUANT_BYPASS_FLAG);
++}
++
++static inline int ff_hevc_rpi_cu_chroma_qp_offset_flag(HEVCRpiLocalContext * const lc)
++{   // Decodes one bin using the state byte at cabac_state[HEVC_BIN_CU_CHROMA_QP_OFFSET_FLAG]
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_CU_CHROMA_QP_OFFSET_FLAG);
++}
++
++static inline int ff_hevc_rpi_split_coding_unit_flag_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                                                            const unsigned int ct_depth,
++                                                            const unsigned int x0, const unsigned int y0)
++{   // State index = base + 1 for each of the left/up stash entries whose (value >> 1) exceeds ct_depth
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_SPLIT_CODING_UNIT_FLAG +
++                                 ((s->cabac_stash_left[y0 >> 3] >> 1) > ct_depth) +  // stash is indexed in units of 8 (y0 >> 3)
++                                 ((s->cabac_stash_up[x0 >> 3] >> 1) > ct_depth));
++}
++
++static inline int ff_hevc_rpi_skip_flag_decode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                             const int x0, const int y0, const int x_cb, const int y_cb)
++{   // State index = base + bit 0 of the left stash entry + bit 0 of the up stash entry; x_cb and y_cb are not read here
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_SKIP_FLAG +
++                                 (s->cabac_stash_left[y0 >> 3] & 1) +
++                                 (s->cabac_stash_up[x0 >> 3] & 1));
++}
++
++static inline int ff_hevc_rpi_pred_mode_decode(HEVCRpiLocalContext * const lc)
++{   // Decodes one bin using the state byte at cabac_state[HEVC_BIN_PRED_MODE]
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_PRED_MODE);
++}
++
++static inline int ff_hevc_rpi_pcm_flag_decode(HEVCRpiLocalContext * const lc)
++{   // Unlike its neighbours this goes through the terminate decoder and uses no cabac_state entry
++    return ff_hevc_rpi_get_cabac_terminate(&lc->cc);
++}
++
++static inline int ff_hevc_rpi_prev_intra_luma_pred_flag_decode(HEVCRpiLocalContext * const lc)
++{   // Decodes one bin using the state byte at cabac_state[HEVC_BIN_PREV_INTRA_LUMA_PRED_MODE]
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_PREV_INTRA_LUMA_PRED_MODE);
++}
++
++static inline int ff_hevc_rpi_merge_flag_decode(HEVCRpiLocalContext * const lc)
++{   // Decodes one bin using the state byte at cabac_state[HEVC_BIN_MERGE_FLAG]
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_MERGE_FLAG);
++}
++
++static inline int ff_hevc_rpi_mvp_lx_flag_decode(HEVCRpiLocalContext * const lc)
++{   // Decodes one bin using the state byte at cabac_state[HEVC_BIN_MVP_LX_FLAG]
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_MVP_LX_FLAG);
++}
++
++static inline int ff_hevc_rpi_no_residual_syntax_flag_decode(HEVCRpiLocalContext * const lc)
++{   // Decodes one bin using the state byte at cabac_state[HEVC_BIN_NO_RESIDUAL_DATA_FLAG]
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_NO_RESIDUAL_DATA_FLAG);
++}
++
++static inline int ff_hevc_rpi_cbf_cb_cr_decode(HEVCRpiLocalContext * const lc, const int trafo_depth)
++{   // State index is HEVC_BIN_CBF_CB_CR + trafo_depth (one state byte per depth)
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_CBF_CB_CR + trafo_depth);
++}
++
++static inline int ff_hevc_rpi_cbf_luma_decode(HEVCRpiLocalContext * const lc, const int trafo_depth)
++{   // State index is HEVC_BIN_CBF_LUMA + 1 when trafo_depth is 0, otherwise HEVC_BIN_CBF_LUMA + 0
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_CBF_LUMA + !trafo_depth);
++}
++
++static inline int ff_hevc_rpi_split_transform_flag_decode(HEVCRpiLocalContext * const lc, const int log2_trafo_size)
++{   // State index is HEVC_BIN_SPLIT_TRANSFORM_FLAG + (5 - log2_trafo_size), so larger transforms use lower indices
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_SPLIT_TRANSFORM_FLAG + 5 - log2_trafo_size);
++}
++
++static inline int ff_hevc_rpi_res_scale_sign_flag(HEVCRpiLocalContext *const lc, const int idx)
++{   // State index is HEVC_BIN_RES_SCALE_SIGN_FLAG + idx
++    return ff_hevc_rpi_get_cabac(&lc->cc, lc->cabac_state + HEVC_BIN_RES_SCALE_SIGN_FLAG + idx);
++}
++
++
++
++#endif
++
+--- /dev/null
++++ b/libavcodec/rpi_hevc_data.c
+@@ -0,0 +1,75 @@
++/*
++ * HEVC shared tables
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include <stdint.h>
++
++#include "rpi_hevc_data.h"
++
++const uint8_t ff_hevc_rpi_diag_scan4x4_x[16] = {  // x of scan position i; with _y this walks each anti-diagonal (x + y constant) with x rising
++    0, 0, 1, 0,
++    1, 2, 0, 1,
++    2, 3, 1, 2,
++    3, 2, 3, 3,
++};
++
++const uint8_t ff_hevc_rpi_diag_scan4x4_y[16] = {  // y of scan position i; pairs entry-for-entry with ff_hevc_rpi_diag_scan4x4_x
++    0, 1, 0, 2,
++    1, 0, 3, 2,
++    1, 0, 3, 2,
++    1, 3, 2, 3,
++};
++
++const uint8_t ff_hevc_rpi_diag_scan8x8_x[64] = {  // x of scan position i; with _y this walks each anti-diagonal (x + y constant) with x rising
++    0, 0, 1, 0,
++    1, 2, 0, 1,
++    2, 3, 0, 1,
++    2, 3, 4, 0,
++    1, 2, 3, 4,
++    5, 0, 1, 2,
++    3, 4, 5, 6,
++    0, 1, 2, 3,
++    4, 5, 6, 7,
++    1, 2, 3, 4,
++    5, 6, 7, 2,
++    3, 4, 5, 6,
++    7, 3, 4, 5,
++    6, 7, 4, 5,
++    6, 7, 5, 6,
++    7, 6, 7, 7,
++};
++
++const uint8_t ff_hevc_rpi_diag_scan8x8_y[64] = {  // y of scan position i; pairs entry-for-entry with ff_hevc_rpi_diag_scan8x8_x
++    0, 1, 0, 2,
++    1, 0, 3, 2,
++    1, 0, 4, 3,
++    2, 1, 0, 5,
++    4, 3, 2, 1,
++    0, 6, 5, 4,
++    3, 2, 1, 0,
++    7, 6, 5, 4,
++    3, 2, 1, 0,
++    7, 6, 5, 4,
++    3, 2, 1, 7,
++    6, 5, 4, 3,
++    2, 7, 6, 5,
++    4, 3, 7, 6,
++    5, 4, 7, 6,
++    5, 7, 6, 7,
++};
+--- /dev/null
++++ b/libavcodec/rpi_hevc_data.h
+@@ -0,0 +1,31 @@
++/*
++ * HEVC shared data tables
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVCODEC_RPI_HEVC_DATA_H
++#define AVCODEC_RPI_HEVC_DATA_H
++
++#include <stdint.h>
++
++extern const uint8_t ff_hevc_rpi_diag_scan4x4_x[16];
++extern const uint8_t ff_hevc_rpi_diag_scan4x4_y[16];
++extern const uint8_t ff_hevc_rpi_diag_scan8x8_x[64];
++extern const uint8_t ff_hevc_rpi_diag_scan8x8_y[64];
++
++#endif /* AVCODEC_RPI_HEVC_DATA_H */
+--- /dev/null
++++ b/libavcodec/rpi_hevc_filter.c
+@@ -0,0 +1,1210 @@
++/*
++ * HEVC video decoder
++ *
++ * Originally by:
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ * Copyright (C) 2013 Seppo Tomperi
++ * Copyright (C) 2013 Wassim Hamidouche
++ *
++ * Substantially rewritten:
++ * Copyright (C) 2018 John Cox, Ben Avison for Raspberry Pi (Trading)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++//#define DISABLE_SAO
++//#define DISABLE_DEBLOCK
++//#define DISABLE_STRENGTHS
++// define DISABLE_DEBLOCK_NONREF for a 6% speed boost (by skipping deblocking on unimportant frames)
++//#define DISABLE_DEBLOCK_NONREF
++
++#include "libavutil/common.h"
++#include "libavutil/internal.h"
++
++#include "rpi_hevcdec.h"
++
++#include "bit_depth_template.c"
++
++#include "rpi_qpu.h"
++#include "rpi_zc.h"
++#include "libavutil/rpi_sand_fns.h"
++
++#define LUMA 0
++#define CB 1
++#define CR 2
++
++// tcoffset: -12,12; qp: 0,51; (bs-1)*2: 0,2
++// so -12,65 overall
++static const uint8_t tctablex[] = {
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  // -ve quant padding
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,
++
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,                          // -12..-1
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  0,  0,  0,  0, 0, 0, 1, // QP  0...18
++    1, 1, 1, 1, 1, 1, 1,  1,  2,  2,  2,  2,  3,  3,  3,  3, 4, 4, 4, // QP 19...37
++    5, 5, 6, 6, 7, 8, 9, 10, 11, 13, 14, 16, 18, 20, 22, 24,          // QP 38...53
++    24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24                    // 54..65
++};
++#define tctable (tctablex + 12 + 6*8)  // tctable[0] is the QP 0 entry: skips the 6*8 padding entries and the 12 entries for -12..-1
++
++static const uint8_t betatablex[] = {
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,  // -ve quant padding
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,
++
++    0, 0, 0, 0, 0, 0, 0,  0,  0,  0,  0,  0,                          // -12..-1
++     0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  6,  7,  8, // QP 0...18
++     9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, // QP 19...37
++    38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60, 62, 64,                      // QP 38...51
++    64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64                    // 52..63
++};
++#define betatable (betatablex + 12 + 6*8)  // betatable[0] is the QP 0 entry: skips the 6*8 padding entries and the 12 entries for -12..-1
++
++static inline int chroma_tc(const HEVCRpiContext * const s, const int qp_y,
++                            const int c_idx, const int tc_offset)
++{   // Maps qp_y through the PPS qp_dblk_x[c_idx] table, then looks up tctable at that value + tc_offset + 2
++    return tctable[(int)s->ps.pps->qp_dblk_x[c_idx][qp_y] + tc_offset + 2];
++}
++
++static inline int get_qPy_pred(const HEVCRpiContext * const s, const HEVCRpiLocalContext * const lc,
++                               const unsigned int xBase, const unsigned int yBase)
++{   // Returns the rounded average of a left and an upper QP predictor for the quantisation group holding (xBase, yBase)
++    const unsigned int ctb_size_mask        = (1 << s->ps.sps->log2_ctb_size) - 1;
++    const unsigned int MinCuQpDeltaSizeMask = ~0U << s->ps.pps->log2_min_cu_qp_delta_size;
++    const unsigned int xQgBase              = xBase & MinCuQpDeltaSizeMask;  // Round down to the group origin
++    const unsigned int yQgBase              = yBase & MinCuQpDeltaSizeMask;
++    const unsigned int min_cb_width         = s->ps.sps->min_cb_width;       // Row pitch of qp_y_tab
++    const unsigned int x_cb                 = xQgBase >> s->ps.sps->log2_min_cb_size;
++    const unsigned int y_cb                 = yQgBase >> s->ps.sps->log2_min_cb_size;
++    const int qPy_pred = lc->qPy_pred;
++
++    return (((xQgBase & ctb_size_mask) == 0 ? qPy_pred :            // Group on the CTB's left edge: use lc->qPy_pred
++             s->qp_y_tab[(x_cb - 1) + y_cb * min_cb_width]) +       // else the qp_y_tab entry one min-CB to the left
++            ((yQgBase & ctb_size_mask) == 0 ? qPy_pred :            // Group on the CTB's top edge: use lc->qPy_pred
++             s->qp_y_tab[x_cb + (y_cb - 1) * min_cb_width]) + 1) >> 1;  // else the entry one min-CB row up; +1 rounds the average
++}
++
++// * Only called from bitstream decode in foreground
++//   so should be safe
++void ff_hevc_rpi_set_qPy(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int xBase, int yBase)
++{   // Writes lc->qp_y: the predicted QP, adjusted by lc->tu.cu_qp_delta when that is non-zero
++    const int qp_y = get_qPy_pred(s, lc, xBase, yBase);
++
++    if (lc->tu.cu_qp_delta != 0) {
++        // ?? I suspect that the -bd_offset here leads to us adding it elsewhere
++        int off = s->ps.sps->qp_bd_offset;
++        lc->qp_y = FFUMOD(qp_y + lc->tu.cu_qp_delta + 52 + 2 * off,  // NOTE(review): with FFUMOD in [0, 52 + off) the result wraps into [-off, 51] - confirm
++                                 52 + off) - off;
++    } else
++        lc->qp_y = qp_y;  // No delta: the prediction is used unchanged
++}
++
++static inline unsigned int pixel_shift(const HEVCRpiContext * const s, const unsigned int c_idx)
++{   // Returns the SPS pixel_shift for c_idx 0 and one more than that for any other c_idx
++    return c_idx != 0 ? 1 + s->ps.sps->pixel_shift : s->ps.sps->pixel_shift;
++}
++
++// "DSP" these?
++static void copy_pixel(uint8_t *dst, const uint8_t *src, int pixel_shift)
++{   // Copies one element of (1 << pixel_shift) bytes from src to dst
++    switch (pixel_shift)
++    {
++        case 2:
++            *(uint32_t *)dst = *(uint32_t *)src;  // 4-byte element, copied with a single 32-bit access
++            break;
++        case 1:
++            *(uint16_t *)dst = *(uint16_t *)src;  // 2-byte element
++            break;
++        default:
++            *dst = *src;  // Any other shift value copies a single byte
++            break;
++    }
++}
++
++static void copy_CTB_to_hv(const HEVCRpiContext * const s, const uint8_t * const src,
++                           ptrdiff_t stride_src, int x, int y, int width, int height,
++                           int c_idx, int x_ctb, int y_ctb)
++{   // Saves the four border lines of a width x height block at src into the per-plane sao_pixel_buffer_h / _v arrays
++    const unsigned int sh = pixel_shift(s, c_idx);                       // log2 of bytes per element in this plane
++    const unsigned int w = s->ps.sps->width >> ctx_hshift(s, c_idx);     // Plane width: row pitch of the h buffer
++    const unsigned int h = s->ps.sps->height >> ctx_vshift(s, c_idx);    // Plane height: column pitch of the v buffer
++
++    /* copy horizontal edges */
++    memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb) * w + x) << sh),      // First row goes to buffer row 2 * y_ctb
++        src, width << sh);
++    memcpy(s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 1) * w + x) << sh),  // Last row goes to buffer row 2 * y_ctb + 1
++        src + stride_src * (height - 1), width << sh);
++
++    /* copy vertical edges */
++    ff_hevc_rpi_copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb) * h + y) << sh), src, sh, height, 1 << sh, stride_src);  // First column
++
++    ff_hevc_rpi_copy_vert(s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 1) * h + y) << sh), src + ((width - 1) << sh), sh, height, 1 << sh, stride_src);  // Last column
++}
++
++// N.B. Src & dst are swapped as this is a restore!
++// x0 & y0 are in luma coords
++// Width & height are in Y/C pels as appropriate
++// * Clear scope for optimisation here but not used enough to be worth it
++static void restore_tqb_pixels(const HEVCRpiContext * const s,
++                               uint8_t *src1, const uint8_t *dst1,
++                               const ptrdiff_t stride_src, const ptrdiff_t stride_dst,
++                               const unsigned int x0, const unsigned int y0,
++                               const unsigned int width, const int height,
++                               const int c_idx)
++{
++    // Nothing to put back unless one of these stream features is enabled
++    if (!s->ps.pps->transquant_bypass_enable_flag &&
++        !s->ps.sps->pcm.loop_filter_disable_flag)
++        return;
++    {
++        const unsigned int blk_shift = (c_idx == 0) ? 3 : 2;  // blocks are 8 rows (luma) or 4 rows (chroma)
++        const unsigned int blk_h = 1U << blk_shift;
++        const unsigned int blk_w = 8 << s->ps.sps->pixel_shift;  // Y & C have the same width in sand
++        const unsigned int first_bit = ((x0 >> 3) & 7);
++        const unsigned int row_mask = (1 << (width >> blk_shift)) - 1;
++        const uint8_t *pcm = s->is_pcm + (x0 >> 6) + (y0 >> 3) * s->ps.sps->pcm_width;
++        int rows_left = height >> blk_shift;
++        for (;;) {  // the first block row is always processed, exactly as a do/while would
++            unsigned int m;
++            uint8_t * bd = src1;
++            const uint8_t * bs = dst1;
++            for (m = (*pcm >> first_bit) & row_mask; m != 0; m >>= 1, bs += blk_w, bd += blk_w) {
++                if ((m & 1) != 0)  // flagged block: copy it from dst1 back into src1
++                    s->hevcdsp.cpy_blk(bd, stride_src, bs, stride_dst, blk_w, blk_h);
++            }
++            src1 += stride_src * blk_h;
++            dst1 += stride_dst * blk_h;
++            pcm += s->ps.sps->pcm_width;
++            if (--rows_left <= 0)
++                break;
++        }
++    }
++}
++
++#define CTB(tab, x, y) ((tab)[(y) * s->ps.sps->ctb_width + (x)])  // raster-order per-CTB lookup; expects a variable `s` in scope at the use site
++
++static void sao_filter_CTB(const HEVCRpiContext * const s, const int x, const int y)
++{   // Applies SAO (band or edge, as selected by sao->type_idx[]) to the CTB containing luma pel (x, y)
++#if SAO_FILTER_N == 5
++    static const uint8_t sao_tab[8] = { 0 /* 8 */, 1 /* 16 */, 2 /* 24 */, 2 /* 32 */, 3, 3 /* 48 */, 4, 4 /* 64 */};
++#elif SAO_FILTER_N == 6
++    static const uint8_t sao_tab[8] = { 0 /* 8 */, 1 /* 16 */, 5 /* 24 */, 2 /* 32 */, 3, 3 /* 48 */, 4, 4 /* 64 */};
++#else
++#error Confused by size of sao fn array
++#endif
++    int c_idx;
++    int edges[4];  // 0 left 1 top 2 right 3 bottom
++    int x_ctb                = x >> s->ps.sps->log2_ctb_size;
++    int y_ctb                = y >> s->ps.sps->log2_ctb_size;
++    int ctb_addr_rs          = y_ctb * s->ps.sps->ctb_width + x_ctb;
++    int ctb_addr_ts          = s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs];
++    RpiSAOParams *sao           = &CTB(s->sao, x_ctb, y_ctb);
++    // flags indicating unfilterable edges
++    uint8_t vert_edge[]      = { 0, 0 };
++    uint8_t horiz_edge[]     = { 0, 0 };
++    uint8_t diag_edge[]      = { 0, 0, 0, 0 };
++    uint8_t lfase            = CTB(s->filter_slice_edges, x_ctb, y_ctb);
++    uint8_t no_tile_filter   = s->ps.pps->tiles_enabled_flag &&
++                               !s->ps.pps->loop_filter_across_tiles_enabled_flag;
++    uint8_t restore          = no_tile_filter || !lfase;  // gates the boundary analysis below and selects the sao_edge_restore variant
++    uint8_t left_tile_edge   = 0;
++    uint8_t right_tile_edge  = 0;
++    uint8_t up_tile_edge     = 0;
++    uint8_t bottom_tile_edge = 0;
++    const int sliced = 1;  // constant: plane_count is therefore always 2 and the !sliced branches below are never taken
++    const int plane_count = sliced ? 2 : (ctx_cfmt(s) != 0 ? 3 : 1);
++
++    edges[0]   = x_ctb == 0;
++    edges[1]   = y_ctb == 0;
++    edges[2]   = x_ctb == s->ps.sps->ctb_width  - 1;
++    edges[3]   = y_ctb == s->ps.sps->ctb_height - 1;
++
++#ifdef DISABLE_SAO
++    return;  // compile-time switch: skip SAO entirely
++#endif
++
++    if (restore) {  // flag each neighbour direction that lies in a different slice (when !lfase) or a different tile (when no_tile_filter)
++        if (!edges[0]) {
++            left_tile_edge  = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs-1]];
++            vert_edge[0]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb)) || left_tile_edge;
++        }
++        if (!edges[2]) {
++            right_tile_edge = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs+1]];
++            vert_edge[1]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb)) || right_tile_edge;
++        }
++        if (!edges[1]) {
++            up_tile_edge     = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs - s->ps.sps->ctb_width]];
++            horiz_edge[0]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb - 1)) || up_tile_edge;
++        }
++        if (!edges[3]) {
++            bottom_tile_edge = no_tile_filter && s->ps.pps->tile_id[ctb_addr_ts] != s->ps.pps->tile_id[s->ps.pps->ctb_addr_rs_to_ts[ctb_addr_rs + s->ps.sps->ctb_width]];
++            horiz_edge[1]    = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb, y_ctb + 1)) || bottom_tile_edge;
++        }
++        if (!edges[0] && !edges[1]) {
++            diag_edge[0] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb - 1)) || left_tile_edge || up_tile_edge;
++        }
++        if (!edges[1] && !edges[2]) {
++            diag_edge[1] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb - 1)) || right_tile_edge || up_tile_edge;
++        }
++        if (!edges[2] && !edges[3]) {
++            diag_edge[2] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb + 1, y_ctb + 1)) || right_tile_edge || bottom_tile_edge;
++        }
++        if (!edges[0] && !edges[3]) {
++            diag_edge[3] = (!lfase && CTB(s->tab_slice_address, x_ctb, y_ctb) != CTB(s->tab_slice_address, x_ctb - 1, y_ctb + 1)) || left_tile_edge || bottom_tile_edge;
++        }
++    }
++
++    for (c_idx = 0; c_idx < plane_count; c_idx++) {
++        const unsigned int vshift = ctx_vshift(s, c_idx);
++        const unsigned int hshift = ctx_hshift(s, c_idx);
++        const int x0 = x >> hshift;
++        const int y0 = y >> vshift;
++        const ptrdiff_t stride_src = frame_stride1(s->frame, c_idx);
++        const int ctb_size_h = (1 << (s->ps.sps->log2_ctb_size)) >> hshift;
++        const int ctb_size_v = (1 << (s->ps.sps->log2_ctb_size)) >> vshift;
++        const int width    = FFMIN(ctb_size_h, (s->ps.sps->width  >> hshift) - x0);
++        const int height = FFMIN(ctb_size_v, (s->ps.sps->height >> vshift) - y0);
++        int tab      = sao_tab[(FFALIGN(width, 8) >> 3) - 1];  // index into the per-width filter fn arrays; width is rounded up to a multiple of 8
++        ptrdiff_t stride_dst;
++        uint8_t *dst;
++
++        const unsigned int sh = s->ps.sps->pixel_shift + (sliced && c_idx != 0);  // log2 bytes per pel; one more for the chroma pass
++        const int wants_lr = sao->type_idx[c_idx] == SAO_EDGE && sao->eo_class[c_idx] != 1 /* Vertical */;  // src_l / src_r stay NULL unless this is set
++        uint8_t * const src = !sliced ?
++                &s->frame->data[c_idx][y0 * stride_src + (x0 << sh)] :
++            c_idx == 0 ?
++                av_rpi_sand_frame_pos_y(s->frame, x0, y0) :
++                av_rpi_sand_frame_pos_c(s->frame, x0, y0);
++        const uint8_t * const src_l = edges[0] || !wants_lr ? NULL :
++            !sliced ? src - (1 << sh) :
++            c_idx == 0 ?
++                av_rpi_sand_frame_pos_y(s->frame, x0 - 1, y0) :
++                av_rpi_sand_frame_pos_c(s->frame, x0 - 1, y0);
++        const uint8_t * const src_r = edges[2] || !wants_lr ? NULL :
++            !sliced ? src + (width << sh) :
++            c_idx == 0 ?
++                av_rpi_sand_frame_pos_y(s->frame, x0 + width, y0) :
++                av_rpi_sand_frame_pos_c(s->frame, x0 + width, y0);
++
++        if (sliced && c_idx > 1) {  // cannot trigger while plane_count == 2
++            break;
++        }
++
++//        if (c_idx == 1)
++//            printf("%d: %dx%d %d,%d: lr=%d\n", c_idx, width, height, x0, y0, wants_lr);
++
++        switch (sao->type_idx[c_idx]) {
++        case SAO_BAND:
++            copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
++                           x_ctb, y_ctb);  // save the border pels before src is filtered in place
++            if (s->ps.pps->transquant_bypass_enable_flag ||
++                s->ps.sps->pcm.loop_filter_disable_flag)
++            {
++                // Can't use the edge buffer here as it may be in use by the foreground
++                DECLARE_ALIGNED(64, uint8_t, dstbuf)
++                    [2*MAX_PB_SIZE*MAX_PB_SIZE];
++                dst = dstbuf;
++                stride_dst = 2*MAX_PB_SIZE;
++                s->hevcdsp.cpy_blk(dst, stride_dst, src, stride_src, width << sh, height);  // keep an unfiltered copy for restore_tqb_pixels()
++                if (sliced && c_idx != 0)
++                {
++                    s->hevcdsp.sao_band_filter_c[tab](src, dst, stride_src, stride_dst,
++                                                    sao->offset_val[1], sao->band_position[1],
++                                                    sao->offset_val[2], sao->band_position[2],
++                                                    width, height);
++                }
++                else
++                {
++                    s->hevcdsp.sao_band_filter[tab](src, dst, stride_src, stride_dst,
++                                                    sao->offset_val[c_idx], sao->band_position[c_idx],
++                                                    width, height);
++                }
++                restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
++                                   x, y, width, height, c_idx);
++            } else {
++                if (sliced && c_idx != 0)
++                {
++                    s->hevcdsp.sao_band_filter_c[tab](src, src, stride_src, stride_src,
++                                                    sao->offset_val[1], sao->band_position[1],
++                                                    sao->offset_val[2], sao->band_position[2],
++                                                    width, height);
++                }
++                else
++                {
++                    s->hevcdsp.sao_band_filter[tab](src, src, stride_src, stride_src,
++                                                    sao->offset_val[c_idx], sao->band_position[c_idx],
++                                                    width, height);
++                }
++            }
++            sao->type_idx[c_idx] = SAO_APPLIED;  // later CTBs test for this and then read our border pels from the sao_pixel_buffers
++            break;
++        case SAO_EDGE:
++        {
++            const int w = s->ps.sps->width >> hshift;
++            const int h = s->ps.sps->height >> vshift;
++            int top_edge = edges[1];
++            int bottom_edge = edges[3];
++            // Can't use the edge buffer here as it may be in use by the foreground
++            DECLARE_ALIGNED(64, uint8_t, dstbuf)
++                [RPI_HEVC_SAO_BUF_STRIDE * (MAX_PB_SIZE + 2) + 64];
++
++            stride_dst = RPI_HEVC_SAO_BUF_STRIDE;
++            dst = dstbuf + stride_dst + 32;  // leaves one row above and 32 bytes to the left free for the border pels written below
++
++            if (!top_edge) {  // fill the row above the block (dst - stride_dst), including its two corner pels
++                uint8_t *dst1;
++                int src_idx;
++                const uint8_t * const src_spb = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb - 1) * w + x0) << sh);
++
++                dst1 = dst - stride_dst;
++
++                if (src_l != NULL) {
++                    src_idx = (CTB(s->sao, x_ctb-1, y_ctb-1).type_idx[c_idx] ==
++                               SAO_APPLIED);
++                    copy_pixel(dst1 - (1 << sh), src_idx ? src_spb - (1 << sh) : src_l - stride_src, sh);
++                }
++
++                src_idx = (CTB(s->sao, x_ctb, y_ctb-1).type_idx[c_idx] ==
++                           SAO_APPLIED);
++                memcpy(dst1, src_idx ? src_spb : src - stride_src, width << sh);
++
++                if (src_r != NULL) {
++                    src_idx = (CTB(s->sao, x_ctb+1, y_ctb-1).type_idx[c_idx] ==
++                               SAO_APPLIED);
++                    copy_pixel(dst1 + (width << sh), src_idx ? src_spb + (width << sh) : src_r - stride_src, sh);
++                }
++            }
++            if (!bottom_edge) {  // likewise for the row below the block (dst + height * stride_dst)
++                uint8_t * const dst1 = dst + height * stride_dst;
++                int src_idx;
++                const uint8_t * const src_spb = s->sao_pixel_buffer_h[c_idx] + (((2 * y_ctb + 2) * w + x0) << sh);
++                const unsigned int hoff = height * stride_src;
++
++                if (src_l != NULL) {
++                    src_idx = (CTB(s->sao, x_ctb-1, y_ctb+1).type_idx[c_idx] ==
++                               SAO_APPLIED);
++                    copy_pixel(dst1 - (1 << sh), src_idx ? src_spb - (1 << sh) : src_l + hoff, sh);
++                }
++
++                src_idx = (CTB(s->sao, x_ctb, y_ctb+1).type_idx[c_idx] ==
++                           SAO_APPLIED);
++                memcpy(dst1, src_idx ? src_spb : src + hoff, width << sh);
++
++                if (src_r != NULL) {
++                    src_idx = (CTB(s->sao, x_ctb+1, y_ctb+1).type_idx[c_idx] ==
++                               SAO_APPLIED);
++                    copy_pixel(dst1 + (width << sh), src_idx ? src_spb + (width << sh) : src_r + hoff, sh);
++                }
++            }
++            if (src_l != NULL) {  // column to the left of the block: saved copy if the left CTB is already filtered, else the frame
++                if (CTB(s->sao, x_ctb-1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
++                    ff_hevc_rpi_copy_vert(dst - (1 << sh),
++                              s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb - 1) * h + y0) << sh),
++                              sh, height, stride_dst, 1 << sh);
++                } else {
++                    ff_hevc_rpi_copy_vert(dst - (1 << sh),
++                              src_l,
++                              sh, height, stride_dst, stride_src);
++                }
++            }
++            if (src_r != NULL) {  // column to the right of the block, chosen the same way
++                if (CTB(s->sao, x_ctb+1, y_ctb).type_idx[c_idx] == SAO_APPLIED) {
++                    ff_hevc_rpi_copy_vert(dst + (width << sh),
++                              s->sao_pixel_buffer_v[c_idx] + (((2 * x_ctb + 2) * h + y0) << sh),
++                              sh, height, stride_dst, 1 << sh);
++                } else {
++                    ff_hevc_rpi_copy_vert(dst + (width << sh),
++                              src_r,
++                              sh, height, stride_dst, stride_src);
++                }
++            }
++
++            s->hevcdsp.cpy_blk(dst, stride_dst, src, stride_src, width << sh, height);  // unfiltered block body goes into the middle of dstbuf
++
++            copy_CTB_to_hv(s, src, stride_src, x0, y0, width, height, c_idx,
++                           x_ctb, y_ctb);
++            if (sliced && c_idx != 0)
++            {
++                // Class always the same for both U & V (which is just as well :-))
++                s->hevcdsp.sao_edge_filter_c[tab](src, dst, stride_src,
++                                                sao->offset_val[1], sao->offset_val[2], sao->eo_class[1],
++                                                width, height);
++                s->hevcdsp.sao_edge_restore_c[restore](src, dst,
++                                                    stride_src, stride_dst,
++                                                    sao,
++                                                    edges, width,
++                                                    height, c_idx,
++                                                    vert_edge,
++                                                    horiz_edge,
++                                                    diag_edge);
++            }
++            else
++            {
++                s->hevcdsp.sao_edge_filter[tab](src, dst, stride_src, sao->offset_val[c_idx],
++                                                sao->eo_class[c_idx], width, height);
++                s->hevcdsp.sao_edge_restore[restore](src, dst,
++                                                    stride_src, stride_dst,
++                                                    sao,
++                                                    edges, width,
++                                                    height, c_idx,
++                                                    vert_edge,
++                                                    horiz_edge,
++                                                    diag_edge);
++            }
++            restore_tqb_pixels(s, src, dst, stride_src, stride_dst,
++                               x, y, width, height, c_idx);
++            sao->type_idx[c_idx] = SAO_APPLIED;
++            break;
++        }
++        }
++    }
++
++#if RPI_ZC_SAND_8_IN_10_BUF
++    if (s->frame->format == AV_PIX_FMT_SAND64_10 && s->frame->buf[RPI_ZC_SAND_8_IN_10_BUF] != NULL &&
++        (((x + (1 << (s->ps.sps->log2_ctb_size))) & 255) == 0 || edges[2]))  // only once a 256-pel-wide span (or the picture's right edge) is complete
++    {
++        const unsigned int stride1 = frame_stride1(s->frame, 1);
++        const unsigned int stride2 = av_rpi_sand_frame_stride2(s->frame);
++        const unsigned int xoff = (x >> 8) * stride2 * stride1;
++        const unsigned int ctb_size = (1 << s->ps.sps->log2_ctb_size);
++        const uint8_t * const sy = s->frame->data[0] + xoff * 4 + y * stride1;
++        uint8_t * const dy = s->frame->buf[4]->data + xoff * 2 + y * stride1;  // NOTE(review): index 4 is hard-coded but the guard tests buf[RPI_ZC_SAND_8_IN_10_BUF] - confirm they are the same buffer
++        const uint8_t * const sc = s->frame->data[1] + xoff * 4 + (y >> 1) * stride1;
++        uint8_t * const dc = s->frame->buf[4]->data + (s->frame->data[1] - s->frame->data[0]) + xoff * 2 + (y >> 1) * stride1;
++        const unsigned int wy = !edges[2] ? 256 : s->ps.sps->width - (x & ~255);
++        const unsigned int hy = !edges[3] ? ctb_size : s->ps.sps->height - y;
++
++//        printf("dy=%p/%p, stride1=%d, stride2=%d, sy=%p/%p, wy=%d, hy=%d, x=%d, y=%d, cs=%d\n", dy, dc, stride1, stride2, sy, sc, wy, hy, x, y, ctb_size);
++        av_rpi_sand16_to_sand8(dy, stride1, stride2, sy, stride1, stride2, wy, hy, 3);
++        av_rpi_sand16_to_sand8(dc, stride1, stride2, sc, stride1, stride2, wy, hy >> 1, 3);
++    }
++#endif
++}
++
++// When bits are delivered to deblock we want them
++//#define TL 1
++//#define TR 2
++//#define BL 4
++//#define BR 8
++
++// pcm4 returns them as b0 = tl, b1 = tr, b16 = bl, b17 = br
++// so we need to rearrange before passing on
++
++static inline uint32_t pcm4(const HEVCRpiContext * const s, const unsigned int x, const unsigned int y)
++{   // Gathers is_pcm flag bits for two block rows at pel (x, y): b0.. from the row holding y, b16.. from the row below it
++    const uint8_t * const pcm = s->is_pcm + (x >> 6) + (y >> 3) * s->ps.sps->pcm_width;  // 1 bit per 8 pels, 64 pels per byte
++    return (pcm[0] |
++        (pcm[1] << 8) |
++        (pcm[s->ps.sps->pcm_width] << 16) |
++        ((uint32_t)pcm[s->ps.sps->pcm_width + 1] << 24)) >> ((x >> 3) & 7);  // cast: a uint8_t promotes to int, and << 24 of a value >= 0x80 overflows it
++}
++
++static inline uint32_t pcm2(const HEVCRpiContext * const s, const unsigned int x, const unsigned int y)
++{   // Single-row counterpart of pcm4(): is_pcm flag bits from pel (x, y) rightwards, b0 first
++    const uint8_t * const p = s->is_pcm + (y >> 3) * s->ps.sps->pcm_width + (x >> 6);
++    return ((p[1] << 8) | p[0]) >> ((x >> 3) & 7);  // two bytes are read so the shifted result still has at least 9 valid bits
++}
++
++// Address of the 32-bit bs word covering pel (x, y); const is cast away so this works for both get and set
++static inline uint32_t * bs_ptr32(const uint8_t * bs, const unsigned int stride2, const unsigned int x, const unsigned int y)
++{
++    return (uint32_t *)(bs +
++#if (~3U & (HEVC_RPI_BS_STRIDE1_PEL_MASK >> HEVC_RPI_BS_PELS_PER_BYTE_SHIFT)) != 0
++#warning Unexpected masks
++        // As it happens we end up with stride1 = sizeof(uint32_t) so this expr vanishes
++        ((x >> HEVC_RPI_BS_PELS_PER_BYTE_SHIFT) &
++            (~3 & (HEVC_RPI_BS_STRIDE1_PEL_MASK >> HEVC_RPI_BS_PELS_PER_BYTE_SHIFT))) +
++#elif HEVC_RPI_BS_STRIDE1_BYTES < 4
++#error Stride1 < return size
++#endif
++        ((y >> HEVC_RPI_BS_Y_SHR) << HEVC_RPI_BS_STRIDE1_BYTE_SHIFT) +  // byte offset of the row selected by y
++        (x >> HEVC_RPI_BS_STRIDE1_PEL_SHIFT) * stride2);  // plus stride2 bytes for every whole stride1 span of pels in x
++}
++
++static inline uint8_t * bs_ptr8(const uint8_t * bs, const unsigned int stride2, const unsigned int x, const unsigned int y)
++{   // Address of the bs byte covering pel (x, y); const is cast away on return
++    const unsigned int byte_off = (x >> HEVC_RPI_BS_PELS_PER_BYTE_SHIFT) &
++        (HEVC_RPI_BS_STRIDE1_PEL_MASK >> HEVC_RPI_BS_PELS_PER_BYTE_SHIFT);  // byte within one stride1 row
++    const unsigned int row_off = (y >> HEVC_RPI_BS_Y_SHR) << HEVC_RPI_BS_STRIDE1_BYTE_SHIFT;  // row selected by y
++    const unsigned int col_off = (x >> HEVC_RPI_BS_STRIDE1_PEL_SHIFT) * stride2;  // stride2 bytes per whole stride1 span in x
++    return (uint8_t *)(bs + byte_off + row_off + col_off);
++}
++
++
++// Get the block strength bits for pels [xl, xr) on row y, right-justified; returns 0 if the range is empty
++// Given how we call we will always get within the 32bit boundaries
++static inline uint32_t bs_get32(const uint8_t * bs, unsigned int stride2,
++                                unsigned int xl, unsigned int xr, const unsigned int y)
++{
++    if (xr <= xl) {
++        return 0;
++    }
++    else
++    {
++#if HAVE_ARMV6T2_INLINE
++#if (~3U & (HEVC_RPI_BS_STRIDE1_PEL_MASK >> HEVC_RPI_BS_PELS_PER_BYTE_SHIFT)) != 0
++#error This case not yet handled in bs_get32
++#elif HEVC_RPI_BS_STRIDE1_BYTES < 4
++#error Stride1 < return size
++#endif
++        uint32_t tmp;  // asm below computes the same value as the C fallback in the #else branch
++        __asm__ (
++            "lsr         %[tmp], %[xl], %[xl_shift]                  \n\t"  // tmp = xl >> xl_shift
++            "rsb         %[xr], %[xl], %[xr]                         \n\t"  // xr = xr - xl
++            "mla         %[stride2], %[stride2], %[tmp], %[bs]       \n\t"  // stride2 = bs + stride2 * tmp
++            "add         %[xr], %[xr], #7                            \n\t"
++            "lsr         %[bs], %[y], %[y_shift1]                    \n\t"  // bs = y >> y_shift1
++            "bic         %[xr], %[xr], #7                            \n\t"  // xr = (xr + 7) & ~7
++            "ubfx        %[xl], %[xl], #1, #5                        \n\t"  // xl = (xl >> 1) & 31
++            "lsr         %[xr], %[xr], #1                            \n\t"  // xr = n, the number of result bits wanted
++            "cmp         %[xr], #32                                  \n\t"
++            "mvn         %[tmp], #0                                  \n\t"  // tmp = ~0
++            "ldr         %[bs], [%[stride2], %[bs], lsl %[y_shift2]] \n\t"  // bs = the 32-bit bs word (a)
++            "lsl         %[tmp], %[tmp], %[xr]                       \n\t"  // tmp = ~0 << n
++            "lsr         %[xl], %[bs], %[xl]                         \n\t"  // xl = a >> xl
++            "it ne                                                   \n\t"
++            "bicne       %[bs], %[xl], %[tmp]                        \n\t"  // if n != 32: bs = xl & ~tmp, otherwise bs stays as a
++            :  // Outputs
++                      [bs]"+r"(bs),
++                 [stride2]"+r"(stride2),
++                      [xl]"+r"(xl),
++                      [xr]"+r"(xr),
++                     [tmp]"=&r"(tmp)
++            :  // Inputs
++                       [y]"r"(y),
++                [xl_shift]"M"(HEVC_RPI_BS_STRIDE1_PEL_SHIFT),
++                [y_shift1]"M"(HEVC_RPI_BS_Y_SHR),
++                [y_shift2]"M"(HEVC_RPI_BS_STRIDE1_BYTE_SHIFT)
++            :  // Clobbers
++                "cc"
++        );
++        return (uint32_t) bs;  // the bs register was reused above to hold the result
++#else
++        const uint32_t a = *bs_ptr32(bs, stride2, xl, y);
++        const unsigned int n = ((xr - xl + 7) & ~7) >> 1;  // 4 bits per 8 pels, with the pel count rounded up to a multiple of 8
++
++        return n == 32 ? a :  // whole word wanted; this also avoids a shift by 32 below
++            (a >> ((xl >> 1) & 31)) & ~(~0U << n);
++#endif
++    }
++}
++
++static inline uint32_t hbs_get32(const HEVCRpiContext * const s, const unsigned int xl, const unsigned int xr, const unsigned int y)
++{   // bs_get32() applied to s->bs_horizontal for pels [xl, xr) of row y
++    av_assert2((xl >> s->ps.sps->log2_ctb_size) == ((xr - 1) >> s->ps.sps->log2_ctb_size));  // range must sit inside one CTB column
++    return bs_get32(s->bs_horizontal, s->bs_stride2, xl, xr, y);
++}
++
++static inline uint32_t vbs_get32(const HEVCRpiContext * const s, const unsigned int xl, const unsigned int xr, const unsigned int y)
++{   // bs_get32() applied to s->bs_vertical for pels [xl, xr) of row y
++    av_assert2((xl >> s->ps.sps->log2_ctb_size) == ((xr - 1) >> s->ps.sps->log2_ctb_size));  // range must sit inside one CTB column
++    return bs_get32(s->bs_vertical, s->bs_stride2, xl, xr, y);
++}
++
++
++static void deblock_y_blk(const HEVCRpiContext * const s, const RpiBlk bounds, const int end_x, const int end_y)
++{   // Luma deblock of the area given by bounds, using the stored bs bits, is_pcm flags and qp_y_tab
++    const unsigned int log2_ctb_size = s->ps.sps->log2_ctb_size;
++    const unsigned int log2_min_cb_size  = s->ps.sps->log2_min_cb_size;
++    const unsigned int ctb_size = (1 << log2_ctb_size);
++    const unsigned int cb_r = bounds.x + bounds.w - (end_x ? 0 :  1);  // right limit: 1 pel short of the bounds unless end_x is set
++    const unsigned int ctb_n = (bounds.x + bounds.y * s->ps.sps->ctb_width) >> log2_ctb_size;
++    const DBParams * cb_dbp = s->deblock + ctb_n;
++    const unsigned int b_b = bounds.y + bounds.h - (end_y ? 0 : 8);  // bottom limit: 8 rows short of the bounds unless end_y is set
++
++    unsigned int cb_x;
++
++    // Do in CTB-shaped blocks
++    for (cb_x = bounds.x; cb_x < cb_r; cb_x += ctb_size, ++cb_dbp)
++    {
++        const unsigned int bv_r = FFMIN(cb_x + ctb_size, cb_r);
++        const unsigned int bv_l = FFMAX(cb_x, 8);  // never below 8, so the vertical edge at x == 0 is not filtered
++        const unsigned int bh_r = cb_x + ctb_size >= cb_r ? cb_r - 8 : cb_x + ctb_size - 9;
++        const unsigned int bh_l = bv_l - 8;
++        unsigned int y;
++
++        // Main body
++        for (y = (bounds.y == 0 ? 0 : bounds.y - 8); y < b_b; y += 8)  // starts 8 rows above the bounds except at the top of the picture
++        {
++            uint32_t vbs = vbs_get32(s, bv_l, bv_r, y);
++
++            const DBParams * const dbp = y < bounds.y ? cb_dbp - s->ps.sps->ctb_width : cb_dbp;  // rows above the bounds use the params of the CTB above
++            const int8_t * const qta = s->qp_y_tab + ((y - 1) >> log2_min_cb_size) * s->ps.sps->min_cb_width;  // QP row for the pels just above y
++            const int8_t * const qtb = s->qp_y_tab + (y >> log2_min_cb_size) * s->ps.sps->min_cb_width;  // QP row for the pels at y
++
++            if (vbs != 0)
++            {
++                const uint8_t * const tcv = tctable + dbp->tc_offset;
++                const uint8_t * const betav = betatable + dbp->beta_offset;
++                unsigned int pcmfa = pcm2(s, bv_l - 1, y);
++                unsigned int x;
++
++                for (x = bv_l; vbs != 0; x += 8, vbs >>= 4, pcmfa >>= 1)  // each 8-pel step consumes 4 bits of vbs and 1 bit of pcmfa
++                {
++                    if ((vbs & 0xf) != 0 && (pcmfa & 3) != 3)  // skipped when the blocks on both sides of the edge are flagged in is_pcm
++                    {
++                        const int qp = (qtb[(x - 1) >> log2_min_cb_size] + qtb[x >> log2_min_cb_size] + 1) >> 1;
++                        s->hevcdsp.hevc_v_loop_filter_luma2(av_rpi_sand_frame_pos_y(s->frame, x, y),
++                                                         frame_stride1(s->frame, LUMA),
++                                                         betav[qp],
++                                                         ((vbs & 3) == 0 ? 0 : tcv[qp + (int)(vbs & 2)]) |
++                                                          (((vbs & 0xc) == 0 ? 0 : tcv[qp + (int)((vbs >> 2) & 2)]) << 16),
++                                                         pcmfa & 3,
++                                                         av_rpi_sand_frame_pos_y(s->frame, x - 4, y));
++                    }
++                }
++            }
++
++            if (y != 0)  // no horizontal edge is filtered on the top row of the picture
++            {
++                uint32_t hbs;
++
++                // H left - mostly separated out so we only need a uint32_t hbs
++                if ((hbs = hbs_get32(s, bh_l, cb_x, y)) != 0)
++                {
++                    const unsigned int x = bh_l;
++                    const unsigned int pcmfa = pcm4(s, bh_l, y - 1);
++                    const int qp = (qta[x >> log2_min_cb_size] + qtb[x >> log2_min_cb_size] + 1) >> 1;
++                    const DBParams * const dbph = dbp - 1;  // this 8-pel span lies left of cb_x, so the previous CTB's params are used
++                    const uint8_t * const tc = tctable + dbph->tc_offset + qp;
++
++                    av_assert2(cb_x - bh_l == 8);
++
++                    s->hevcdsp.hevc_h_loop_filter_luma2(av_rpi_sand_frame_pos_y(s->frame, x, y),
++                                                         frame_stride1(s->frame, LUMA),
++                                                         betatable[qp + dbph->beta_offset],
++                                                         ((hbs & 3) == 0 ? 0 : tc[hbs & 2]) |
++                                                            (((hbs & 0xc) == 0 ? 0 : tc[(hbs >> 2) & 2]) << 16),
++                                                         (pcmfa & 1) | ((pcmfa & 0x10000) >> 15));  // pcm4's b0 and b16 packed into bits 0 and 1
++                }
++
++                // H
++                if ((hbs = hbs_get32(s, cb_x, bh_r + 1, y)) != 0)  // Will give (x <= bh_r) in for loop
++                {
++                    unsigned int x;
++                    unsigned int pcmfa = pcm4(s, cb_x, y - 1);
++
++                    for (x = cb_x; hbs != 0; x += 8, hbs >>= 4, pcmfa >>= 1)
++                    {
++                        if ((hbs & 0xf) != 0 && (~pcmfa & 0x10001) != 0)  // skipped when both b0 and b16 of pcmfa are set
++                        {
++                            const int qp = (qta[x >> log2_min_cb_size] + qtb[x >> log2_min_cb_size] + 1) >> 1;
++                            const uint8_t * const tc = tctable + dbp->tc_offset + qp;
++                            s->hevcdsp.hevc_h_loop_filter_luma2(av_rpi_sand_frame_pos_y(s->frame, x, y),
++                                                                frame_stride1(s->frame, LUMA),
++                                                                betatable[qp + dbp->beta_offset],
++                                                                ((hbs & 3) == 0 ? 0 : tc[hbs & 2]) |
++                                                                   (((hbs & 0xc) == 0 ? 0 : tc[(hbs >> 2) & 2]) << 16),
++                                                                (pcmfa & 1) | ((pcmfa & 0x10000) >> 15));
++                        }
++                    }
++                }
++            }
++
++        }
++    }
++}
++
++static av_always_inline int q2h(const HEVCRpiContext * const s, const unsigned int x, const unsigned int y)
++{   // Rounded mean of the qp_y_tab entries for pels (x - 1, y) and (x, y)
++    const unsigned int shift = s->ps.sps->log2_min_cb_size;  // qp_y_tab holds one entry per min-size CB
++    const int8_t * const row = s->qp_y_tab + (y >> shift) * s->ps.sps->min_cb_width;
++    return (1 + row[x >> shift] + row[(x - 1) >> shift]) >> 1;
++}
++
++static void deblock_uv_blk(const HEVCRpiContext * const s, const RpiBlk bounds, const int end_x, const int end_y)
++{
++    const unsigned int log2_ctb_size = s->ps.sps->log2_ctb_size;
++    const unsigned int log2_min_cb_size  = s->ps.sps->log2_min_cb_size;
++    const unsigned int ctb_size = (1 << log2_ctb_size);
++    const unsigned int cb_r = bounds.x + bounds.w - (end_x ? 0 :  8);
++    const unsigned int ctb_n = (bounds.x + bounds.y * s->ps.sps->ctb_width) >> log2_ctb_size;
++    const DBParams * dbp = s->deblock + ctb_n;
++    const unsigned int b_b = bounds.y + bounds.h - (end_y ? 0 : 8);
++    const uint8_t * const tcq_u = s->ps.pps->qp_dblk_x[1];
++    const uint8_t * const tcq_v = s->ps.pps->qp_dblk_x[2];
++
++    unsigned int cb_x;
++
++    av_assert1((bounds.x & (ctb_size - 1)) == 0);
++    av_assert1((bounds.y & (ctb_size - 1)) == 0);
++    av_assert1(bounds.h <= ctb_size);
++
++    // Do in CTB-shaped blocks
++    for (cb_x = bounds.x; cb_x < cb_r; cb_x += ctb_size, ++dbp) {
++        const unsigned int bv_r = FFMIN(cb_x + ctb_size, cb_r);
++        const unsigned int bv_l = FFMAX(cb_x, 16);
++        unsigned int y;
++
++        // V above
++        if (bounds.y != 0) {
++            // Deblock V up 8
++            // CTB above current
++            // Top-half only (tc4 & ~0xffff == 0) is special cased in asm
++            const unsigned int y = bounds.y - 8;
++            uint32_t vbs = vbs_get32(s, bv_l, bv_r, y) & 0x02020202U;
++
++            if (vbs != 0)
++            {
++                unsigned int pcmfa = pcm2(s, bv_l - 1, y);
++                const uint8_t * const tc = tctable + 2 + (dbp - s->ps.sps->ctb_width)->tc_offset;
++                unsigned int x;
++
++                for (x = bv_l; vbs != 0; x += 16, vbs >>= 8, pcmfa >>= 2)
++                {
++                    if ((vbs & 2) != 0 && (~pcmfa & 3) != 0)
++                    {
++                        const int qp0 = q2h(s, x, y);
++                        s->hevcdsp.hevc_v_loop_filter_uv2(av_rpi_sand_frame_pos_c(s->frame, x >> 1, y >> 1),
++                                                       frame_stride1(s->frame, 1),
++                                                       tc[tcq_u[qp0]] | (tc[tcq_v[qp0]] << 8),
++                                                       av_rpi_sand_frame_pos_c(s->frame, (x >> 1) - 2, y >> 1),
++                                                       pcmfa & 3);
++                    }
++                }
++            }
++        }
++
++        for (y = bounds.y; y < b_b; y += 16)
++        {
++            uint32_t vbs = (vbs_get32(s, bv_l, bv_r, y) & 0x02020202U) |
++                (y + 16 > b_b ? 0 : (vbs_get32(s, bv_l, bv_r, y + 8) & 0x02020202U) << 4);
++
++            // V
++            if (vbs != 0)
++            {
++                unsigned int x;
++                unsigned int pcmfa =
++                    (y + 16 > b_b ?
++                        pcm2(s, bv_l - 1, y) | 0xffff0000 :
++                        pcm4(s, bv_l - 1, y));
++                const uint8_t * const tc = tctable + 2 + dbp->tc_offset;
++
++                for (x = bv_l; vbs != 0; x += 16, vbs >>= 8, pcmfa >>= 2)
++                {
++                    if ((vbs & 0xff) != 0 && (~pcmfa & 0x30003) != 0)
++                    {
++                        const int qp0 = q2h(s, x, y);
++                        const int qp1 = q2h(s, x, y + 8);
++                        s->hevcdsp.hevc_v_loop_filter_uv2(av_rpi_sand_frame_pos_c(s->frame, x >> 1, y >> 1),
++                            frame_stride1(s->frame, 1),
++                            ((vbs & 2) == 0 ? 0 : (tc[tcq_u[qp0]] << 0) | (tc[tcq_v[qp0]] << 8)) |
++                                ((vbs & 0x20) == 0 ? 0 : (tc[tcq_u[qp1]] << 16) | (tc[tcq_v[qp1]] << 24)),
++                            av_rpi_sand_frame_pos_c(s->frame, (x >> 1) - 2, y >> 1),
++                            (pcmfa & 3) | ((pcmfa >> 14) & 0xc));
++                    }
++                }
++            }
++
++            // H
++            if (y != 0)
++            {
++                uint32_t hbs;
++                const unsigned int bh_l = bv_l - 16;
++                const unsigned int bh_r = cb_x + ctb_size >= cb_r ? cb_r : cb_x + ctb_size - 16;
++                const int8_t * const qta = s->qp_y_tab + ((y - 1) >> log2_min_cb_size) * s->ps.sps->min_cb_width;
++                const int8_t * const qtb = s->qp_y_tab + (y >> log2_min_cb_size) * s->ps.sps->min_cb_width;
++
++                // H left - mostly separated out so we only need a uint32_t hbs
++                // Stub is width 8 to the left of bounds, but width 16 internally
++                if ((hbs = hbs_get32(s, bh_l, cb_x, y) & 0x22U) != 0)
++                {
++                    unsigned int pcmfa = pcm4(s, bh_l, y - 1);
++
++                    // Chop off bits we don't want...
++                    if (bh_l < bounds.x) {
++                        pcmfa |= 0x10001; // TL|BL pre rearrangement
++                        hbs &= ~3;  // Make BS 0
++                    }
++
++                    // Double check we still want this
++                    if (hbs != 0 && (~pcmfa & 0x30003) != 0)
++                    {
++                        const unsigned int x = bh_l;
++                        const int qp0 = (qta[x >> log2_min_cb_size] + qtb[x >> log2_min_cb_size] + 1) >> 1;
++                        const int qp1 = (qta[(x + 8) >> log2_min_cb_size] + qtb[(x + 8) >> log2_min_cb_size] + 1) >> 1;
++                        const uint8_t * const tc = tctable + 2 + (dbp - 1)->tc_offset;
++
++                        s->hevcdsp.hevc_h_loop_filter_uv(av_rpi_sand_frame_pos_c(s->frame, x >> 1, y >> 1),
++                            frame_stride1(s->frame, 1),
++                            ((hbs & 2) == 0 ? 0 : (tc[tcq_u[qp0]] << 0) | (tc[tcq_v[qp0]] << 8)) |
++                                ((hbs & 0x20) == 0 ? 0 : (tc[tcq_u[qp1]] << 16) | (tc[tcq_v[qp1]] << 24)),
++                            (pcmfa & 3) | ((pcmfa >> 14) & 0xc));
++                    }
++                }
++
++                // H main
++                if ((hbs = (hbs_get32(s, cb_x, bh_r, y) & 0x22222222U)) != 0)
++                {
++                    unsigned int x;
++                    unsigned int pcmfa = pcm4(s, cb_x, y - 1);  // Might like to mask out far right writes but probably not worth it
++
++                    for (x = cb_x; hbs != 0; x += 16, hbs >>= 8, pcmfa >>= 2)
++                    {
++                        if ((hbs & 0xff) != 0 && (~pcmfa & 0x30003) != 0)
++                        {
++                            const int qp0 = (qta[x >> log2_min_cb_size] + qtb[x >> log2_min_cb_size] + 1) >> 1;
++                            const int qp1 = (qta[(x + 8) >> log2_min_cb_size] + qtb[(x + 8) >> log2_min_cb_size] + 1) >> 1;
++                            const uint8_t * const tc = tctable + 2 + dbp->tc_offset;
++
++                            s->hevcdsp.hevc_h_loop_filter_uv(av_rpi_sand_frame_pos_c(s->frame, x >> 1, y >> 1),
++                                frame_stride1(s->frame, 1),
++                                ((hbs & 2) == 0 ? 0 : (tc[tcq_u[qp0]] << 0) | (tc[tcq_v[qp0]] << 8)) |
++                                    ((hbs & 0x20) == 0 ? 0 : (tc[tcq_u[qp1]] << 16) | (tc[tcq_v[qp1]] << 24)),
++                                (pcmfa & 3) | ((pcmfa >> 14) & 0xc));
++                        }
++                    }
++                }
++            }
++        }
++    }
++}
++
++static inline unsigned int off_boundary(const unsigned int x, const unsigned int log2_n)
++{
++    return x & ~(~0U << log2_n);  // Low log2_n bits of x: 0 iff x is a multiple of (1 << log2_n)
++}
++
++static inline void hbs_set(const HEVCRpiContext * const s, const unsigned int x, const unsigned int y, const uint32_t mask, uint32_t bsf)
++{
++    av_assert2((y & 7) == 0);  // y must be a multiple of 8
++
++    // This doesn't have the same simultaneous update issues that bsf_stash
++    // does (other threads will have a different y) so we can do it the easy way
++    if ((bsf &= mask) != 0)  // Keep only the bits selected by mask; nothing to store if none remain
++        *bs_ptr32(s->bs_horizontal, s->bs_stride2, x, y) |= bsf << ((x >> 1) & 31);  // OR into the 32-bit word at bit offset (x / 2) mod 32
++}
++
++// OR the flags (bsf & mask) into the byte-wide s->bs_vertical store for position (x, y)
++static void vbs_set(const HEVCRpiContext * const s, const unsigned int x, const unsigned int y, const uint32_t mask, uint32_t bsf)
++{
++    // We arrange this in a slightly odd fashion but it lines up with
++    // how we are going to use it in the actual deblock code & it is easier
++    // to do the contortions here than there
++    //
++    // Arrange (LE) {x0y0, x0y4, x8y0, x8y4}, {x16y0, x16y4, x24y0, x24y4},...
++
++    av_assert2((x & 7) == 0);  // x must be a multiple of 8
++
++    if ((bsf &= mask) != 0)
++    {
++        uint8_t *p = bs_ptr8(s->bs_vertical, s->bs_stride2, x, y);
++        const unsigned int sh = ((x & 8) | (y & 4)) >> 1;  // Bit offset within the byte: 0, 2, 4 or 6
++
++        if (mask <= 0xf)  // Single store when the mask selects no more than the low 4 bits
++        {
++            *p |= (bsf << sh);
++        }
++        else
++        {
++            do {  // Store 4 bits per iteration, advancing p by HEVC_RPI_BS_STRIDE1_BYTES each time
++                *p |= (bsf & 0xf) << sh;
++                p += HEVC_RPI_BS_STRIDE1_BYTES;
++            } while ((bsf >>= 4) != 0);
++        }
++    }
++}
++
++static inline uint32_t bsf_mv(const HEVCRpiContext * const s,
++                              const unsigned int rep, const unsigned int dup,
++                              const unsigned int mvf_stride0,
++                              const unsigned int mvf_stride1,
++                              const RefPicList * const rpl_p, const RefPicList * const rpl_q,
++                              const HEVCRpiMvField * const mvf_p, const HEVCRpiMvField * const mvf_q)
++{
++    return s->hevcdsp.hevc_deblocking_boundary_strengths(rep, dup,  // Thin wrapper round the DSP function pointer
++            mvf_p, mvf_q,
++            rpl_p[0].list, rpl_p[1].list, rpl_q[0].list, rpl_q[1].list,  // Lists [0] & [1] for each side
++            sizeof(HEVCRpiMvField) * mvf_stride0, sizeof(HEVCRpiMvField) * mvf_stride1);  // Strides converted from elements to bytes
++}
++
++// Work out the deblock boundary-strength flags for the block at (x0, y0) (size 1 << log2_trafo_size) and record them with hbs_set() / vbs_set()
++void ff_hevc_rpi_deblocking_boundary_strengths(const HEVCRpiContext * const s,
++                                               const HEVCRpiLocalContext * const lc,
++                                               const unsigned int x0, const unsigned int y0,
++                                               const unsigned int log2_trafo_size,
++                                               const int is_coded_block)
++{
++    const HEVCRpiMvField * const mvf_curr      = mvf_stash_ptr(s, lc, x0, y0);
++    const unsigned int log2_min_pu_size = LOG2_MIN_PU_SIZE;
++    const RefPicList * const rpl        = s->refPicList;
++    // Rep count for bsf_mv when running with min_pu chunks
++    const unsigned int log2_rep_min_pu  = log2_trafo_size <= log2_min_pu_size ? 0 : log2_trafo_size - log2_min_pu_size;
++    const unsigned int boundary_flags   = s->sh.no_dblk_boundary_flags & lc->boundary_flags;
++    const unsigned int trafo_size       = (1U << log2_trafo_size);
++    const uint32_t bsf_mask             = log2_trafo_size > 5 ? ~0U : (1U << (trafo_size >> 1)) - 1;  // trafo_size / 2 flag bits (all 32 for size 64)
++    const uint32_t bsf_cbf              = (bsf_mask & 0x55555555);  // Bit 0 (coded) of every 2-bit field
++
++    // Do we cover a pred split line?
++    const int has_x_split = x0 < lc->cu.x_split && x0 + trafo_size > lc->cu.x_split;
++    const int has_y_split = y0 < lc->cu.y_split && y0 + trafo_size > lc->cu.y_split;
++
++    uint32_t bsf_h;
++    uint32_t bsf_v;
++
++#ifdef DISABLE_STRENGTHS
++    return;
++#endif
++
++    // We are always on a size boundary
++    av_assert2((x0 & (trafo_size - 1)) == 0);
++    av_assert2((y0 & (trafo_size - 1)) == 0);
++    // log2_trafo_size not really a transform size; we can have to deal
++    // with size 2^6 blocks
++    av_assert2(log2_trafo_size >= 2 && log2_trafo_size <= 6);
++
++    // Retrieve and update coded (b0), intra (b1) bs flags
++    //
++    // Store on min width (rather than uint32_t) to avoid possible issues
++    // with another thread on another core running wpp using the same
++    // memory (min CTB = 16 pels = 4 bsf els = 8 bits)
++    //
++    // In bsf BS=2 is represented by 3 as it is much easier to test & set
++    // and the actual deblock code tests for 0 and b1 set/not-set so 2 and
++    // 3 will work the same
++    {
++        // Given where we are called from is_cbf_luma & is_intra will be constant over the block
++        const uint32_t bsf0 =  (lc->cu.pred_mode == MODE_INTRA) ? bsf_mask : is_coded_block ? bsf_cbf : 0;
++        uint8_t *const p = s->bsf_stash_up + (x0 >> 4);  // One stash byte per 16 pels of x
++        uint8_t *const q = s->bsf_stash_left + (y0 >> 4);  // One stash byte per 16 pels of y
++
++        switch (log2_trafo_size)
++        {
++            case 2:
++            case 3:
++            {
++                const unsigned int sh_h = (x0 >> 1) & 7;  // Bit position of this block within its stash byte
++                const unsigned int sh_v = (y0 >> 1) & 7;
++                bsf_h = *p;
++                bsf_v = *q;
++                *p = (bsf_h & ~(bsf_mask << sh_h)) | (bsf0 << sh_h);  // Replace only our bits in the shared byte
++                *q = (bsf_v & ~(bsf_mask << sh_v)) | (bsf0 << sh_v);
++                bsf_h >>= sh_h;
++                bsf_v >>= sh_v;
++                break;
++            }
++            case 4:
++                bsf_h = *p;
++                bsf_v = *q;
++                *p = bsf0;
++                *q = bsf0;
++                break;
++            case 5:
++                bsf_h = *(uint16_t *)p;
++                bsf_v = *(uint16_t *)q;
++                *(uint16_t *)p = bsf0;
++                *(uint16_t *)q = bsf0;
++                break;
++            case 6:
++            default:
++                bsf_h = *(uint32_t *)p;
++                bsf_v = *(uint32_t *)q;
++                *(uint32_t *)p = bsf0;
++                *(uint32_t *)q = bsf0;
++                break;
++        }
++
++        bsf_h |= bsf0;  // Previously stashed flags OR our own
++        bsf_v |= bsf0;
++    }
++
++    // Do Horizontal
++    if ((y0 & 7) == 0)
++    {
++        // Boundary upper
++        if (y0 != 0 &&
++            (off_boundary(y0, s->ps.sps->log2_ctb_size) ||
++             (boundary_flags & (BOUNDARY_UPPER_SLICE | BOUNDARY_UPPER_TILE)) == 0))
++        {
++            // Look at MVs (BS=1) if we don't already have a full set of bs bits
++            if ((~bsf_h & bsf_cbf) != 0 && (y0 == lc->cu.y || y0 == lc->cu.y_split))
++            {
++                // If we aren't on the top boundary we must be in the middle
++                // and in that case we know where mvf can change
++                const unsigned int log2_rep = (y0 == lc->cu.y) ? log2_rep_min_pu : has_x_split ? 1 : 0;
++                const RefPicList *const rpl_top = !off_boundary(y0, s->ps.sps->log2_ctb_size) ?
++                      s->rpl_up[x0 >> s->ps.sps->log2_ctb_size] :
++                      rpl;
++
++                bsf_h |= bsf_mv(s, 1 << log2_rep, trafo_size >> (2 + log2_rep),
++                    trafo_size >> (log2_min_pu_size + log2_rep),
++                    trafo_size >> (log2_min_pu_size + log2_rep),
++                    rpl, rpl_top,
++                    mvf_curr, mvf_ptr(s, lc, x0, y0, x0, y0 - 1));
++            }
++
++            // Finally put the results into bs
++            hbs_set(s, x0, y0, bsf_mask, bsf_h);
++        }
++
++        // Max of 1 pu internal split - ignore if not on 8pel boundary
++        if (has_y_split && !off_boundary(lc->cu.y_split, 3))
++        {
++            const HEVCRpiMvField * const mvf = mvf_stash_ptr(s, lc, x0, lc->cu.y_split);
++            // If we have the x split as well then it must be in the middle
++            const unsigned int log2_rep = has_x_split ? 1 : 0;
++
++            hbs_set(s, x0, lc->cu.y_split, bsf_mask,
++                bsf_mv(s, 1 << log2_rep, trafo_size >> (2 + log2_rep),
++                   trafo_size >> (log2_min_pu_size + log2_rep),
++                   trafo_size >> (log2_min_pu_size + log2_rep),
++                   rpl, rpl,
++                   mvf, mvf - MVF_STASH_WIDTH_PU));  // Compare against the stash row directly above
++        }
++    }
++
++    // And again for vertical - same logic as horizontal just in the other direction
++    if ((x0 & 7) == 0)
++    {
++        // Boundary left
++        if (x0 != 0 &&
++            (off_boundary(x0, s->ps.sps->log2_ctb_size) ||
++             (boundary_flags & (BOUNDARY_LEFT_SLICE | BOUNDARY_LEFT_TILE)) == 0))
++        {
++            if ((~bsf_v & bsf_cbf) != 0 && (x0 == lc->cu.x || x0 == lc->cu.x_split))
++            {
++                const unsigned int log2_rep = (x0 == lc->cu.x) ? log2_rep_min_pu : has_y_split ? 1 : 0;
++                const RefPicList *const rpl_left = !off_boundary(x0, s->ps.sps->log2_ctb_size) ?
++                    s->rpl_left[y0 >> s->ps.sps->log2_ctb_size] :
++                    rpl;
++
++                bsf_v |= bsf_mv(s, 1 << log2_rep, trafo_size >> (2 + log2_rep),
++                    (MVF_STASH_WIDTH_PU << log2_trafo_size) >> (log2_min_pu_size + log2_rep),
++                    (mvf_left_stride(s, x0, x0 - 1) << log2_trafo_size) >> (log2_min_pu_size + log2_rep),
++                    rpl, rpl_left,
++                    mvf_curr, mvf_ptr(s, lc, x0, y0, x0 - 1, y0));
++            }
++
++            vbs_set(s, x0, y0, bsf_mask, bsf_v);
++        }
++
++        if (has_x_split && !off_boundary(lc->cu.x_split, 3))
++        {
++            const HEVCRpiMvField *const mvf = mvf_stash_ptr(s, lc, lc->cu.x_split, y0);
++            const unsigned int log2_rep = has_y_split ? 1 : 0;
++
++            vbs_set(s, lc->cu.x_split, y0, bsf_mask,
++                bsf_mv(s, 1 << log2_rep, trafo_size >> (2 + log2_rep),
++                   (MVF_STASH_WIDTH_PU << log2_trafo_size) >> (log2_min_pu_size + log2_rep),
++                   (MVF_STASH_WIDTH_PU << log2_trafo_size) >> (log2_min_pu_size + log2_rep),
++                   rpl, rpl,
++                   mvf, mvf - 1));  // Compare against the stash entry directly to the left
++        }
++    }
++}
++
++#undef LUMA
++#undef CB
++#undef CR
++
++static inline unsigned int ussub(const unsigned int a, const unsigned int b)
++{
++    return a < b ? 0 : a - b;  // Unsigned saturating subtract: a - b, clamped at 0
++}
++
++static inline int cache_boundry(const AVFrame * const frame, const unsigned int x)
++{
++    return ((x >> av_rpi_sand_frame_xshl(frame)) & ~63) == 0;  // True iff (x >> xshl) < 64. NOTE(review): the name suggests an alignment test ("& 63") - confirm "& ~63" is intended
++}
++// Deblock bounds, then SAO-filter and cache-flush the area that is now final; returns 0 unless a flush reached the right picture edge, then the flush bottom limit (INT_MAX at the bottom picture edge)
++int ff_hevc_rpi_hls_filter_blk(const HEVCRpiContext * const s, const RpiBlk bounds, const int eot)
++{
++    const int ctb_size = (1 << s->ps.sps->log2_ctb_size);
++    int x, y;
++
++    const unsigned int br = bounds.x + bounds.w;  // Right edge of bounds (exclusive)
++    const unsigned int bb = bounds.y + bounds.h;  // Bottom edge of bounds (exclusive)
++
++    const int x_end = (br >= s->ps.sps->width);  // bounds reaches the right picture edge
++    const int y_end = (bb >= s->ps.sps->height);  // bounds reaches the bottom picture edge
++
++    // Deblock may not touch the edges of the bound as they are still needed
++    // for Intra pred
++    //
++    // Deblock is disabled with a per-slice flag
++    // Given that bounds may cover multiple slices & we deblock outside bounds
++    // anyway we can't avoid deblock using that flag - about the only thing we
++    // could do is have a "no deblock seen yet" flag but it doesn't really
++    // seem worth the effort
++
++    deblock_y_blk(s, bounds, x_end, y_end);
++    deblock_uv_blk(s, bounds, x_end, y_end);
++
++    // SAO needs
++    // (a) CTB alignment
++    // (b) Valid pixels all the way around the CTB in particular it needs the DR pixel
++    {
++        const unsigned int xo = bounds.x - ((bounds.x - 16) & ~(ctb_size - 1));  // Distance back to the CTB boundary at or before bounds.x - 16
++        const unsigned int yo = bounds.y - ((bounds.y - 16) & ~(ctb_size - 1));
++        const unsigned int yt = ussub(bounds.y, yo);
++        const unsigned int yb = y_end ? bb : ussub(bb, yo);
++        const unsigned int xl = ussub(bounds.x, xo);
++        const unsigned int xr = x_end ? br : ussub(br, xo);
++
++        if (s->ps.sps->sao_enabled)
++        {
++            for (y = yt; y < yb; y += ctb_size) {
++                for (x = xl; x < xr; x += ctb_size) {
++                    sao_filter_CTB(s, x, y);
++                }
++            }
++        }
++
++        // Cache invalidate
++        y = 0;  // Default return value when nothing is flushed
++        if (xr != 0 && yb != 0)
++        {
++            const unsigned int llen =
++                (av_rpi_sand_frame_stride1(s->frame) >> av_rpi_sand_frame_xshl(s->frame));
++            const unsigned int mask = ~(llen - 1);  // Rounds down to a multiple of llen (assumes llen is a power of 2 - TODO confirm)
++            const unsigned int il = (xl == 0) ? 0 : (xl - 1) & mask;
++            const unsigned int ir = x_end || !cache_boundry(s->frame, br) ? br : (xr - 1) & mask;
++            const unsigned int it = ussub(yt, 1);
++            const unsigned int ib = y_end ? bb : yb - 1;
++
++            if (il < ir) {
++                rpi_cache_buf_t cbuf;
++                rpi_cache_flush_env_t * const rfe = rpi_cache_flush_init(&cbuf);
++                rpi_cache_flush_add_frame_block(rfe, s->frame, RPI_CACHE_FLUSH_MODE_WB_INVALIDATE,
++                  il, it, ir - il, ib - it,
++                  ctx_vshift(s, 1), 1, 1);
++
++                // If we have to commit the right hand tile boundary due to
++                // cache boundary considerations then at EoTile we must commit
++                // that boundary to bottom of tile (bounds)
++                if (ib != bb && ir == br && eot) {
++                    rpi_cache_flush_add_frame_block(rfe, s->frame, RPI_CACHE_FLUSH_MODE_WB_INVALIDATE,
++                      br - 1, ib, 1, bb - ib,
++                      ctx_vshift(s, 1), 1, 1);
++                }
++
++                rpi_cache_flush_finish(rfe);
++
++                if (x_end)
++                    y = y_end ? INT_MAX : ib;  // Only report progress once the full width has been flushed
++
++//                printf("Flush: %4d,%4d -> %4d,%4d: signal: %d\n", il, it, ir, ib, y - 1);
++            }
++        }
++    }
++
++    return y;
++}
++
+--- /dev/null
++++ b/libavcodec/rpi_hevc_mv.h
+@@ -0,0 +1,71 @@
++#ifndef AVCODEC_RPI_HEVC_MV_H
++#define AVCODEC_RPI_HEVC_MV_H
++
++#include "config.h"
++
++typedef int32_t MvXY;  // Packed motion vector: x in the low 16 bits, y in the high 16 (see MV_X / MV_Y / MV_XY)
++
++typedef struct HEVCRpiMvField {
++    MvXY xy[2];  // Packed MV for list index 0 and 1
++    int8_t ref_idx[2];  // Reference index for list index 0 and 1
++    int8_t pred_flag;  // PF_* value / bit flags
++    int8_t dummy; // Pad to 12 bytes
++} HEVCRpiMvField;
++
++
++#define MV_X(xy) (((xy) << 16) >> 16)  // Low 16 bits, sign-extended (relies on arithmetic >> of signed values)
++#define MV_Y(xy) ((xy) >> 16)  // High 16 bits, sign preserved
++#define MV_XY(x, y) (((x) & 0xffff) | ((y) << 16))  // Pack x (low half) and y (high half); args parenthesised so any expression is safe
++
++#if ARCH_ARM
++#include "arm/rpi_hevc_mv_arm.h"
++#endif
++
++#ifndef mvxy_add
++static inline MvXY mvxy_add(const MvXY a, const MvXY b)
++{
++    return MV_XY(MV_X(a) + MV_X(b), MV_Y(a) + MV_Y(b));  // Component-wise add of two packed MVs; MV_XY masks the x sum to 16 bits
++}
++#endif
++
++// Generic mv_scale_xy (used unless a macro of that name is already defined): scale the packed MV src by approximately tb / td
++#ifndef mv_scale_xy
++static inline MvXY mv_scale_xy(const MvXY src, int td, int tb)
++{
++    int tx, scale_factor;
++
++    td = td == 0 ? 1 : av_clip_int8(td);  // Clip to int8; substitute 1 for 0 to guard the division below
++    tb = av_clip_int8(tb);
++    tx = (0x4000 + (abs(td) >> 1)) / td;  // ~= 2^14 / td, rounded
++    scale_factor = av_clip_intp2((tb * tx + 32) >> 6, 12);  // ~= 256 * tb / td, clipped by av_clip_intp2(.., 12)
++    return MV_XY(
++        av_clip_int16((scale_factor * MV_X(src) + 127 +
++                           (scale_factor * MV_X(src) < 0)) >> 8),
++        av_clip_int16((scale_factor * MV_Y(src) + 127 +
++                           (scale_factor * MV_Y(src) < 0)) >> 8));
++}
++#endif
++
++// 8.3.1 states that the bitstream may not contain poc diffs that do not
++// fit in 16 bits, so given that we don't care about the high bits we only
++// store the low 16 + LT & Inter flags
++
++#define COL_POC_INTRA   0  // No flags set
++#define COL_POC_INTER   (1 << 16)  // Inter flag
++#define COL_POC_LT      (1 << 17)  // Long-term flag
++#define COL_POC_DIFF(x,y) ((int16_t)((x) - (y)))  // Difference truncated to 16 bits
++#define COL_POC_MAKE_INTER(lt,poc) (COL_POC_INTER | ((lt) ? COL_POC_LT : 0) | ((poc) & 0xffff))  // Low 16 bits of poc plus flags
++#define COL_POC_IS_LT(x) (((x) & COL_POC_LT) != 0)  // 1 if the LT flag is set, else 0
++
++typedef struct ColMv_s {
++    int32_t poc;  // COL_POC_* encoded value (COL_POC_INTRA, or low 16 bits of poc + flags)
++    int32_t xy;  // Packed MV (same layout as MvXY)
++} ColMv;
++
++typedef struct ColMvField_s {
++    ColMv L[2];  // Entries [0] and [1] - presumably one per reference list (L0 / L1)
++} ColMvField;
++
++
++
++#endif // AVCODEC_RPI_HEVC_MV_H
+--- /dev/null
++++ b/libavcodec/rpi_hevc_mvs.c
+@@ -0,0 +1,487 @@
++/*
++ * HEVC video decoder
++ *
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ * Copyright (C) 2013 Anand Meher Kotra
++ * Copyright (C) 2018 John Cox for Raspberry Pi (Trading)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "hevc.h"
++#include "rpi_hevcdec.h"
++
++static av_always_inline int
++is_eq_mer(const unsigned int plevel,
++    const unsigned int xN, const unsigned int yN,
++    const unsigned int xP, const unsigned int yP)
++{
++    return (((xN ^ xP) | (yN ^ yP)) >> plevel) == 0;  // True iff N and P agree in every bit at or above plevel, in both x and y
++}
++
++// Returns 1 if A and B have the same pred_flag and, for each list that flag selects, the same ref_idx and mv; else 0
++static av_always_inline int compare_mv_ref_idx(const HEVCRpiMvField * const a, const HEVCRpiMvField * const b)
++{
++    if (a->pred_flag != b->pred_flag)
++        return 0;  // Different list usage can never match
++    return ((a->pred_flag & PF_L0) == 0 || (a->ref_idx[0] == b->ref_idx[0] && a->xy[0] == b->xy[0])) &&
++        ((a->pred_flag & PF_L1) == 0 || (a->ref_idx[1] == b->ref_idx[1] && a->xy[1] == b->xy[1]));
++}
++
++/*
++ * 8.5.3.1.7  temporal luma motion vector prediction: sets *mvLXCol from the collocated picture (0 if none found); returns non-zero on success
++ */
++static int temporal_luma_motion_vector(const HEVCRpiContext * const s,
++                                       const HEVCRpiLocalContext * const lc, const int x0, const int y0,
++                                       const int nPbW, const int nPbH, const int refIdxLx,
++                                       MvXY * const mvLXCol, const int X)
++{
++    int x, y;
++    const ColMv * cmv = NULL;
++
++    HEVCRpiFrame * const col_ref = s->ref->collocated_ref;
++    const RefPicList * const refPicList = s->refPicList + X;
++    const int cur_lt = refPicList->isLongTerm[refIdxLx];
++
++    *mvLXCol = 0;
++    // Unlikely but we might have a col_ref IDR frame!
++    if (col_ref->col_mvf == NULL)
++        return 0;
++
++    ff_hevc_rpi_progress_wait_mv(s, lc->jb0, col_ref, y0 + nPbH);  // Presumably blocks until col_ref's MVs are valid down to this row - confirm
++
++    //bottom right collocated motion vector
++    x = x0 + nPbW;
++    y = y0 + nPbH;
++
++    if ((y0 >> s->ps.sps->log2_ctb_size) == (y >> s->ps.sps->log2_ctb_size) &&  // Candidate must lie in the same CTB row
++        y < s->ps.sps->height &&
++        x < s->ps.sps->width)
++    {
++        const ColMvField * const col = col_ref->col_mvf + (x >> 4) +  // col_mvf holds one entry per 16x16 pels
++            (y >> 4) * s->col_mvf_stride;
++
++        if (col->L[0].poc != COL_POC_INTRA &&
++            (col->L[1].poc == COL_POC_INTRA ||
++             (s->no_backward_pred_flag ? s->sh.collocated_list == L1 : X == 0)))
++        {
++            cmv = col->L + 0;
++        }
++        else if (col->L[1].poc != COL_POC_INTRA)
++        {
++            cmv = col->L + 1;
++        }
++    }
++
++    // derive center collocated motion vector
++    if (cmv == NULL || COL_POC_IS_LT(cmv->poc) != cur_lt)  // No usable bottom-right candidate: retry with the centre position
++    {
++        cmv = NULL;
++        x                  = x0 + (nPbW >> 1);
++        y                  = y0 + (nPbH >> 1);
++
++        {
++            const ColMvField * const col = col_ref->col_mvf + (x >> 4) +
++              (y >> 4) * s->col_mvf_stride;
++
++            if (col->L[0].poc != COL_POC_INTRA &&
++              (col->L[1].poc == COL_POC_INTRA ||
++               (s->no_backward_pred_flag ? s->sh.collocated_list == L1 : X == 0)))
++            {
++              cmv = col->L + 0;
++            }
++            else if (col->L[1].poc != COL_POC_INTRA)
++            {
++              cmv = col->L + 1;
++            }
++        }
++    }
++
++    if (cmv == NULL || cur_lt != COL_POC_IS_LT(cmv->poc))  // Long-term status of the candidate must match our reference
++        return 0;
++
++    {
++        const int col_poc  = col_ref->poc;
++        const int ref_poc  = refPicList->list[refIdxLx];
++
++        *mvLXCol = (cur_lt ||  // Use the MV unscaled for long-term refs or when the POC distances are equal
++                        cmv->poc == col_poc ||
++                        COL_POC_DIFF(col_poc, cmv->poc) == s->poc - ref_poc) ?
++                    cmv->xy :
++                    mv_scale_xy(cmv->xy, COL_POC_DIFF(col_poc, cmv->poc), s->poc - ref_poc);
++    }
++
++    return cmv != NULL;  // Always 1 here: the NULL case has already returned above
++}
++
++static inline int mvf_eq(const HEVCRpiMvField * const a, const HEVCRpiMvField * const b)
++{
++    return b != NULL && compare_mv_ref_idx(a, b);  // NULL b (candidate unavailable) never matches; a must be non-NULL
++}
++
++
++
++/*
++ * 8.5.3.1.2  Derivation process for spatial merging candidates: returns candidate number merge_idx, either a neighbour's mvf or *mvf_t filled in here
++ */
++static inline const HEVCRpiMvField *
++derive_spatial_merge_candidates(
++    const HEVCRpiContext * const s,
++    const HEVCRpiLocalContext * const lc,
++    const unsigned int x0, const unsigned int y0,
++    const unsigned int nPbW, const unsigned int nPbH,
++    const unsigned int avail,
++    const unsigned int part_idx,
++    const unsigned int merge_idx,
++    HEVCRpiMvField * const mvf_t)
++{
++    const unsigned int parts_a1 = (1 << PART_Nx2N) | (1 << PART_nLx2N) | (1 << PART_nRx2N);  // Bitmask of part modes tested for A1
++    const unsigned int parts_b1 = (1 << PART_2NxN) | (1<< PART_2NxnU) | (1 << PART_2NxnD);  // Bitmask of part modes tested for B1
++
++    const HEVCRpiMvField * mvf_a1 = mvf_ptr(s, lc, x0, y0, x0 - 1, y0 + nPbH - 1);
++    const HEVCRpiMvField * mvf_a0 = mvf_a1 + mvf_left_stride(s, x0, x0 - 1);
++    const HEVCRpiMvField * mvf_b1 = mvf_ptr(s, lc, x0, y0, x0 + nPbW - 1, y0 - 1);
++    const HEVCRpiMvField * mvf_b0 = mvf_b1 + 1;
++    const unsigned int plevel = s->ps.pps->log2_parallel_merge_level;
++    const unsigned int part_mode = lc->cu.part_mode;
++
++    const HEVCRpiMvField * perm[4];  // Candidates accepted so far; read back by the combined bi-pred stage
++    unsigned int nb_merge_cand = 0;
++
++    // singleMCLFlag => part_idx == 0 so no need to test for it
++    if ((avail & AVAIL_L) == 0 ||
++        (part_idx == 1 &&
++            ((parts_a1 >> part_mode) & 1) != 0 ||  // NB: && binds tighter than ||, so this is (part_idx == 1 && mode test) || is_eq_mer(..)
++                is_eq_mer(plevel, x0 - 1, y0 + nPbH - 1, x0, y0)) ||
++        mvf_a1->pred_flag == PF_INTRA)
++    {
++        mvf_a1 = NULL;
++    }
++    else
++    {
++        if (merge_idx == nb_merge_cand)
++            return mvf_a1;
++        perm[nb_merge_cand++] = mvf_a1;
++    }
++
++    if ((avail & AVAIL_U) == 0 ||
++            (part_idx == 1 &&
++               ((parts_b1 >> part_mode) & 1) != 0 ||  // NB: same precedence as the A1 test above
++                   is_eq_mer(plevel, x0 + nPbW - 1, y0 - 1, x0, y0)) ||
++            mvf_b1->pred_flag == PF_INTRA)
++    {
++        mvf_b1 = NULL;
++    }
++    else if (!mvf_eq(mvf_b1, mvf_a1))
++    {
++        if (merge_idx == nb_merge_cand)
++            return mvf_b1;
++        perm[nb_merge_cand++] = mvf_b1;
++    }
++
++    // above right spatial merge candidate
++    // Never need mvf_b0 again so don't bother zeroing if navail
++    if ((avail & AVAIL_UR) != 0 &&
++        !is_eq_mer(plevel, x0 + nPbW, y0 - 1, x0, y0) &&
++        mvf_b0->pred_flag != PF_INTRA &&
++        !mvf_eq(mvf_b0, mvf_b1))
++    {
++        if (merge_idx == nb_merge_cand)
++            return mvf_b0;
++        perm[nb_merge_cand++] = mvf_b0;
++    }
++
++    // left bottom spatial merge candidate
++    // Never need mvf_a0 again so don't bother zeroing if navail
++    if ((avail & AVAIL_DL) != 0 &&
++        !is_eq_mer(plevel, x0 - 1, y0 + nPbH, x0, y0) &&
++        mvf_a0->pred_flag != PF_INTRA &&
++        !mvf_eq(mvf_a0, mvf_a1))
++    {
++        if (merge_idx == nb_merge_cand)
++            return mvf_a0;
++        perm[nb_merge_cand++] = mvf_a0;
++    }
++
++    // above left spatial merge candidate
++    if (nb_merge_cand != 4 &&
++        (avail & AVAIL_UL) != 0 &&
++        !is_eq_mer(plevel, x0 - 1, y0 - 1, x0, y0))
++    {
++        const HEVCRpiMvField * mvf_b2 = mvf_ptr(s, lc, x0, y0, x0 - 1, y0 - 1);  // UL
++
++        if (mvf_b2->pred_flag != PF_INTRA &&
++            !mvf_eq(mvf_b2, mvf_a1) &&
++            !mvf_eq(mvf_b2, mvf_b1))
++        {
++            if (merge_idx == nb_merge_cand)
++                return mvf_b2;
++            perm[nb_merge_cand++] = mvf_b2;
++        }
++    }
++
++    // temporal motion vector candidate
++    if (s->sh.slice_temporal_mvp_enabled_flag)
++    {
++        static const HEVCRpiMvField mvf_z = {{0}};
++
++        *mvf_t = mvf_z;  // Start from an all-zero field (pred_flag == 0)
++
++        if (temporal_luma_motion_vector(s, lc, x0, y0, nPbW, nPbH,
++                                        0, mvf_t->xy + 0, 0))
++            mvf_t->pred_flag = PF_L0;
++
++        if (s->sh.slice_type == HEVC_SLICE_B &&
++                temporal_luma_motion_vector(s, lc, x0, y0, nPbW, nPbH,
++                                            0, mvf_t->xy + 1, 1))
++            mvf_t->pred_flag |= PF_L1;
++
++        if (mvf_t->pred_flag != 0)
++        {
++            if (merge_idx == nb_merge_cand)
++                return mvf_t;
++            perm[nb_merge_cand++] = mvf_t;
++        }
++    }
++
++    // combined bi-predictive merge candidates  (applies for B slices)
++    if (s->sh.slice_type == HEVC_SLICE_B && nb_merge_cand > 1)
++    {
++        unsigned int comb_idx = 0;
++        const unsigned int cand_count = nb_merge_cand * (nb_merge_cand - 1);  // Number of ordered pairs of distinct candidates
++        const RefPicList * const refPicList = s->refPicList;
++
++        for (comb_idx = 0; comb_idx < cand_count; comb_idx++)
++        {
++            static const uint8_t l0_l1_cand_idx[12][2] = {
++                { 0, 1, },
++                { 1, 0, },
++                { 0, 2, },
++                { 2, 0, },
++                { 1, 2, },
++                { 2, 1, },
++                { 0, 3, },
++                { 3, 0, },
++                { 1, 3, },
++                { 3, 1, },
++                { 2, 3, },
++                { 3, 2, },
++            };
++
++            const unsigned int l0_cand_idx = l0_l1_cand_idx[comb_idx][0];
++            const unsigned int l1_cand_idx = l0_l1_cand_idx[comb_idx][1];
++            const HEVCRpiMvField * const mvf_c0 = perm[l0_cand_idx];
++            const HEVCRpiMvField * const mvf_c1 = perm[l1_cand_idx];
++
++            if ((mvf_c0->pred_flag & PF_L0) != 0 &&
++                (mvf_c1->pred_flag & PF_L1) != 0 &&
++                (refPicList[0].list[mvf_c0->ref_idx[0]] != refPicList[1].list[mvf_c1->ref_idx[1]] ||
++                 mvf_c0->xy[0] != mvf_c1->xy[1]))
++            {
++                if (merge_idx == nb_merge_cand++)
++                {
++                    // Need to be a bit careful as we will construct mvf_t and we
++                    // may already be using that as one of our candidates
++                    // so build & copy rather than build in place
++                    const HEVCRpiMvField mvf_m = {
++                        .xy = {
++                            mvf_c0->xy[0],
++                            mvf_c1->xy[1]},
++                        .ref_idx = {
++                            mvf_c0->ref_idx[0],
++                            mvf_c1->ref_idx[1]},
++                        .pred_flag = PF_BI
++                    };
++                    *mvf_t = mvf_m;
++                    return mvf_t;
++                }
++            }
++        }
++    }
++
++    // "append" Zero motion vector candidates
++    {
++        const unsigned int nb_refs = (s->sh.slice_type == HEVC_SLICE_B) ?
++                            FFMIN(s->sh.nb_refs[0], s->sh.nb_refs[1]) : s->sh.nb_refs[0];
++        const unsigned int zero_idx = merge_idx - nb_merge_cand;  // Index among the zero candidates
++
++        const HEVCRpiMvField mvf_m = {
++            .xy = {0, 0},
++            .ref_idx = {
++                zero_idx < nb_refs ? zero_idx : 0,  // ref_idx falls back to 0 once zero_idx reaches nb_refs
++                (s->sh.slice_type == HEVC_SLICE_B && zero_idx < nb_refs) ? zero_idx : 0},
++            .pred_flag = (s->sh.slice_type == HEVC_SLICE_B) ? PF_BI : PF_L0
++        };
++
++        *mvf_t = mvf_m;
++        return mvf_t;
++    }
++}
++
++
++// 8.5.3.1.1 Derivation process of luma Mvs for merge mode
++void ff_hevc_rpi_luma_mv_merge_mode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int x0, int y0, int nPbW,
++                                int nPbH, int log2_cb_size, int part_idx,
++                                int merge_idx, HEVCRpiMvField * const mv)
++{
++    // With log2_parallel_merge_level > 2 an 8x8 CU derives its candidates once for the whole CU, as part 0
++    const int whole_cu = s->ps.pps->log2_parallel_merge_level > 2 && log2_cb_size == 3;
++    const int cand_x = whole_cu ? lc->cu.x : x0;
++    const int cand_y = whole_cu ? lc->cu.y : y0;
++    const int cand_w = whole_cu ? 8 : nPbW;
++    const int cand_h = whole_cu ? 8 : nPbH;
++    const HEVCRpiMvField * const cand = derive_spatial_merge_candidates(s, lc, cand_x, cand_y, cand_w, cand_h,
++        ff_hevc_rpi_tb_avail_flags(s, lc, cand_x, cand_y, cand_w, cand_h), whole_cu ? 0 : part_idx, merge_idx, mv);
++
++    if (cand != mv)  // the candidate may already live in *mv, in which case no copy is needed
++        *mv = *cand;
++    // Bi-pred is reduced to L0 when nPbW + nPbH == 12 (presumably the 8x4 / 4x8 PB restriction - confirm)
++    if (mv->pred_flag == PF_BI && (nPbW + nPbH) == 12)
++        mv->pred_flag = PF_L0;
++}
++
++
++static av_always_inline const MvXY *
++mvf_same_poc(const RefPicList * const rpl, const unsigned int pfi0, const unsigned int pfi1, const int poc0, const HEVCRpiMvField * const mvf)
++{
++    // First MV of mvf (list pfi0 tried before pfi1) whose reference list entry equals poc0; NULL if none or mvf is NULL
++    if (mvf == NULL)
++        return NULL;
++    if (((mvf->pred_flag >> pfi0) & 1) && rpl[pfi0].list[mvf->ref_idx[pfi0]] == poc0)
++        return &mvf->xy[pfi0];
++    if (((mvf->pred_flag >> pfi1) & 1) && rpl[pfi1].list[mvf->ref_idx[pfi1]] == poc0)
++        return &mvf->xy[pfi1];
++    return NULL;
++}
++
++static av_always_inline const MvXY *
++mvf_other_poc(const RefPicList * const rpl, const unsigned int pfi0, const unsigned int pfi1,
++              const int islt0, const int poc0, const int poc_cur,
++              MvXY * const mv_t, const HEVCRpiMvField * const mvf)
++{
++    // Take the first list (pfi0, then pfi1) that mvf uses and whose reference has long-term
++    // status islt0; its MV is returned as-is or rescaled into *mv_t.  NULL if neither qualifies.
++    const unsigned int order[2] = {pfi0, pfi1};
++    unsigned int n;
++
++    if (mvf == NULL)
++        return NULL;
++
++    for (n = 0; n != 2; n++)
++    {
++        const unsigned int lx = order[n];
++        int poc1;
++
++        if (((mvf->pred_flag >> lx) & 1) == 0 || rpl[lx].isLongTerm[mvf->ref_idx[lx]] != islt0)
++            continue;
++        poc1 = rpl[lx].list[mvf->ref_idx[lx]];
++        if (islt0 || poc1 == poc0)
++            return mvf->xy + lx;  // long-term, or same list entry as the target: used unscaled
++        *mv_t = mv_scale_xy(mvf->xy[lx], poc_cur - poc1, poc_cur - poc0);
++        return mv_t;
++    }
++    return NULL;
++}
++
++void ff_hevc_rpi_luma_mv_mvp_mode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++    const unsigned int x0, const unsigned int y0,
++    const unsigned int nPbW, const unsigned int nPbH,
++    const unsigned int avail,
++    HEVCRpiMvField * const mv,
++    const unsigned int mvp_lx_flag, const unsigned int LX)
++{   // Writes the MV predictor for list LX to mv->xy[LX]: candidate A when mvp_lx_flag == 0, else B or temporal
++    const unsigned int pfi0 = LX;                // list examined first in each neighbour
++    const unsigned int pfi1 = LX == 0 ? 1 : 0;   // the other list, examined second
++    const RefPicList * const rpl = s->refPicList;
++    const int poc0 = rpl[LX].list[mv->ref_idx[LX]];  // list entry of the reference selected by mv->ref_idx[LX]
++    const int poc_cur = s->poc;
++    const int islt0 = rpl[LX].isLongTerm[mv->ref_idx[LX]];
++
++    const HEVCRpiMvField * mvf_a1 = mvf_ptr(s, lc, x0, y0, x0 - 1, y0 + nPbH - 1);  // left column, bottom row of the PB
++    const HEVCRpiMvField * mvf_a0 = mvf_a1 + mvf_left_stride(s, x0, x0 - 1);  // a1 advanced by the left stride; gated by AVAIL_DL below
++    const HEVCRpiMvField * mvf_b2 = mvf_ptr(s, lc, x0, y0, x0 - 1, y0 - 1);  // UL
++    const HEVCRpiMvField * mvf_b1 = mvf_ptr(s, lc, x0, y0, x0 + nPbW - 1, y0 - 1);  // row above, rightmost column of the PB
++    const HEVCRpiMvField * mvf_b0 = mvf_b1 + 1;  // entry after b1; gated by AVAIL_UR below
++    const MvXY * mva = NULL;
++    const MvXY * mvb;
++    MvXY * const mv_rv = mv->xy + LX;
++    MvXY mvt_a, mvt_b;  // scratch for scaled A / B vectors produced by mvf_other_poc()
++
++    *mv_rv = 0;  // result stays zero unless a predictor is selected below
++
++    if ((avail & AVAIL_DL) == 0 || mvf_a0->pred_flag == PF_INTRA)
++        mvf_a0 = NULL;
++    else if ((mva = mvf_same_poc(rpl, pfi0, pfi1, poc0, mvf_a0)) != NULL && mvp_lx_flag == 0)
++        goto use_mva;
++
++    if ((avail & AVAIL_L) == 0 || mvf_a1->pred_flag == PF_INTRA)
++        mvf_a1 = NULL;
++    // A: same-POC match in a0 (tried above) then a1, else an other-POC (possibly scaled) match in a0 then a1
++    if (mva == NULL &&
++        (mva = mvf_same_poc(rpl, pfi0, pfi1, poc0, mvf_a1)) == NULL &&
++        (mva = mvf_other_poc(rpl, pfi0, pfi1, islt0, poc0, poc_cur, &mvt_a, mvf_a0)) == NULL)
++        mva = mvf_other_poc(rpl, pfi0, pfi1, islt0, poc0, poc_cur, &mvt_a, mvf_a1);
++
++    if (mvp_lx_flag == 0 && mva != NULL)
++        goto use_mva;
++
++    if ((avail & AVAIL_UR) == 0 || mvf_b0->pred_flag == PF_INTRA)
++        mvf_b0 = NULL;
++    if ((avail & AVAIL_U) == 0 || mvf_b1->pred_flag == PF_INTRA)
++        mvf_b1 = NULL;
++    if ((avail & AVAIL_UL) == 0 || mvf_b2->pred_flag == PF_INTRA)
++        mvf_b2 = NULL;
++    // B: first same-POC match in b0, b1, b2
++    if ((mvb = mvf_same_poc(rpl, pfi0, pfi1, poc0, mvf_b0)) == NULL &&
++        (mvb = mvf_same_poc(rpl, pfi0, pfi1, poc0, mvf_b1)) == NULL)
++        mvb = mvf_same_poc(rpl, pfi0, pfi1, poc0, mvf_b2);
++
++    if (mvf_a0 == NULL && mvf_a1 == NULL) {  // no usable left neighbour: B stands in for A, then B is re-derived via other-POC
++        mva = mvb;
++        if (mvp_lx_flag == 0 && mva != NULL)
++            goto use_mva;
++
++        if ((mvb = mvf_other_poc(rpl, pfi0, pfi1, islt0, poc0, poc_cur, &mvt_b, mvf_b0)) == NULL &&
++            (mvb = mvf_other_poc(rpl, pfi0, pfi1, islt0, poc0, poc_cur, &mvt_b, mvf_b1)) == NULL)
++            mvb = mvf_other_poc(rpl, pfi0, pfi1, islt0, poc0, poc_cur, &mvt_b, mvf_b2);
++    }
++
++    if (mva == NULL) {  // promote B to A so that a non-NULL mvb always implies a non-NULL mva
++        mva = mvb;
++        mvb = NULL;
++    }
++
++    if (mvb != NULL && *mva == *mvb)  // If A == B then ignore B
++        mvb = NULL;
++    // Final pick: A for flag 0, B for flag 1; temporal MVP is tried for (flag 0, no A) or (flag 1, A but no B)
++    if (mvp_lx_flag == 0 && mva != NULL) {
++        goto use_mva;
++    }
++    else if (mvp_lx_flag != 0 && mvb != NULL) {
++        *mv_rv = *mvb;
++    }
++    else if (s->sh.slice_temporal_mvp_enabled_flag && ((mvp_lx_flag == 0 && mva == NULL) || (mvp_lx_flag != 0 && mva != NULL))) {
++        temporal_luma_motion_vector(s, lc, x0, y0, nPbW,
++                                    nPbH, mv->ref_idx[LX],
++                                    mv_rv, LX);
++    }
++    return;
++
++use_mva:
++    *mv_rv = *mva;
++    return;
++}
++
+--- /dev/null
++++ b/libavcodec/rpi_hevc_parse.c
+@@ -0,0 +1,143 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "bytestream.h"
++#include "h2645_parse.h"
++#include "hevc.h"
++#include "rpi_hevc_parse.h"
++
++static int hevc_decode_nal_units(const uint8_t *buf, int buf_size, HEVCRpiParamSets *ps,
++                                 HEVCSEIContext *sei, int is_nalff, int nal_length_size,
++                                 int err_recognition, int apply_defdispwin, void *logctx)
++{
++    /*
++     * Split buf into NAL units and hand each VPS, SPS, PPS and SEI NAL to
++     * its decoder; every other NAL type is logged and skipped.
++     *
++     * Parsing stops at the first failure.  The failure code is returned only
++     * if err_recognition has AV_EF_EXPLODE set; otherwise the result is 0.
++     *
++     * logctx is passed on to av_log() and to the NAL decoders.
++     */
++    H2645Packet pkt = { 0 };
++    int ret;
++    int n;
++
++    ret = ff_h2645_packet_split(&pkt, buf, buf_size, logctx, is_nalff,
++                                nal_length_size, AV_CODEC_ID_HEVC, 1, 0);
++
++    /* ret < 0 (split or decode failure) ends the walk */
++    for (n = 0; ret >= 0 && n < pkt.nb_nals; n++) {
++        H2645NAL * const nal = pkt.nals + n;
++
++        switch (nal->type) {
++        case HEVC_NAL_VPS:
++            ret = ff_hevc_rpi_decode_nal_vps(&nal->gb, logctx, ps);
++            break;
++        case HEVC_NAL_SPS:
++            ret = ff_hevc_rpi_decode_nal_sps(&nal->gb, logctx, ps, apply_defdispwin);
++            break;
++        case HEVC_NAL_PPS:
++            ret = ff_hevc_rpi_decode_nal_pps(&nal->gb, logctx, ps);
++            break;
++        case HEVC_NAL_SEI_PREFIX:
++        case HEVC_NAL_SEI_SUFFIX:
++            ret = ff_hevc_rpi_decode_nal_sei(&nal->gb, logctx, sei, ps, nal->type);
++            break;
++        default:
++            /* not a parameter set or SEI: nothing to do */
++            av_log(logctx, AV_LOG_VERBOSE, "Ignoring NAL type %d in extradata\n", nal->type);
++            break;
++        }
++    }
++
++    /* the packet is released on every path, success or failure */
++    ff_h2645_packet_uninit(&pkt);
++
++    if (!(err_recognition & AV_EF_EXPLODE))
++        return 0;
++
++    return ret;
++}
++
++int ff_hevc_rpi_decode_extradata(const uint8_t *data, int size, HEVCRpiParamSets *ps,
++                             HEVCSEIContext *sei, int *is_nalff, int *nal_length_size,
++                             int err_recognition, int apply_defdispwin, void *logctx)
++{   // Parses extradata as hvcC arrays or as a plain NAL buffer; sets *is_nalff (and *nal_length_size for hvcC)
++    int ret = 0;
++    GetByteContext gb;
++
++    bytestream2_init(&gb, data, size);
++
++    if (size > 3 && (data[0] || data[1] || data[2] > 1)) {  // i.e. does not begin 00 00 00 or 00 00 01
++        /* It seems the extradata is encoded as hvcC format.
++         * Temporarily, we support configurationVersion==0 until 14496-15 3rd
++         * is finalized. When finalized, configurationVersion will be 1 and we
++         * can recognize hvcC by checking if avctx->extradata[0]==1 or not. */
++        int i, j, num_arrays, nal_len_size;
++
++        *is_nalff = 1;
++
++        bytestream2_skip(&gb, 21);  // the first 21 header bytes are not interpreted here
++        nal_len_size = (bytestream2_get_byte(&gb) & 3) + 1;  // low 2 bits + 1, so 1..4
++        num_arrays   = bytestream2_get_byte(&gb);
++
++        /* nal units in the hvcC always have length coded with 2 bytes,
++         * so put a fake nal_length_size = 2 while parsing them */
++        *nal_length_size = 2;
++
++        /* Decode nal units from hvcC. */
++        for (i = 0; i < num_arrays; i++) {
++            int type = bytestream2_get_byte(&gb) & 0x3f;  // low 6 bits; only used in the error message below
++            int cnt  = bytestream2_get_be16(&gb);         // number of NAL units in this array
++
++            for (j = 0; j < cnt; j++) {
++                // +2 for the nal size field
++                int nalsize = bytestream2_peek_be16(&gb) + 2;
++                if (bytestream2_get_bytes_left(&gb) < nalsize) {
++                    av_log(logctx, AV_LOG_ERROR,
++                           "Invalid NAL unit size in extradata.\n");
++                    return AVERROR_INVALIDDATA;  // NOTE(review): *nal_length_size is left at the temporary 2 here
++                }
++
++                ret = hevc_decode_nal_units(gb.buffer, nalsize, ps, sei, *is_nalff,
++                                            *nal_length_size, err_recognition, apply_defdispwin,
++                                            logctx);
++                if (ret < 0) {  // only reachable when err_recognition has AV_EF_EXPLODE set
++                    av_log(logctx, AV_LOG_ERROR,
++                           "Decoding nal unit %d %d from hvcC failed\n",
++                           type, i);
++                    return ret;
++                }
++                bytestream2_skip(&gb, nalsize);
++            }
++        }
++
++        /* Now store right nal length size, that will be used to parse
++         * all other nals */
++        *nal_length_size = nal_len_size;
++    } else {
++        *is_nalff = 0;  // whole buffer is parsed in one go; *nal_length_size is passed through unchanged
++        ret = hevc_decode_nal_units(data, size, ps, sei, *is_nalff, *nal_length_size,
++                                    err_recognition, apply_defdispwin, logctx);
++        if (ret < 0)
++            return ret;
++    }
++
++    return ret;
++}
+--- /dev/null
++++ b/libavcodec/rpi_hevc_parse.h
+@@ -0,0 +1,36 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/**
++ * @file
++ * H.265 parser code
++ */
++
++#ifndef AVCODEC_RPI_HEVC_PARSE_H
++#define AVCODEC_RPI_HEVC_PARSE_H
++
++#include <stdint.h>
++
++#include "rpi_hevc_ps.h"
++#include "rpi_hevc_sei.h"
++
++int ff_hevc_rpi_decode_extradata(const uint8_t *data, int size, HEVCRpiParamSets *ps,
++                             HEVCSEIContext *sei, int *is_nalff, int *nal_length_size,
++                             int err_recognition, int apply_defdispwin, void *logctx);
++
++#endif /* AVCODEC_RPI_HEVC_PARSE_H */
+--- /dev/null
++++ b/libavcodec/rpi_hevc_ps.c
+@@ -0,0 +1,1938 @@
++/*
++ * HEVC Parameter Set decoding
++ *
++ * Copyright (C) 2012 - 2103 Guillaume Martres
++ * Copyright (C) 2012 - 2103 Mickael Raulet
++ * Copyright (C) 2012 - 2013 Gildas Cocherel
++ * Copyright (C) 2013 Vittorio Giovara
++ * Copyright (C) 2018 John Cox for Raspberry Pi (Trading)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/imgutils.h"
++#include "golomb.h"
++#include "rpi_hevc_data.h"
++#include "rpi_hevc_ps.h"
++#include "rpi_hevcdec.h"
++
++static const uint8_t default_scaling_list_intra[] = {  // 64 entries, written as 8 rows of 8
++    16, 16, 16, 16, 17, 18, 21, 24,
++    16, 16, 16, 16, 17, 19, 22, 25,
++    16, 16, 17, 18, 20, 22, 25, 29,
++    16, 16, 18, 21, 24, 27, 31, 36,
++    17, 17, 20, 24, 30, 35, 41, 47,
++    18, 19, 22, 27, 35, 44, 54, 65,
++    21, 22, 25, 31, 41, 54, 70, 88,
++    24, 25, 29, 36, 47, 65, 88, 115
++};
++
++static const uint8_t default_scaling_list_inter[] = {  // 64 entries, written as 8 rows of 8
++    16, 16, 16, 16, 17, 18, 20, 24,
++    16, 16, 16, 17, 18, 20, 24, 25,
++    16, 16, 17, 18, 20, 24, 25, 28,
++    16, 17, 18, 20, 24, 25, 28, 33,
++    17, 18, 20, 24, 25, 28, 33, 41,
++    18, 20, 24, 25, 28, 33, 41, 54,
++    20, 24, 25, 28, 33, 41, 54, 71,
++    24, 25, 28, 33, 41, 54, 71, 91
++};
++
++static const AVRational vui_sar[] = {  // 17 ratios (indices 0..16); presumably indexed by the VUI aspect_ratio_idc - confirm
++    {  0,   1 },
++    {  1,   1 },
++    { 12,  11 },
++    { 10,  11 },
++    { 16,  11 },
++    { 40,  33 },
++    { 24,  11 },
++    { 20,  11 },
++    { 32,  11 },
++    { 80,  33 },
++    { 18,  11 },
++    { 15,  11 },
++    { 64,  33 },
++    { 160, 99 },
++    {  4,   3 },
++    {  3,   2 },
++    {  2,   1 },
++};
++
++
++// pps_cb_qp_offset: -12,+12
++// slice_cb_qp_offset: -12,+12 also
++//   "The value of pps_cb_qp_offset + slice_cb_qp_offset shall be in the range of -12 to +12, inclusive."
++// cr_qp_offset_list[n]: -12,+12
++// So worst case total offset: -24,+24
++
++#define T(n) ((((48+(n))/6-10)<<3) | (48+(n))%6)  // packs (48+n)/6 - 10 into bits 3 and up, (48+n)%6 into bits 0..2
++#define C(B,n) T(B*6+(n) < 0 ? -B*6 : (n) > 51 ? 51 : (n))  // T() of n clamped to [-6*B, 51]
++#define M(B,n) C(B,(-n))  // C() of -n: used for the entries in front of index 0
++
++// Number of entries produced by QP_START (the -ve padding in front of QP 0): 24 + 48 = 72
++#define QP_OFFSET_0 (8*6 + 12*2)
++#define QP_START(B) \
++    M(B,48), M(B,48), M(B,48), M(B,48), M(B,48), M(B,48),\
++    M(B,48), M(B,48), M(B,48), M(B,48), M(B,48), M(B,48),\
++    M(B,48), M(B,48), M(B,48), M(B,48), M(B,48), M(B,48),\
++    M(B,48), M(B,48), M(B,48), M(B,48), M(B,48), M(B,48),\
++\
++    M(B,48), M(B,47), M(B,46), M(B,45), M(B,44), M(B,43),\
++    M(B,42), M(B,41), M(B,40), M(B,39), M(B,38), M(B,37),\
++    M(B,36), M(B,35), M(B,34), M(B,33), M(B,32), M(B,31),\
++    M(B,30), M(B,29), M(B,28), M(B,27), M(B,26), M(B,25),\
++    M(B,24), M(B,23), M(B,22), M(B,21), M(B,20), M(B,19),\
++    M(B,18), M(B,17), M(B,16), M(B,15), M(B,14), M(B,13),\
++    M(B,12), M(B,11), M(B,10), M(B, 9), M(B, 8), M(B, 7),\
++    M(B, 6), M(B, 5), M(B, 4), M(B, 3), M(B, 2), M(B, 1)
++#define QP_END(B) \
++    C(B,51), C(B,51), C(B,51), C(B,51), C(B,51), C(B,51),\
++    C(B,51), C(B,51), C(B,51), C(B,51), C(B,51), C(B,51),\
++    C(B,51), C(B,51), C(B,51), C(B,51), C(B,51), C(B,51)
++// T1(B): identity for 0..29, then a compressed curve up to 51; T0(B) below: identity, saturating at 51.  NOTE(review): T1 resembles the 4:2:0 chroma QP mapping - confirm against the spec
++#define T1(B)\
++{\
++    QP_START(B),\
++    C(B, 0), C(B, 1), C(B, 2), C(B, 3), C(B, 4), C(B, 5), C(B, 6), C(B, 7), C(B, 8), C(B, 9),\
++    C(B,10), C(B,11), C(B,12), C(B,13), C(B,14), C(B,15), C(B,16), C(B,17), C(B,18), C(B,19),\
++    C(B,20), C(B,21), C(B,22), C(B,23), C(B,24), C(B,25), C(B,26), C(B,27), C(B,28), C(B,29),\
++    C(B,29), C(B,30), C(B,31), C(B,32), C(B,33), C(B,33), C(B,34), C(B,34), C(B,35), C(B,35),\
++    C(B,36), C(B,36), C(B,37), C(B,37), C(B,38), C(B,39), C(B,40), C(B,41), C(B,42), C(B,43),\
++    C(B,44), C(B,45),\
++    C(B,46), C(B,47), C(B,48), C(B,49), C(B,50), C(B,51),\
++    QP_END(B)\
++}
++#define T0(B)\
++{\
++    QP_START(B),\
++    C(B, 0), C(B, 1), C(B, 2), C(B, 3), C(B, 4), C(B, 5), C(B, 6), C(B, 7), C(B, 8), C(B, 9),\
++    C(B,10), C(B,11), C(B,12), C(B,13), C(B,14), C(B,15), C(B,16), C(B,17), C(B,18), C(B,19),\
++    C(B,20), C(B,21), C(B,22), C(B,23), C(B,24), C(B,25), C(B,26), C(B,27), C(B,28), C(B,29),\
++    C(B,30), C(B,31), C(B,32), C(B,33), C(B,34), C(B,35), C(B,36), C(B,37), C(B,38), C(B,39),\
++    C(B,40), C(B,41), C(B,42), C(B,43), C(B,44), C(B,45), C(B,46), C(B,47), C(B,48), C(B,49),\
++    C(B,50), C(B,51),\
++    C(B,51), C(B,51), C(B,51), C(B,51), C(B,51), C(B,51),\
++    QP_END(B)\
++}
++
++#define QP_TABLE_SIZE (QP_OFFSET_0 + 52 + 12*2)  // 72 + 52 + 24 = 148 entries per row, matching T0()/T1() with the 18-entry QP_END
++
++static const int8_t qp_c_bd_0[8][QP_TABLE_SIZE] = {T0(0),T0(1),T0(2),T0(3),T0(4),T0(5),T0(6),T0(7)};  // one row per B = 0..7; NOTE(review): B is presumably bit_depth - 8 - confirm
++static const int8_t qp_c_bd_1[8][QP_TABLE_SIZE] = {T1(0),T1(1),T1(2),T1(3),T1(4),T1(5),T1(6),T1(7)};
++
++#undef T
++#undef C
++#undef QP_END
++
++#define C(B,n) ((n)<0?0:(n)>51?51:(n))  // from here on C() is a plain clamp to [0, 51]; B is ignored and T() is not applied
++// We do need a lot of -ve padding to cope with high bit depths that give -ve qps
++#define QP_DBLK_OFFSET_0 QP_OFFSET_0
++#define QP_END(B)\
++ 51, 51, 51, 51, 51, 51
++
++// These don't need all the padding we have here (12 top/bottom would be enough)
++static const uint8_t qp_c_dblk_0[] = T0(0);  // T0/T1 re-expanded with the redefined C() and the 6-entry QP_END
++static const uint8_t qp_c_dblk_1[] = T1(0);
++
++#undef T
++#undef M
++#undef C
++#undef QP_END
++#undef QP_START
++
++
++static void remove_pps(HEVCRpiParamSets * const s, const int id)
++{
++    AVBufferRef ** const slot = &s->pps_list[id];  // entry to release; may already be empty
++    if (*slot && s->pps == (const HEVCRpiPPS *)(*slot)->data) s->pps = NULL;  // it was the active PPS
++    av_buffer_unref(slot);
++}
++
++static void remove_sps(HEVCRpiParamSets * const s, const int id)
++{
++    AVBufferRef ** const slot = &s->sps_list[id];
++    int n;
++
++    if (*slot != NULL) {
++        if ((const HEVCRpiSPS *)(*slot)->data == s->sps)
++            s->sps = NULL;
++        /* every PPS that refers to this SPS id goes too */
++        for (n = 0; n < FF_ARRAY_ELEMS(s->pps_list); n++)
++            if (s->pps_list[n] != NULL && ((HEVCRpiPPS *)s->pps_list[n]->data)->sps_id == id)
++                remove_pps(s, n);
++        av_assert0(!(*slot && s->sps == (HEVCRpiSPS *)(*slot)->data));
++    }
++    av_buffer_unref(slot);
++}
++
++static void remove_vps(HEVCRpiParamSets * const s, const int id)
++{
++    AVBufferRef ** const slot = &s->vps_list[id];
++    int n;
++    if (*slot != NULL) {
++        if ((const HEVCRpiVPS *)(*slot)->data == s->vps)
++            s->vps = NULL;
++        for (n = 0; n < FF_ARRAY_ELEMS(s->sps_list); n++)  /* cascade to every SPS that refers to this VPS id */
++            if (s->sps_list[n] != NULL && ((HEVCRpiSPS *)s->sps_list[n]->data)->vps_id == id)
++                remove_sps(s, n);
++    }
++    av_buffer_unref(slot);
++}
++
++int ff_hevc_rpi_decode_short_term_rps(GetBitContext * const gb, AVCodecContext * const avctx,
++                                  ShortTermRPS * const rps, const HEVCRpiSPS * const sps, const int is_slice_header)
++{   // Parses one short-term RPS into rps, coded explicitly or predicted from an earlier set; 0 or AVERROR_INVALIDDATA
++    uint8_t rps_predict = 0;
++    int delta_poc;
++    int k0 = 0;   // count of negative delta POCs kept (predicted path)
++    int k1 = 0;   // count of non-negative delta POCs kept (predicted path)
++    int k  = 0;   // total entries written to rps->delta_poc / rps->used
++    int i;
++
++    if (rps != sps->st_rps && sps->nb_st_rps)  // no prediction flag for the SPS's first set, or when the SPS holds none
++        rps_predict = get_bits1(gb);
++
++    if (rps_predict) {
++        const ShortTermRPS *rps_ridx;
++        int delta_rps;
++        unsigned abs_delta_rps;
++        uint8_t use_delta_flag = 0;
++        uint8_t delta_rps_sign;
++
++        if (is_slice_header) {
++            unsigned int delta_idx = get_ue_golomb_long(gb) + 1;
++            if (delta_idx > sps->nb_st_rps) {
++                av_log(avctx, AV_LOG_ERROR,
++                       "Invalid value of delta_idx in slice header RPS: %u > %d.\n",
++                       delta_idx, sps->nb_st_rps);  // NOTE(review): nb_st_rps is declared elsewhere; its specifier is left as it was
++                return AVERROR_INVALIDDATA;
++            }
++            rps_ridx = &sps->st_rps[sps->nb_st_rps - delta_idx];
++            rps->rps_idx_num_delta_pocs = rps_ridx->num_delta_pocs;
++        } else
++            rps_ridx = &sps->st_rps[rps - sps->st_rps - 1];  // inside the SPS: predict from the immediately preceding set
++
++        delta_rps_sign = get_bits1(gb);
++        abs_delta_rps  = get_ue_golomb_long(gb) + 1;
++        if (abs_delta_rps < 1 || abs_delta_rps > 32768) {  // < 1 can only happen if the + 1 wrapped around
++            av_log(avctx, AV_LOG_ERROR,
++                   "Invalid value of abs_delta_rps: %u\n",
++                   abs_delta_rps);
++            return AVERROR_INVALIDDATA;
++        }
++        delta_rps      = (1 - (delta_rps_sign << 1)) * abs_delta_rps;  // sign bit 1 => negative
++        for (i = 0; i <= rps_ridx->num_delta_pocs; i++) {  // the extra pass (i == num_delta_pocs) stands for delta_rps itself
++            int used = rps->used[k] = get_bits1(gb);
++
++            if (!used)
++                use_delta_flag = get_bits1(gb);
++
++            if (used || use_delta_flag) {
++                if (i < rps_ridx->num_delta_pocs)
++                    delta_poc = delta_rps + rps_ridx->delta_poc[i];
++                else
++                    delta_poc = delta_rps;
++                rps->delta_poc[k] = delta_poc;
++                if (delta_poc < 0)
++                    k0++;
++                else
++                    k1++;
++                k++;
++            }
++        }
++
++        if (k >= FF_ARRAY_ELEMS(rps->used)) {
++            av_log(avctx, AV_LOG_ERROR,
++                   "Invalid num_delta_pocs: %d\n", k);
++            return AVERROR_INVALIDDATA;
++        }
++
++        rps->num_delta_pocs    = k;
++        rps->num_negative_pics = k0;
++        // sort in increasing order (smallest first)
++        if (rps->num_delta_pocs != 0) {
++            int used, tmp;
++            for (i = 1; i < rps->num_delta_pocs; i++) {  // insertion sort; used[] is moved along with delta_poc[]
++                delta_poc = rps->delta_poc[i];
++                used      = rps->used[i];
++                for (k = i - 1; k >= 0; k--) {
++                    tmp = rps->delta_poc[k];
++                    if (delta_poc < tmp) {
++                        rps->delta_poc[k + 1] = tmp;
++                        rps->used[k + 1]      = rps->used[k];
++                        rps->delta_poc[k]     = delta_poc;
++                        rps->used[k]          = used;
++                    }
++                }
++            }
++        }
++        if ((rps->num_negative_pics >> 1) != 0) {  // i.e. at least two negative entries to reverse
++            int used;
++            k = rps->num_negative_pics - 1;
++            // flip the negative values to largest first
++            for (i = 0; i < rps->num_negative_pics >> 1; i++) {
++                delta_poc         = rps->delta_poc[i];
++                used              = rps->used[i];
++                rps->delta_poc[i] = rps->delta_poc[k];
++                rps->used[i]      = rps->used[k];
++                rps->delta_poc[k] = delta_poc;
++                rps->used[k]      = used;
++                k--;
++            }
++        }
++    } else {
++        unsigned int prev, nb_positive_pics;
++        rps->num_negative_pics = get_ue_golomb_long(gb);
++        nb_positive_pics       = get_ue_golomb_long(gb);
++
++        if (rps->num_negative_pics >= HEVC_MAX_REFS ||
++            nb_positive_pics >= HEVC_MAX_REFS) {
++            av_log(avctx, AV_LOG_ERROR, "Too many refs in a short term RPS.\n");
++            return AVERROR_INVALIDDATA;
++        }
++
++        rps->num_delta_pocs = rps->num_negative_pics + nb_positive_pics;
++        if (rps->num_delta_pocs) {
++            prev = 0;
++            for (i = 0; i < rps->num_negative_pics; i++) {  // each coded delta is subtracted from the running value
++                delta_poc = get_ue_golomb_long(gb) + 1;
++                if (delta_poc < 1 || delta_poc > 32768) {
++                    av_log(avctx, AV_LOG_ERROR,
++                        "Invalid value of delta_poc: %d\n",
++                        delta_poc);
++                    return AVERROR_INVALIDDATA;
++                }
++                prev -= delta_poc;
++                rps->delta_poc[i] = prev;
++                rps->used[i]      = get_bits1(gb);
++            }
++            prev = 0;
++            for (i = 0; i < nb_positive_pics; i++) {  // each coded delta is added to the running value
++                delta_poc = get_ue_golomb_long(gb) + 1;
++                if (delta_poc < 1 || delta_poc > 32768) {
++                    av_log(avctx, AV_LOG_ERROR,
++                        "Invalid value of delta_poc: %d\n",
++                        delta_poc);
++                    return AVERROR_INVALIDDATA;
++                }
++                prev += delta_poc;
++                rps->delta_poc[rps->num_negative_pics + i] = prev;
++                rps->used[rps->num_negative_pics + i]      = get_bits1(gb);
++            }
++        }
++    }
++    return 0;
++}
++
++
++static int decode_profile_tier_level(GetBitContext * const gb, AVCodecContext * const avctx,
++                                      PTLCommon * const ptl)
++{   // Parses one 88-bit profile/tier block into ptl; returns 0, or -1 if fewer than 88 bits remain
++    int i;
++
++    if (get_bits_left(gb) < 2+1+5 + 32 + 4 + 16 + 16 + 12)  // sum of the field widths read below
++        return -1;
++
++    ptl->profile_space = get_bits(gb, 2);
++    ptl->tier_flag     = get_bits1(gb);
++    ptl->profile_idc   = get_bits(gb, 5);
++    if (ptl->profile_idc == FF_PROFILE_HEVC_MAIN)  // logged from the coded value, before the fallback in the loop below
++        av_log(avctx, AV_LOG_DEBUG, "Main profile bitstream\n");
++    else if (ptl->profile_idc == FF_PROFILE_HEVC_MAIN_10)
++        av_log(avctx, AV_LOG_DEBUG, "Main 10 profile bitstream\n");
++    else if (ptl->profile_idc == FF_PROFILE_HEVC_MAIN_STILL_PICTURE)
++        av_log(avctx, AV_LOG_DEBUG, "Main Still Picture profile bitstream\n");
++    else if (ptl->profile_idc == FF_PROFILE_HEVC_REXT)
++        av_log(avctx, AV_LOG_DEBUG, "Range Extension profile bitstream\n");
++    else
++        av_log(avctx, AV_LOG_WARNING, "Unknown HEVC profile: %d\n", ptl->profile_idc);
++
++    for (i = 0; i < 32; i++) {
++        ptl->profile_compatibility_flag[i] = get_bits1(gb);
++        // a coded profile_idc of 0 is replaced by the index (> 0) of the first compatibility flag that is set
++        if (ptl->profile_idc == 0 && i > 0 && ptl->profile_compatibility_flag[i])
++            ptl->profile_idc = i;
++    }
++    ptl->progressive_source_flag    = get_bits1(gb);
++    ptl->interlaced_source_flag     = get_bits1(gb);
++    ptl->non_packed_constraint_flag = get_bits1(gb);
++    ptl->frame_only_constraint_flag = get_bits1(gb);
++
++    skip_bits(gb, 16); // XXX_reserved_zero_44bits[0..15]
++    skip_bits(gb, 16); // XXX_reserved_zero_44bits[16..31]
++    skip_bits(gb, 12); // XXX_reserved_zero_44bits[32..43]
++
++    return 0;
++}
++
++static int parse_ptl(GetBitContext * const gb, AVCodecContext * const avctx,
++                      PTL * const ptl, const int max_num_sub_layers)
++{   // Parses the general PTL, then the optional per-sub-layer PTLs; returns 0, or -1 when the data runs out
++    int i;
++    if (decode_profile_tier_level(gb, avctx, &ptl->general_ptl) < 0 ||
++        get_bits_left(gb) < 8 + (8*2 * (max_num_sub_layers - 1 > 0))) {  // level_idc, plus 16 bits of flags/padding if sub-layers exist
++        av_log(avctx, AV_LOG_ERROR, "PTL information too short\n");
++        return -1;
++    }
++
++    ptl->general_ptl.level_idc = get_bits(gb, 8);
++
++    for (i = 0; i < max_num_sub_layers - 1; i++) {
++        ptl->sub_layer_profile_present_flag[i] = get_bits1(gb);
++        ptl->sub_layer_level_present_flag[i]   = get_bits1(gb);
++    }
++
++    if (max_num_sub_layers - 1> 0)  // pad the 2-bit flag pairs out to 8 pairs
++        for (i = max_num_sub_layers - 1; i < 8; i++)
++            skip_bits(gb, 2); // reserved_zero_2bits[i]
++    for (i = 0; i < max_num_sub_layers - 1; i++) {
++        if (ptl->sub_layer_profile_present_flag[i] &&
++            decode_profile_tier_level(gb, avctx, &ptl->sub_layer_ptl[i]) < 0) {
++            av_log(avctx, AV_LOG_ERROR,
++                   "PTL information for sublayer %i too short\n", i);
++            return -1;
++        }
++        if (ptl->sub_layer_level_present_flag[i]) {
++            if (get_bits_left(gb) < 8) {
++                av_log(avctx, AV_LOG_ERROR,
++                       "Not enough data for sublayer %i level_idc\n", i);
++                return -1;
++            } else
++                ptl->sub_layer_ptl[i].level_idc = get_bits(gb, 8);
++        }
++    }
++
++    return 0;
++}
++
++static void decode_sublayer_hrd(GetBitContext * const gb, const unsigned int nb_cpb,
++                                const int subpic_params_present)
++{
++    // Every value is read only to advance gb; nothing is stored
++    const int ue_per_cpb = subpic_params_present ? 4 : 2;
++    unsigned int cpb;
++    for (cpb = 0; cpb != nb_cpb; cpb++) {
++        int n;
++        // bit_rate_value_minus1, cpb_size_value_minus1
++        // [+ cpb_size_du_value_minus1, bit_rate_du_value_minus1 when subpic_params_present]
++        for (n = 0; n != ue_per_cpb; n++)
++            get_ue_golomb_long(gb);
++
++        skip_bits1(gb); // cbr_flag
++    }
++}
++
++static int decode_hrd(GetBitContext * const gb, const int common_inf_present,
++                      const int max_sublayers)
++{   // Walks over an HRD parameter structure without storing it; returns 0, or AVERROR_INVALIDDATA on a bad CPB count
++    int nal_params_present = 0, vcl_params_present = 0;
++    int subpic_params_present = 0;
++    int i;
++
++    if (common_inf_present) {
++        nal_params_present = get_bits1(gb);
++        vcl_params_present = get_bits1(gb);
++
++        if (nal_params_present || vcl_params_present) {
++            subpic_params_present = get_bits1(gb);
++
++            if (subpic_params_present) {
++                skip_bits(gb, 8); // tick_divisor_minus2
++                skip_bits(gb, 5); // du_cpb_removal_delay_increment_length_minus1
++                skip_bits(gb, 1); // sub_pic_cpb_params_in_pic_timing_sei_flag
++                skip_bits(gb, 5); // dpb_output_delay_du_length_minus1
++            }
++
++            skip_bits(gb, 4); // bit_rate_scale
++            skip_bits(gb, 4); // cpb_size_scale
++
++            if (subpic_params_present)
++                skip_bits(gb, 4);  // cpb_size_du_scale
++
++            skip_bits(gb, 5); // initial_cpb_removal_delay_length_minus1
++            skip_bits(gb, 5); // au_cpb_removal_delay_length_minus1
++            skip_bits(gb, 5); // dpb_output_delay_length_minus1
++        }
++    }
++
++    for (i = 0; i < max_sublayers; i++) {
++        int low_delay = 0;
++        unsigned int nb_cpb = 1;  // stays 1 when low_delay is set and no count is coded
++        int fixed_rate = get_bits1(gb);
++
++        if (!fixed_rate)  // a second flag is read only when the first is 0
++            fixed_rate = get_bits1(gb);
++
++        if (fixed_rate)
++            get_ue_golomb_long(gb);  // elemental_duration_in_tc_minus1
++        else
++            low_delay = get_bits1(gb);
++
++        if (!low_delay) {
++            nb_cpb = get_ue_golomb_long(gb) + 1;
++            if (nb_cpb < 1 || nb_cpb > 32) {  // < 1 can only happen if the + 1 wrapped around
++                av_log(NULL, AV_LOG_ERROR, "nb_cpb %u invalid\n", nb_cpb);  // %u: nb_cpb is unsigned
++                return AVERROR_INVALIDDATA;
++            }
++        }
++
++        if (nal_params_present)
++            decode_sublayer_hrd(gb, nb_cpb, subpic_params_present);
++        if (vcl_params_present)
++            decode_sublayer_hrd(gb, nb_cpb, subpic_params_present);
++    }
++    return 0;
++}
++
++int ff_hevc_rpi_decode_nal_vps(GetBitContext * const gb, AVCodecContext * const avctx,
++                           HEVCRpiParamSets * const ps)
++{
++    int i,j;
++    int vps_id = 0;
++    ptrdiff_t nal_size;
++    HEVCRpiVPS *vps;
++    AVBufferRef *vps_buf = av_buffer_allocz(sizeof(*vps));
++    // Parses a VPS from gb into vps_buf and, on success, installs it in ps->vps_list[vps_id].
++    if (!vps_buf)
++        return AVERROR(ENOMEM);
++    vps = (HEVCRpiVPS*)vps_buf->data;
++
++    av_log(avctx, AV_LOG_DEBUG, "Decoding VPS\n");
++    // Keep a raw copy of the bytes between gb->buffer and gb->buffer_end, clamped to sizeof(vps->data).
++    nal_size = gb->buffer_end - gb->buffer;
++    if (nal_size > sizeof(vps->data)) {
++        av_log(avctx, AV_LOG_WARNING, "Truncating likely oversized VPS "
++               "(%"PTRDIFF_SPECIFIER" > %"SIZE_SPECIFIER")\n",
++               nal_size, sizeof(vps->data));
++        vps->data_size = sizeof(vps->data);
++    } else {
++        vps->data_size = nal_size;
++    }
++    memcpy(vps->data, gb->buffer, vps->data_size);
++
++    vps_id = get_bits(gb, 4);
++    if (vps_id >= HEVC_MAX_VPS_COUNT) {
++        av_log(avctx, AV_LOG_ERROR, "VPS id out of range: %d\n", vps_id);
++        goto err;
++    }
++
++    if (get_bits(gb, 2) != 3) { // vps_reserved_three_2bits
++        av_log(avctx, AV_LOG_ERROR, "vps_reserved_three_2bits is not three\n");
++        goto err;
++    }
++
++    vps->vps_max_layers               = get_bits(gb, 6) + 1;
++    vps->vps_max_sub_layers           = get_bits(gb, 3) + 1;
++    vps->vps_temporal_id_nesting_flag = get_bits1(gb);
++
++    if (get_bits(gb, 16) != 0xffff) { // vps_reserved_ffff_16bits
++        av_log(avctx, AV_LOG_ERROR, "vps_reserved_ffff_16bits is not 0xffff\n");
++        goto err;
++    }
++
++    if (vps->vps_max_sub_layers > HEVC_MAX_SUB_LAYERS) {
++        av_log(avctx, AV_LOG_ERROR, "vps_max_sub_layers out of range: %d\n",
++               vps->vps_max_sub_layers);
++        goto err;
++    }
++
++    if (parse_ptl(gb, avctx, &vps->ptl, vps->vps_max_sub_layers) < 0)
++        goto err;
++
++    vps->vps_sub_layer_ordering_info_present_flag = get_bits1(gb);
++    // Without the ordering-info flag only the entry for the last sub-layer index is read.
++    i = vps->vps_sub_layer_ordering_info_present_flag ? 0 : vps->vps_max_sub_layers - 1;
++    for (; i < vps->vps_max_sub_layers; i++) {
++        vps->vps_max_dec_pic_buffering[i] = get_ue_golomb_long(gb) + 1;
++        vps->vps_num_reorder_pics[i]      = get_ue_golomb_long(gb);
++        vps->vps_max_latency_increase[i]  = get_ue_golomb_long(gb) - 1;
++
++        if (vps->vps_max_dec_pic_buffering[i] > HEVC_MAX_DPB_SIZE || !vps->vps_max_dec_pic_buffering[i]) {
++            av_log(avctx, AV_LOG_ERROR, "vps_max_dec_pic_buffering_minus1 out of range: %d\n",
++                   vps->vps_max_dec_pic_buffering[i] - 1);
++            goto err;
++        }
++        if (vps->vps_num_reorder_pics[i] > vps->vps_max_dec_pic_buffering[i] - 1) {
++            av_log(avctx, AV_LOG_WARNING, "vps_max_num_reorder_pics out of range: %d\n",
++                   vps->vps_num_reorder_pics[i]);
++            if (avctx->err_recognition & AV_EF_EXPLODE)  // otherwise only warned about
++                goto err;
++        }
++    }
++
++    vps->vps_max_layer_id   = get_bits(gb, 6);
++    vps->vps_num_layer_sets = get_ue_golomb_long(gb) + 1;
++    if (vps->vps_num_layer_sets < 1 || vps->vps_num_layer_sets > 1024 ||
++        (vps->vps_num_layer_sets - 1LL) * (vps->vps_max_layer_id + 1LL) > get_bits_left(gb)) {  // 64-bit product: flags skipped below must fit in the remaining bits
++        av_log(avctx, AV_LOG_ERROR, "too many layer_id_included_flags\n");
++        goto err;
++    }
++
++    for (i = 1; i < vps->vps_num_layer_sets; i++)
++        for (j = 0; j <= vps->vps_max_layer_id; j++)
++            skip_bits(gb, 1);  // layer_id_included_flag[i][j]
++
++    vps->vps_timing_info_present_flag = get_bits1(gb);
++    if (vps->vps_timing_info_present_flag) {
++        vps->vps_num_units_in_tick               = get_bits_long(gb, 32);
++        vps->vps_time_scale                      = get_bits_long(gb, 32);
++        vps->vps_poc_proportional_to_timing_flag = get_bits1(gb);
++        if (vps->vps_poc_proportional_to_timing_flag)
++            vps->vps_num_ticks_poc_diff_one = get_ue_golomb_long(gb) + 1;
++        vps->vps_num_hrd_parameters = get_ue_golomb_long(gb);
++        if (vps->vps_num_hrd_parameters > (unsigned)vps->vps_num_layer_sets) {
++            av_log(avctx, AV_LOG_ERROR,
++                   "vps_num_hrd_parameters %d is invalid\n", vps->vps_num_hrd_parameters);
++            goto err;
++        }
++        for (i = 0; i < vps->vps_num_hrd_parameters; i++) {
++            int common_inf_present = 1;
++
++            get_ue_golomb_long(gb); // hrd_layer_set_idx
++            if (i)
++                common_inf_present = get_bits1(gb);  // first HRD set always uses 1; later ones read the flag
++            decode_hrd(gb, common_inf_present, vps->vps_max_sub_layers);  // NOTE(review): return value is ignored here
++        }
++    }
++    get_bits1(gb); /* vps_extension_flag */
++
++    if (get_bits_left(gb) < 0) {
++        av_log(avctx, AV_LOG_ERROR,
++               "Overread VPS by %d bits\n", -get_bits_left(gb));
++        if (ps->vps_list[vps_id])  // an overread is rejected only when a VPS with this id is already stored
++            goto err;
++    }
++    // Byte-identical to the stored VPS: drop the new buffer; otherwise replace the stored entry.
++    if (ps->vps_list[vps_id] &&
++        !memcmp(ps->vps_list[vps_id]->data, vps_buf->data, vps_buf->size)) {
++        av_buffer_unref(&vps_buf);
++    } else {
++        remove_vps(ps, vps_id);
++        ps->vps_list[vps_id] = vps_buf;
++    }
++
++    return 0;
++
++err:
++    av_buffer_unref(&vps_buf);  // every failure after the allocation releases the new buffer here
++    return AVERROR_INVALIDDATA;
++}
++
++static void decode_vui(GetBitContext * const gb, AVCodecContext * const avctx,
++                       const int apply_defdispwin, HEVCRpiSPS * const sps)
++{
++    VUI backup_vui, * const vui = &sps->vui;
++    GetBitContext backup;
++    int sar_present, alt = 0;  // alt becomes 1 once the rewind-and-retry path has been taken
++    // Parses VUI fields from gb into sps->vui; may also adjust sps->pix_fmt. Returns nothing.
++    av_log(avctx, AV_LOG_DEBUG, "Decoding VUI\n");
++
++    sar_present = get_bits1(gb);
++    if (sar_present) {
++        uint8_t sar_idx = get_bits(gb, 8);
++        if (sar_idx < FF_ARRAY_ELEMS(vui_sar))  // small indices select an entry of the vui_sar table
++            vui->sar = vui_sar[sar_idx];
++        else if (sar_idx == 255) {  // 255: numerator and denominator follow as two 16-bit fields
++            vui->sar.num = get_bits(gb, 16);
++            vui->sar.den = get_bits(gb, 16);
++        } else
++            av_log(avctx, AV_LOG_WARNING,
++                   "Unknown SAR index: %u.\n", sar_idx);
++    }
++
++    vui->overscan_info_present_flag = get_bits1(gb);
++    if (vui->overscan_info_present_flag)
++        vui->overscan_appropriate_flag = get_bits1(gb);
++
++    vui->video_signal_type_present_flag = get_bits1(gb);
++    if (vui->video_signal_type_present_flag) {
++        vui->video_format                    = get_bits(gb, 3);
++        vui->video_full_range_flag           = get_bits1(gb);
++        vui->colour_description_present_flag = get_bits1(gb);
++        if (vui->video_full_range_flag && sps->pix_fmt == AV_PIX_FMT_YUV420P)
++            sps->pix_fmt = AV_PIX_FMT_YUVJ420P;  // switch to the J format when full range is signalled
++        if (vui->colour_description_present_flag) {
++            vui->colour_primaries        = get_bits(gb, 8);
++            vui->transfer_characteristic = get_bits(gb, 8);
++            vui->matrix_coeffs           = get_bits(gb, 8);
++
++            // Set invalid values to "unspecified"
++            if (!av_color_primaries_name(vui->colour_primaries))
++                vui->colour_primaries = AVCOL_PRI_UNSPECIFIED;
++            if (!av_color_transfer_name(vui->transfer_characteristic))
++                vui->transfer_characteristic = AVCOL_TRC_UNSPECIFIED;
++            if (!av_color_space_name(vui->matrix_coeffs))
++                vui->matrix_coeffs = AVCOL_SPC_UNSPECIFIED;
++            if (vui->matrix_coeffs == AVCOL_SPC_RGB) {  // RGB matrix: swap the YUV444 formats for GBR ones
++                switch (sps->pix_fmt) {
++                case AV_PIX_FMT_YUV444P:
++                    sps->pix_fmt = AV_PIX_FMT_GBRP;
++                    break;
++                case AV_PIX_FMT_YUV444P10:
++                    sps->pix_fmt = AV_PIX_FMT_GBRP10;
++                    break;
++                case AV_PIX_FMT_YUV444P12:
++                    sps->pix_fmt = AV_PIX_FMT_GBRP12;
++                    break;
++                }
++            }
++        }
++    }
++
++    vui->chroma_loc_info_present_flag = get_bits1(gb);
++    if (vui->chroma_loc_info_present_flag) {
++        vui->chroma_sample_loc_type_top_field    = get_ue_golomb_long(gb);
++        vui->chroma_sample_loc_type_bottom_field = get_ue_golomb_long(gb);
++    }
++
++    vui->neutra_chroma_indication_flag = get_bits1(gb);
++    vui->field_seq_flag                = get_bits1(gb);
++    vui->frame_field_info_present_flag = get_bits1(gb);
++
++    // Backup context in case an alternate header is detected
++    memcpy(&backup, gb, sizeof(backup));
++    memcpy(&backup_vui, vui, sizeof(backup_vui));
++    if (get_bits_left(gb) >= 68 && show_bits_long(gb, 21) == 0x100000) {
++        vui->default_display_window_flag = 0;  // the flag bit is NOT consumed on this path
++        av_log(avctx, AV_LOG_WARNING, "Invalid default display window\n");
++    } else
++        vui->default_display_window_flag = get_bits1(gb);
++
++    if (vui->default_display_window_flag) {
++        int vert_mult  = 1 + (sps->chroma_format_idc < 2);  // 2 when chroma_format_idc is 0 or 1, else 1
++        int horiz_mult = 1 + (sps->chroma_format_idc < 3);  // 2 when chroma_format_idc is 0, 1 or 2, else 1
++        vui->def_disp_win.left_offset   = get_ue_golomb_long(gb) * horiz_mult;
++        vui->def_disp_win.right_offset  = get_ue_golomb_long(gb) * horiz_mult;
++        vui->def_disp_win.top_offset    = get_ue_golomb_long(gb) *  vert_mult;
++        vui->def_disp_win.bottom_offset = get_ue_golomb_long(gb) *  vert_mult;
++
++        if (apply_defdispwin &&
++            avctx->flags2 & AV_CODEC_FLAG2_IGNORE_CROP) {  // window is logged, then zeroed
++            av_log(avctx, AV_LOG_DEBUG,
++                   "discarding vui default display window, "
++                   "original values are l:%u r:%u t:%u b:%u\n",
++                   vui->def_disp_win.left_offset,
++                   vui->def_disp_win.right_offset,
++                   vui->def_disp_win.top_offset,
++                   vui->def_disp_win.bottom_offset);
++
++            vui->def_disp_win.left_offset   =
++            vui->def_disp_win.right_offset  =
++            vui->def_disp_win.top_offset    =
++            vui->def_disp_win.bottom_offset = 0;
++        }
++    }
++    // Retry entry point: each goto below first restores vui and gb from the backups and sets alt.
++timing_info:
++    vui->vui_timing_info_present_flag = get_bits1(gb);
++
++    if (vui->vui_timing_info_present_flag) {
++        if( get_bits_left(gb) < 66 && !alt) {
++            // The alternate syntax seems to have its timing info located
++            // where def_disp_win is normally located
++            av_log(avctx, AV_LOG_WARNING,
++                   "Strange VUI timing information, retrying...\n");
++            memcpy(vui, &backup_vui, sizeof(backup_vui));
++            memcpy(gb, &backup, sizeof(backup));
++            alt = 1;
++            goto timing_info;
++        }
++        vui->vui_num_units_in_tick               = get_bits_long(gb, 32);
++        vui->vui_time_scale                      = get_bits_long(gb, 32);
++        if (alt) {
++            av_log(avctx, AV_LOG_INFO, "Retry got %"PRIu32"/%"PRIu32" fps\n",
++                   vui->vui_time_scale, vui->vui_num_units_in_tick);
++        }
++        vui->vui_poc_proportional_to_timing_flag = get_bits1(gb);
++        if (vui->vui_poc_proportional_to_timing_flag)
++            vui->vui_num_ticks_poc_diff_one_minus1 = get_ue_golomb_long(gb);
++        vui->vui_hrd_parameters_present_flag = get_bits1(gb);
++        if (vui->vui_hrd_parameters_present_flag)
++            decode_hrd(gb, 1, sps->max_sub_layers);  // NOTE(review): return value is ignored here
++    }
++
++    vui->bitstream_restriction_flag = get_bits1(gb);
++    if (vui->bitstream_restriction_flag) {
++        if (get_bits_left(gb) < 8 && !alt) {  // too few bits left for the fields below: take the one retry
++            av_log(avctx, AV_LOG_WARNING,
++                   "Strange VUI bitstream restriction information, retrying"
++                   " from timing information...\n");
++            memcpy(vui, &backup_vui, sizeof(backup_vui));
++            memcpy(gb, &backup, sizeof(backup));
++            alt = 1;
++            goto timing_info;
++        }
++        vui->tiles_fixed_structure_flag              = get_bits1(gb);
++        vui->motion_vectors_over_pic_boundaries_flag = get_bits1(gb);
++        vui->restricted_ref_pic_lists_flag           = get_bits1(gb);
++        vui->min_spatial_segmentation_idc            = get_ue_golomb_long(gb);
++        vui->max_bytes_per_pic_denom                 = get_ue_golomb_long(gb);
++        vui->max_bits_per_min_cu_denom               = get_ue_golomb_long(gb);
++        vui->log2_max_mv_length_horizontal           = get_ue_golomb_long(gb);
++        vui->log2_max_mv_length_vertical             = get_ue_golomb_long(gb);
++    }
++
++    if (get_bits_left(gb) < 1 && !alt) {
++        // XXX: Alternate syntax when sps_range_extension_flag != 0?
++        av_log(avctx, AV_LOG_WARNING,
++               "Overread in VUI, retrying from timing information...\n");
++        memcpy(vui, &backup_vui, sizeof(backup_vui));
++        memcpy(gb, &backup, sizeof(backup));
++        alt = 1;
++        goto timing_info;
++    }
++}
++
++static void set_default_scaling_list_data(ScalingList * const sl)
++{
++    int matrixId;
++    // Fills sl->sl[0..3][0..5] and sl->sl_dc[0..1][0..5] with default values; no bitstream access.
++    for (matrixId = 0; matrixId < 6; matrixId++) {
++        // 4x4 default is 16
++        memset(sl->sl[0][matrixId], 16, 16);
++        sl->sl_dc[0][matrixId] = 16; // default for 16x16
++        sl->sl_dc[1][matrixId] = 16; // default for 32x32
++    }
++    // Size ids 1..3: matrices 0-2 copy 64 bytes of the intra table, matrices 3-5 of the inter table.
++    memcpy(sl->sl[1][0], default_scaling_list_intra, 64);
++    memcpy(sl->sl[1][1], default_scaling_list_intra, 64);
++    memcpy(sl->sl[1][2], default_scaling_list_intra, 64);
++
++    memcpy(sl->sl[1][3], default_scaling_list_inter, 64);
++    memcpy(sl->sl[1][4], default_scaling_list_inter, 64);
++    memcpy(sl->sl[1][5], default_scaling_list_inter, 64);
++
++    memcpy(sl->sl[2][0], default_scaling_list_intra, 64);
++    memcpy(sl->sl[2][1], default_scaling_list_intra, 64);
++    memcpy(sl->sl[2][2], default_scaling_list_intra, 64);
++
++    memcpy(sl->sl[2][3], default_scaling_list_inter, 64);
++    memcpy(sl->sl[2][4], default_scaling_list_inter, 64);
++    memcpy(sl->sl[2][5], default_scaling_list_inter, 64);
++
++    memcpy(sl->sl[3][0], default_scaling_list_intra, 64);
++    memcpy(sl->sl[3][1], default_scaling_list_intra, 64);
++    memcpy(sl->sl[3][2], default_scaling_list_intra, 64);
++
++    memcpy(sl->sl[3][3], default_scaling_list_inter, 64);
++    memcpy(sl->sl[3][4], default_scaling_list_inter, 64);
++    memcpy(sl->sl[3][5], default_scaling_list_inter, 64);
++}
++
++static int scaling_list_data(GetBitContext * const gb, AVCodecContext * const avctx, ScalingList * const sl,
++                             const HEVCRpiSPS * const sps)
++{
++    uint8_t scaling_list_pred_mode_flag;
++    int32_t scaling_list_dc_coef[2][6];
++    int size_id, matrix_id, pos;
++    int i;
++    // Reads scaling lists from gb into sl. Returns 0, or AVERROR_INVALIDDATA for a bad reference delta.
++    for (size_id = 0; size_id < 4; size_id++)  // size_id 3 advances matrix_id by 3, so only matrices 0 and 3 are read
++        for (matrix_id = 0; matrix_id < 6; matrix_id += ((size_id == 3) ? 3 : 1)) {
++            scaling_list_pred_mode_flag = get_bits1(gb);
++            if (!scaling_list_pred_mode_flag) {
++                unsigned int delta = get_ue_golomb_long(gb);
++                /* Only need to handle non-zero delta. Zero means default,
++                 * which should already be in the arrays. */
++                if (delta) {
++                    // Copy from previous array.
++                    delta *= (size_id == 3) ? 3 : 1;  // same factor of 3 as the matrix_id step for size_id 3
++                    if (matrix_id < delta) {  // the referenced matrix index would be negative
++                        av_log(avctx, AV_LOG_ERROR,
++                               "Invalid delta in scaling list data: %d.\n", delta);
++                        return AVERROR_INVALIDDATA;
++                    }
++
++                    memcpy(sl->sl[size_id][matrix_id],
++                           sl->sl[size_id][matrix_id - delta],
++                           size_id > 0 ? 64 : 16);  // 16 bytes for size_id 0, 64 otherwise
++                    if (size_id > 1)  // sl_dc is indexed by size_id - 2, so it only exists for size_id 2 and 3
++                        sl->sl_dc[size_id - 2][matrix_id] = sl->sl_dc[size_id - 2][matrix_id - delta];
++                }
++            } else {
++                int next_coef, coef_num;
++                int32_t scaling_list_delta_coef;
++
++                next_coef = 8;
++                coef_num  = FFMIN(64, 1 << (4 + (size_id << 1)));  // 16 for size_id 0, 64 for the rest
++                if (size_id > 1) {
++                    scaling_list_dc_coef[size_id - 2][matrix_id] = get_se_golomb(gb) + 8;
++                    next_coef = scaling_list_dc_coef[size_id - 2][matrix_id];
++                    sl->sl_dc[size_id - 2][matrix_id] = next_coef;
++                }
++                for (i = 0; i < coef_num; i++) {
++                    if (size_id == 0)  // coefficients arrive in scan order; pos is the row-major index
++                        pos = 4 * ff_hevc_rpi_diag_scan4x4_y[i] +
++                                  ff_hevc_rpi_diag_scan4x4_x[i];
++                    else
++                        pos = 8 * ff_hevc_rpi_diag_scan8x8_y[i] +
++                                  ff_hevc_rpi_diag_scan8x8_x[i];
++
++                    scaling_list_delta_coef = get_se_golomb(gb);
++                    next_coef = (next_coef + 256U + scaling_list_delta_coef) % 256;  // unsigned arithmetic keeps the result in 0..255
++                    sl->sl[size_id][matrix_id][pos] = next_coef;
++                }
++            }
++        }
++    // For chroma_format_idc 3, the size_id 3 matrices the loop skipped (1, 2, 4, 5) are copied from size_id 2.
++    if (sps->chroma_format_idc == 3) {
++        for (i = 0; i < 64; i++) {
++            sl->sl[3][1][i] = sl->sl[2][1][i];
++            sl->sl[3][2][i] = sl->sl[2][2][i];
++            sl->sl[3][4][i] = sl->sl[2][4][i];
++            sl->sl[3][5][i] = sl->sl[2][5][i];
++        }
++        sl->sl_dc[1][1] = sl->sl_dc[0][1];
++        sl->sl_dc[1][2] = sl->sl_dc[0][2];
++        sl->sl_dc[1][4] = sl->sl_dc[0][4];
++        sl->sl_dc[1][5] = sl->sl_dc[0][5];
++    }
++
++
++    return 0;
++}
++
++static int map_pixel_format(HEVCRpiSPS * const sps)
++{
++    const int cfmt = sps->chroma_format_idc;
++    // Derives pix_fmt, the per-plane h/v shifts and pixel_shift from bit_depth and chroma_format_idc.
++    sps->pix_fmt = AV_PIX_FMT_NONE;  // stays NONE unless bit_depth is 8 or 10 and cfmt is 1
++    switch (sps->bit_depth) {
++    case 8:
++        if (cfmt == 1)
++            sps->pix_fmt = AV_PIX_FMT_SAND128;
++        break;
++    case 10:
++        if (cfmt == 1)
++            sps->pix_fmt = AV_PIX_FMT_SAND64_10;
++        break;
++    default:
++        break;
++    }
++
++    sps->hshift[0] = sps->vshift[0] = 0;
++    sps->hshift[2] = sps->hshift[1] = cfmt > 2 ? 0 : 1; // 1 unless 4:4:4
++    sps->vshift[2] = sps->vshift[1] = cfmt > 1 ? 0 : 1; // 1 unless 4:4:4 or 4:2:2
++
++    sps->pixel_shift = sps->bit_depth > 8 ? 1 : 0;
++
++    return 0;  // always 0, even when pix_fmt was left as AV_PIX_FMT_NONE
++}
++
++static int ff_hevc_rpi_parse_sps(HEVCRpiSPS * const sps, GetBitContext * const gb, unsigned int * const sps_id,
++                      const int apply_defdispwin, AVBufferRef * const * const vps_list, AVCodecContext * const avctx)
++{
++    HEVCRpiWindow *ow;
++    int ret = 0;
++    int log2_diff_max_min_transform_block_size;
++    int bit_depth_chroma, start, vui_present, sublayer_ordering_info;
++    int i;
++
++    // Coded parameters
++
++    sps->vps_id = get_bits(gb, 4);
++    if (sps->vps_id >= HEVC_MAX_VPS_COUNT) {
++        av_log(avctx, AV_LOG_ERROR, "VPS id out of range: %d\n", sps->vps_id);
++        return AVERROR_INVALIDDATA;
++    }
++
++    if (vps_list && !vps_list[sps->vps_id]) {
++        av_log(avctx, AV_LOG_ERROR, "VPS %d does not exist\n",
++               sps->vps_id);
++        return AVERROR_INVALIDDATA;
++    }
++
++    sps->max_sub_layers = get_bits(gb, 3) + 1;
++    if (sps->max_sub_layers > HEVC_MAX_SUB_LAYERS) {
++        av_log(avctx, AV_LOG_ERROR, "sps_max_sub_layers out of range: %d\n",
++               sps->max_sub_layers);
++        return AVERROR_INVALIDDATA;
++    }
++
++    sps->temporal_id_nesting_flag = get_bits(gb, 1);
++
++    if ((ret = parse_ptl(gb, avctx, &sps->ptl, sps->max_sub_layers)) < 0)
++        return ret;
++
++    *sps_id = get_ue_golomb_long(gb);
++    if (*sps_id >= HEVC_MAX_SPS_COUNT) {
++        av_log(avctx, AV_LOG_ERROR, "SPS id out of range: %d\n", *sps_id);
++        return AVERROR_INVALIDDATA;
++    }
++
++    sps->chroma_format_idc = get_ue_golomb_long(gb);
++    if (sps->chroma_format_idc > 3U) {
++        av_log(avctx, AV_LOG_ERROR, "chroma_format_idc %d is invalid\n", sps->chroma_format_idc);
++        return AVERROR_INVALIDDATA;
++    }
++
++    if (sps->chroma_format_idc == 3)
++        sps->separate_colour_plane_flag = get_bits1(gb);
++
++    if (sps->separate_colour_plane_flag)
++        sps->chroma_format_idc = 0;
++
++    sps->width  = get_ue_golomb_long(gb);
++    sps->height = get_ue_golomb_long(gb);
++    if ((ret = av_image_check_size(sps->width,
++                                   sps->height, 0, avctx)) < 0)
++        return ret;
++
++    if (get_bits1(gb)) { // pic_conformance_flag
++        int vert_mult  = 1 + (sps->chroma_format_idc < 2);
++        int horiz_mult = 1 + (sps->chroma_format_idc < 3);
++        sps->pic_conf_win.left_offset   = get_ue_golomb_long(gb) * horiz_mult;
++        sps->pic_conf_win.right_offset  = get_ue_golomb_long(gb) * horiz_mult;
++        sps->pic_conf_win.top_offset    = get_ue_golomb_long(gb) *  vert_mult;
++        sps->pic_conf_win.bottom_offset = get_ue_golomb_long(gb) *  vert_mult;
++
++        if (avctx->flags2 & AV_CODEC_FLAG2_IGNORE_CROP) {
++            av_log(avctx, AV_LOG_DEBUG,
++                   "discarding sps conformance window, "
++                   "original values are l:%u r:%u t:%u b:%u\n",
++                   sps->pic_conf_win.left_offset,
++                   sps->pic_conf_win.right_offset,
++                   sps->pic_conf_win.top_offset,
++                   sps->pic_conf_win.bottom_offset);
++
++            sps->pic_conf_win.left_offset   =
++            sps->pic_conf_win.right_offset  =
++            sps->pic_conf_win.top_offset    =
++            sps->pic_conf_win.bottom_offset = 0;
++        }
++        sps->output_window = sps->pic_conf_win;
++    }
++
++    sps->bit_depth   = get_ue_golomb_long(gb) + 8;
++    bit_depth_chroma = get_ue_golomb_long(gb) + 8;
++    if (sps->chroma_format_idc && bit_depth_chroma != sps->bit_depth) {
++        av_log(avctx, AV_LOG_ERROR,
++               "Luma bit depth (%d) is different from chroma bit depth (%d), "
++               "this is unsupported.\n",
++               sps->bit_depth, bit_depth_chroma);
++        return AVERROR_INVALIDDATA;
++    }
++
++    ret = map_pixel_format(sps);
++    if (ret < 0)
++        return ret;
++
++    sps->log2_max_poc_lsb = get_ue_golomb_long(gb) + 4;
++    if (sps->log2_max_poc_lsb > 16) {
++        av_log(avctx, AV_LOG_ERROR, "log2_max_pic_order_cnt_lsb_minus4 out range: %d\n",
++               sps->log2_max_poc_lsb - 4);
++        return AVERROR_INVALIDDATA;
++    }
++
++    sublayer_ordering_info = get_bits1(gb);
++    start = sublayer_ordering_info ? 0 : sps->max_sub_layers - 1;
++    for (i = start; i < sps->max_sub_layers; i++) {
++        sps->temporal_layer[i].max_dec_pic_buffering = get_ue_golomb_long(gb) + 1;
++        sps->temporal_layer[i].num_reorder_pics      = get_ue_golomb_long(gb);
++        sps->temporal_layer[i].max_latency_increase  = get_ue_golomb_long(gb) - 1;
++        if (sps->temporal_layer[i].max_dec_pic_buffering > (unsigned)HEVC_MAX_DPB_SIZE) {
++            av_log(avctx, AV_LOG_ERROR, "sps_max_dec_pic_buffering_minus1 out of range: %d\n",
++                   sps->temporal_layer[i].max_dec_pic_buffering - 1U);
++            return AVERROR_INVALIDDATA;
++        }
++        if (sps->temporal_layer[i].num_reorder_pics > sps->temporal_layer[i].max_dec_pic_buffering - 1) {
++            av_log(avctx, AV_LOG_WARNING, "sps_max_num_reorder_pics out of range: %d\n",
++                   sps->temporal_layer[i].num_reorder_pics);
++            if (avctx->err_recognition & AV_EF_EXPLODE ||
++                sps->temporal_layer[i].num_reorder_pics > HEVC_MAX_DPB_SIZE - 1) {
++                return AVERROR_INVALIDDATA;
++            }
++            sps->temporal_layer[i].max_dec_pic_buffering = sps->temporal_layer[i].num_reorder_pics + 1;
++        }
++    }
++
++    if (!sublayer_ordering_info) {
++        for (i = 0; i < start; i++) {
++            sps->temporal_layer[i].max_dec_pic_buffering = sps->temporal_layer[start].max_dec_pic_buffering;
++            sps->temporal_layer[i].num_reorder_pics      = sps->temporal_layer[start].num_reorder_pics;
++            sps->temporal_layer[i].max_latency_increase  = sps->temporal_layer[start].max_latency_increase;
++        }
++    }
++
++    sps->log2_min_cb_size                    = get_ue_golomb_long(gb) + 3;
++    sps->log2_diff_max_min_coding_block_size = get_ue_golomb_long(gb);
++    sps->log2_min_tb_size                    = get_ue_golomb_long(gb) + 2;
++    log2_diff_max_min_transform_block_size   = get_ue_golomb_long(gb);
++    sps->log2_max_trafo_size                 = log2_diff_max_min_transform_block_size +
++                                               sps->log2_min_tb_size;
++
++    if (sps->log2_min_cb_size < 3 || sps->log2_min_cb_size > 30) {
++        av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_min_cb_size", sps->log2_min_cb_size);
++        return AVERROR_INVALIDDATA;
++    }
++
++    if (sps->log2_diff_max_min_coding_block_size > 30) {
++        av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_coding_block_size", sps->log2_diff_max_min_coding_block_size);
++        return AVERROR_INVALIDDATA;
++    }
++
++    if (sps->log2_min_tb_size >= sps->log2_min_cb_size || sps->log2_min_tb_size < 2) {
++        av_log(avctx, AV_LOG_ERROR, "Invalid value for log2_min_tb_size");
++        return AVERROR_INVALIDDATA;
++    }
++
++    if (log2_diff_max_min_transform_block_size < 0 || log2_diff_max_min_transform_block_size > 30) {
++        av_log(avctx, AV_LOG_ERROR, "Invalid value %d for log2_diff_max_min_transform_block_size", log2_diff_max_min_transform_block_size);
++        return AVERROR_INVALIDDATA;
++    }
++
++    {
++        const unsigned int CtbLog2SizeY = sps->log2_min_cb_size + sps->log2_diff_max_min_coding_block_size;
++        // Not a bitstream limitation, but all profiles
++        if (CtbLog2SizeY < 4 || CtbLog2SizeY > HEVC_MAX_LOG2_CTB_SIZE) {
++            av_log(avctx, AV_LOG_ERROR, "Invalid value %d for CtbLog2SizeY", CtbLog2SizeY);
++            return AVERROR_INVALIDDATA;
++        }
++
++        if (sps->log2_max_trafo_size > FFMIN(5, CtbLog2SizeY)) {
++            av_log(avctx, AV_LOG_ERROR, "Invalid value %d for MaxTbLog2SizeY", sps->log2_max_trafo_size);
++            return AVERROR_INVALIDDATA;
++        }
++
++        // Inferred parameters
++        sps->log2_ctb_size = CtbLog2SizeY;
++//        sps->log2_min_pu_size = sps->log2_min_cb_size - 1;
++    }
++
++    sps->max_transform_hierarchy_depth_inter = get_ue_golomb_long(gb);
++    sps->max_transform_hierarchy_depth_intra = get_ue_golomb_long(gb);
++
++    sps->scaling_list_enable_flag = get_bits1(gb);
++    if (sps->scaling_list_enable_flag) {
++        set_default_scaling_list_data(&sps->scaling_list);
++
++        if (get_bits1(gb)) {
++            ret = scaling_list_data(gb, avctx, &sps->scaling_list, sps);
++            if (ret < 0)
++                return ret;
++        }
++    }
++
++    sps->amp_enabled_flag = get_bits1(gb);
++    sps->sao_enabled      = get_bits1(gb);
++
++    // Set pcm defaults (0) so we don't have to test _enabled when we
++    // want to use them
++    memset(&sps->pcm, 0, sizeof(sps->pcm));
++
++    if (get_bits1(gb))  // pcm_enabled_flag
++    {
++        const unsigned int limit_max_pcm = FFMIN(5,
++            sps->log2_min_cb_size + sps->log2_diff_max_min_coding_block_size);
++        sps->pcm.bit_depth   = get_bits(gb, 4) + 1;
++        sps->pcm.bit_depth_chroma = get_bits(gb, 4) + 1;
++        sps->pcm.log2_min_pcm_cb_size = get_ue_golomb_long(gb) + 3;
++        sps->pcm.log2_max_pcm_cb_size = sps->pcm.log2_min_pcm_cb_size +
++                                        get_ue_golomb_long(gb);
++        if (FFMAX(sps->pcm.bit_depth, sps->pcm.bit_depth_chroma) > sps->bit_depth) {
++            av_log(avctx, AV_LOG_ERROR,
++                   "PCM bit depth (%d, %d) is greater than normal bit depth (%d)\n",
++                   sps->pcm.bit_depth, sps->pcm.bit_depth_chroma, sps->bit_depth);
++            return AVERROR_INVALIDDATA;
++        }
++        if (sps->pcm.log2_min_pcm_cb_size < sps->log2_min_cb_size ||
++            sps->pcm.log2_max_pcm_cb_size > limit_max_pcm) {
++            av_log(avctx, AV_LOG_ERROR, "Bad PCM CB min/max size (%d->%d)",
++                   sps->pcm.log2_min_pcm_cb_size, sps->pcm.log2_max_pcm_cb_size);
++            return AVERROR_INVALIDDATA;
++        }
++
++        sps->pcm.loop_filter_disable_flag = get_bits1(gb);
++    }
++
++    // Could be based on min_pcm_cb_size but much easier logic if we just stick
++    // with 8 (and costs us little)
++    sps->pcm_width = (sps->width + 63) >> 6;  // 8 for min size, 8 bits per byte - round up
++    sps->pcm_height = (sps->height + 7) >> 3;
++
++    sps->nb_st_rps = get_ue_golomb_long(gb);
++    if (sps->nb_st_rps > HEVC_MAX_SHORT_TERM_REF_PIC_SETS) {
++        av_log(avctx, AV_LOG_ERROR, "Too many short term RPS: %d.\n",
++               sps->nb_st_rps);
++        return AVERROR_INVALIDDATA;
++    }
++    for (i = 0; i < sps->nb_st_rps; i++) {
++        if ((ret = ff_hevc_rpi_decode_short_term_rps(gb, avctx, &sps->st_rps[i],
++                                                 sps, 0)) < 0)
++            return ret;
++    }
++
++    sps->long_term_ref_pics_present_flag = get_bits1(gb);
++    if (sps->long_term_ref_pics_present_flag) {
++        sps->num_long_term_ref_pics_sps = get_ue_golomb_long(gb);
++        if (sps->num_long_term_ref_pics_sps > HEVC_MAX_LONG_TERM_REF_PICS) {
++            av_log(avctx, AV_LOG_ERROR, "num_long_term_ref_pics_sps %d is out of range.\n",
++                   sps->num_long_term_ref_pics_sps);
++            return AVERROR_INVALIDDATA;
++        }
++        for (i = 0; i < sps->num_long_term_ref_pics_sps; i++) {
++            sps->lt_ref_pic_poc_lsb_sps[i]       = get_bits(gb, sps->log2_max_poc_lsb);
++            sps->used_by_curr_pic_lt_sps_flag[i] = get_bits1(gb);
++        }
++    }
++
++    sps->sps_temporal_mvp_enabled_flag          = get_bits1(gb);
++    sps->intra_filters_disable = get_bits1(gb) ? 0 : FILTER_STRONG; // sps->sps_strong_intra_smoothing_enable_flag
++    sps->vui.sar = (AVRational){0, 1};
++    vui_present = get_bits1(gb);
++    if (vui_present)
++        decode_vui(gb, avctx, apply_defdispwin, sps);
++
++    if (get_bits1(gb)) { // sps_extension_flag
++        int sps_extension_flag[1];
++        for (i = 0; i < 1; i++)
++            sps_extension_flag[i] = get_bits1(gb);
++        skip_bits(gb, 7); //sps_extension_7bits = get_bits(gb, 7);
++        if (sps_extension_flag[0]) {
++            int extended_precision_processing_flag;
++            int cabac_bypass_alignment_enabled_flag;
++
++            sps->transform_skip_rotation_enabled_flag = get_bits1(gb);
++            sps->transform_skip_context_enabled_flag  = get_bits1(gb);
++            sps->implicit_rdpcm_enabled_flag = get_bits1(gb);
++
++            sps->explicit_rdpcm_enabled_flag = get_bits1(gb);
++
++            extended_precision_processing_flag = get_bits1(gb);
++            if (extended_precision_processing_flag)
++                av_log(avctx, AV_LOG_WARNING,
++                   "extended_precision_processing_flag not yet implemented\n");
++
++            if (get_bits1(gb))          // sps->intra_smoothing_disabled_flag
++                sps->intra_filters_disable |= FILTER_EITHER;
++            sps->high_precision_offsets_enabled_flag = get_bits1(gb);
++            sps->persistent_rice_adaptation_enabled_flag = get_bits1(gb);
++
++            cabac_bypass_alignment_enabled_flag  = get_bits1(gb);
++            if (cabac_bypass_alignment_enabled_flag)
++                av_log(avctx, AV_LOG_WARNING,
++                   "cabac_bypass_alignment_enabled_flag not yet implemented\n");
++        }
++    }
++    if (apply_defdispwin) {
++        sps->output_window.left_offset   += sps->vui.def_disp_win.left_offset;
++        sps->output_window.right_offset  += sps->vui.def_disp_win.right_offset;
++        sps->output_window.top_offset    += sps->vui.def_disp_win.top_offset;
++        sps->output_window.bottom_offset += sps->vui.def_disp_win.bottom_offset;
++    }
++
++    ow = &sps->output_window;
++    if (ow->left_offset >= INT_MAX - ow->right_offset     ||
++        ow->top_offset  >= INT_MAX - ow->bottom_offset    ||
++        ow->left_offset + ow->right_offset  >= sps->width ||
++        ow->top_offset  + ow->bottom_offset >= sps->height) {
++        av_log(avctx, AV_LOG_WARNING, "Invalid cropping offsets: %u/%u/%u/%u\n",
++               ow->left_offset, ow->right_offset, ow->top_offset, ow->bottom_offset);
++        if (avctx->err_recognition & AV_EF_EXPLODE) {
++            return AVERROR_INVALIDDATA;
++        }
++        av_log(avctx, AV_LOG_WARNING,
++               "Displaying the whole video surface.\n");
++        memset(ow, 0, sizeof(*ow));
++        memset(&sps->pic_conf_win, 0, sizeof(sps->pic_conf_win));
++    }
++
++    // Inferred parameters
++
++    sps->ctb_width  = (sps->width  + (1 << sps->log2_ctb_size) - 1) >> sps->log2_ctb_size;
++    sps->ctb_height = (sps->height + (1 << sps->log2_ctb_size) - 1) >> sps->log2_ctb_size;
++    sps->ctb_size   = sps->ctb_width * sps->ctb_height;
++
++    sps->min_cb_width  = sps->width  >> sps->log2_min_cb_size;
++    sps->min_cb_height = sps->height >> sps->log2_min_cb_size;
++    sps->min_tb_width  = sps->width  >> sps->log2_min_tb_size;
++    sps->min_tb_height = sps->height >> sps->log2_min_tb_size;
++    sps->min_pu_width  = sps->width  >> LOG2_MIN_PU_SIZE;
++    sps->min_pu_height = sps->height >> LOG2_MIN_PU_SIZE;
++    sps->tb_mask       = (1 << (sps->log2_ctb_size - sps->log2_min_tb_size)) - 1;
++
++    sps->qp_bd_offset = 6 * (sps->bit_depth - 8);
++    sps->wp_offset_half_range = (1U << (sps->high_precision_offsets_enabled_flag ? sps->bit_depth - 1 : 7));
++
++    if (av_mod_uintp2(sps->width, sps->log2_min_cb_size) ||
++        av_mod_uintp2(sps->height, sps->log2_min_cb_size)) {
++        av_log(avctx, AV_LOG_ERROR, "Invalid coded frame dimensions.\n");
++        return AVERROR_INVALIDDATA;
++    }
++
++    if (sps->max_transform_hierarchy_depth_inter > sps->log2_ctb_size - sps->log2_min_tb_size) {
++        av_log(avctx, AV_LOG_ERROR, "max_transform_hierarchy_depth_inter out of range: %d\n",
++               sps->max_transform_hierarchy_depth_inter);
++        return AVERROR_INVALIDDATA;
++    }
++    if (sps->max_transform_hierarchy_depth_intra > sps->log2_ctb_size - sps->log2_min_tb_size) {
++        av_log(avctx, AV_LOG_ERROR, "max_transform_hierarchy_depth_intra out of range: %d\n",
++               sps->max_transform_hierarchy_depth_intra);
++        return AVERROR_INVALIDDATA;
++    }
++    if (sps->log2_max_trafo_size > FFMIN(sps->log2_ctb_size, 5)) {
++        av_log(avctx, AV_LOG_ERROR,
++               "max transform block size out of range: %d\n",
++               sps->log2_max_trafo_size);
++        return AVERROR_INVALIDDATA;
++    }
++
++    if (get_bits_left(gb) < 0) {
++        av_log(avctx, AV_LOG_ERROR,
++               "Overread SPS by %d bits\n", -get_bits_left(gb));
++        return AVERROR_INVALIDDATA;
++    }
++
++    return 0;
++}
++/* Parse one SPS NAL from gb and store it in ps->sps_list[sps_id]; returns 0 or a negative AVERROR. */
++int ff_hevc_rpi_decode_nal_sps(GetBitContext *gb, AVCodecContext *avctx,
++                           HEVCRpiParamSets *ps, int apply_defdispwin)
++{
++    HEVCRpiSPS *sps;
++    AVBufferRef *sps_buf = av_buffer_allocz(sizeof(*sps)); // zeroed, ref-counted storage for the new SPS
++    unsigned int sps_id;
++    int ret;
++    ptrdiff_t nal_size;
++
++    if (!sps_buf)
++        return AVERROR(ENOMEM);
++    sps = (HEVCRpiSPS*)sps_buf->data;
++
++    av_log(avctx, AV_LOG_DEBUG, "Decoding SPS\n");
++
++    nal_size = gb->buffer_end - gb->buffer;
++    if (nal_size > sizeof(sps->data)) {
++        av_log(avctx, AV_LOG_WARNING, "Truncating likely oversized SPS "
++               "(%"PTRDIFF_SPECIFIER" > %"SIZE_SPECIFIER")\n",
++               nal_size, sizeof(sps->data));
++        sps->data_size = sizeof(sps->data);
++    } else {
++        sps->data_size = nal_size;
++    }
++    memcpy(sps->data, gb->buffer, sps->data_size); // keep a (possibly truncated) raw copy of the NAL bytes
++
++    ret = ff_hevc_rpi_parse_sps(sps, gb, &sps_id,
++                            apply_defdispwin,
++                            ps->vps_list, avctx);
++    if (ret < 0) {
++        av_buffer_unref(&sps_buf);
++        return ret;
++    }
++
++    if (avctx->debug & FF_DEBUG_BITSTREAM) {
++        av_log(avctx, AV_LOG_DEBUG,
++               "Parsed SPS: id %d; coded wxh: %dx%d; "
++               "cropped wxh: %dx%d; pix_fmt: %s.\n",
++               sps_id, sps->width, sps->height,
++               sps->width - (sps->output_window.left_offset + sps->output_window.right_offset),
++               sps->height - (sps->output_window.top_offset + sps->output_window.bottom_offset),
++               av_get_pix_fmt_name(sps->pix_fmt));
++    }
++
++    /* check if this is a repeat of an already parsed SPS, then keep the
++     * original one.
++     * otherwise drop all PPSes that depend on it */
++    if (ps->sps_list[sps_id] && // NOTE(review): presumably sps_id was range-checked by ff_hevc_rpi_parse_sps - confirm
++        !memcmp(ps->sps_list[sps_id]->data, sps_buf->data, sps_buf->size)) { // compares the whole SPS struct
++        av_buffer_unref(&sps_buf);
++    } else {
++        remove_sps(ps, sps_id);
++        ps->sps_list[sps_id] = sps_buf; // the list takes over the only reference
++    }
++
++    return 0;
++}
++/* AVBuffer free callback for a PPS: releases every array the PPS points to, then the PPS itself. */
++static void hevc_pps_free(void *opaque, uint8_t *data)
++{
++    HEVCRpiPPS *pps = (HEVCRpiPPS*)data; // data is the HEVCRpiPPS passed to av_buffer_create()
++
++    av_freep(&pps->column_width);
++    av_freep(&pps->row_height);
++    av_freep(&pps->col_bd);
++    av_freep(&pps->row_bd);
++    av_freep(&pps->col_idxX);
++    av_freep(&pps->ctb_addr_rs_to_ts);
++    av_freep(&pps->ctb_addr_ts_to_rs);
++    av_freep(&pps->tile_pos_ts);
++    av_freep(&pps->tile_size);
++    av_freep(&pps->tile_id);
++    av_freep(&pps->ctb_ts_flags);
++
++    av_freep(&pps); // frees the struct; only the local pointer is cleared
++}
++/* Read n_minus_1 + 1 signed offsets from gb into offsets[]; fails if any lies outside [-12, 12]. */
++static int get_offset_list(GetBitContext * const gb, AVCodecContext * const avctx, unsigned int n_minus_1, int8_t * offsets)
++{
++    do
++    {
++        const int offset = get_se_golomb_long(gb);
++        if (offset < -12 || offset > 12) {
++            av_log(avctx, AV_LOG_ERROR, "qp_offset_list[]: %d out of range\n", offset);
++            return AVERROR_INVALIDDATA;
++        }
++        *offsets++ = offset;
++    } while (n_minus_1-- != 0); // do/while: always reads at least one entry
++    return 0;
++}
++/* Parse the PPS range-extension fields from gb into pps; returns 0 or AVERROR_INVALIDDATA. */
++static int pps_range_extensions(GetBitContext * const gb, AVCodecContext * const avctx,
++                                HEVCRpiPPS * const pps, const HEVCRpiSPS * const sps)
++{
++    if (pps->transform_skip_enabled_flag) {
++        pps->log2_max_transform_skip_block_size = get_ue_golomb_long(gb) + 2;
++    }
++    pps->cross_component_prediction_enabled_flag = get_bits1(gb);
++    if (pps->cross_component_prediction_enabled_flag &&
++        (sps->chroma_format_idc != 3 || sps->separate_colour_plane_flag))
++    {
++        av_log(avctx, AV_LOG_ERROR, "cross_component_prediction_enabled but chroma_format_idc != 3\n");
++        return AVERROR_INVALIDDATA;
++    }
++    pps->chroma_qp_offset_list_enabled_flag = get_bits1(gb);
++    if (pps->chroma_qp_offset_list_enabled_flag) {
++        int err;
++
++        pps->diff_cu_chroma_qp_offset_depth = get_ue_golomb_long(gb);
++        pps->chroma_qp_offset_list_len_minus1 = get_ue_golomb_long(gb);
++        if (pps->chroma_qp_offset_list_len_minus1 > 5) {
++            av_log(avctx, AV_LOG_ERROR,
++                   "chroma_qp_offset_list_len_minus1 shall be in the range [0, 5].\n");
++            return AVERROR_INVALIDDATA;
++        }
++        av_log(avctx, AV_LOG_WARNING, "cb_qp_offset_list not tested yet.\n");
++
++        if ((err = get_offset_list(gb, avctx, pps->chroma_qp_offset_list_len_minus1, pps->cb_qp_offset_list)) != 0 ||
++            (err = get_offset_list(gb, avctx, pps->chroma_qp_offset_list_len_minus1, pps->cr_qp_offset_list)) != 0)
++            return err;
++    }
++
++    {
++        const unsigned int max_offset = sps->bit_depth > 10 ? sps->bit_depth - 10 : 0; // upper bound for both SAO scales
++
++        pps->log2_sao_offset_scale_luma = get_ue_golomb_long(gb);
++        if (pps->log2_sao_offset_scale_luma > max_offset) {
++            av_log(avctx, AV_LOG_ERROR, "log2_sao_offset_scale_luma invalid\n");
++            return AVERROR_INVALIDDATA;
++        }
++        pps->log2_sao_offset_scale_chroma = get_ue_golomb_long(gb);
++        if (pps->log2_sao_offset_scale_chroma > max_offset) {
++            av_log(avctx, AV_LOG_ERROR, "log2_sao_offset_scale_chroma invalid\n");
++            return AVERROR_INVALIDDATA;
++        }
++    }
++
++    return(0);
++}
++/* Derive the QP tables, tile boundaries and CTB address/flag tables of a parsed PPS; returns 0 or AVERROR(ENOMEM). */
++static inline int setup_pps(AVCodecContext * const avctx,
++                            HEVCRpiPPS * const pps, const HEVCRpiSPS * const sps)
++{
++    int pic_area_in_ctbs;
++    int i, j, x, y, ctb_addr_rs, tile_id;
++
++    // Inferred parameters
++
++    // qp_y -> qp_u/qp_v tables
++    // The tables have at least -24,+24 overrun after adding offset here
++    // which should allow for clipless offsetting
++
++    pps->qp_dblk_x[0] = qp_c_dblk_0 + QP_DBLK_OFFSET_0;  // No offset for luma, but may be useful for general code
++    pps->qp_bd_x[0] = qp_c_bd_0[sps->bit_depth - 8] + QP_OFFSET_0;
++
++    if (sps->chroma_format_idc == 1) {
++        pps->qp_dblk_x[1] = qp_c_dblk_1 + pps->cb_qp_offset + QP_DBLK_OFFSET_0;
++        pps->qp_bd_x[1] = qp_c_bd_1[sps->bit_depth - 8] + pps->cb_qp_offset + QP_OFFSET_0;
++        pps->qp_dblk_x[2] = qp_c_dblk_1 + pps->cr_qp_offset + QP_DBLK_OFFSET_0;
++        pps->qp_bd_x[2] = qp_c_bd_1[sps->bit_depth - 8] + pps->cr_qp_offset + QP_OFFSET_0;
++    }
++    else
++    {
++        pps->qp_dblk_x[1] = qp_c_dblk_0 + pps->cb_qp_offset + QP_DBLK_OFFSET_0;
++        pps->qp_bd_x[1] = qp_c_bd_0[sps->bit_depth - 8] + pps->cb_qp_offset + QP_OFFSET_0;
++        pps->qp_dblk_x[2] = qp_c_dblk_0 + pps->cr_qp_offset + QP_DBLK_OFFSET_0;
++        pps->qp_bd_x[2] = qp_c_bd_0[sps->bit_depth - 8] + pps->cr_qp_offset + QP_OFFSET_0;
++    }
++
++    pps->col_bd   = av_malloc_array(pps->num_tile_columns + 1, sizeof(*pps->col_bd));
++    pps->row_bd   = av_malloc_array(pps->num_tile_rows + 1,    sizeof(*pps->row_bd));
++    pps->col_idxX = av_malloc_array(sps->ctb_width,    sizeof(*pps->col_idxX));
++    if (!pps->col_bd || !pps->row_bd || !pps->col_idxX)
++        return AVERROR(ENOMEM); // partial allocations are released by hevc_pps_free when the PPS buffer is unreffed
++
++    if (pps->uniform_spacing_flag) {
++        if (!pps->column_width) { // not yet allocated by the PPS parser (tiles disabled)
++            pps->column_width = av_malloc_array(pps->num_tile_columns, sizeof(*pps->column_width));
++            pps->row_height   = av_malloc_array(pps->num_tile_rows,    sizeof(*pps->row_height));
++        }
++        if (!pps->column_width || !pps->row_height)
++            return AVERROR(ENOMEM);
++
++        for (i = 0; i < pps->num_tile_columns; i++) {
++            pps->column_width[i] = ((i + 1) * sps->ctb_width) / pps->num_tile_columns -
++                                   (i * sps->ctb_width) / pps->num_tile_columns;
++        }
++
++        for (i = 0; i < pps->num_tile_rows; i++) {
++            pps->row_height[i] = ((i + 1) * sps->ctb_height) / pps->num_tile_rows -
++                                 (i * sps->ctb_height) / pps->num_tile_rows;
++        }
++    }
++
++    {
++        const unsigned int td_mask = 63 >> (sps->log2_ctb_size + sps->pixel_shift);
++        pps->col_bd[0] = 0;
++        pps->tile_wpp_inter_disable = 0;
++        for (i = 0; i < pps->num_tile_columns; i++)
++        {
++            pps->col_bd[i + 1] = pps->col_bd[i] + pps->column_width[i];
++
++            // Avoid trying tile parallel if the columns don't fall on cache boundaries
++            // (this causes too much pain syncing flushes with the QPU)
++            // Ignore the final (RHS of pic) tile boundary
++            if ((pps->col_bd[i] & td_mask) != 0) {
++                pps->tile_wpp_inter_disable = 1;
++            }
++        }
++
++        // If we can start the next row before finishing the first line of
++        // this one then we must wait at the end of the tile
++        // * if this happens a lot then there are better but more complicated
++        //   conditions that we could apply
++        if (pps->tile_wpp_inter_disable) {
++            for (i = 0; i < pps->num_tile_rows; i++)
++            {
++                if (pps->row_height[i] <= RPI_MAX_JOBS) {
++                    pps->tile_wpp_inter_disable = 2;
++                    break;
++                }
++            }
++        }
++    }
++
++    pps->row_bd[0] = 0;
++    for (i = 0; i < pps->num_tile_rows; i++)
++        pps->row_bd[i + 1] = pps->row_bd[i] + pps->row_height[i];
++
++    for (i = 0, j = 0; i < sps->ctb_width; i++) { // map each CTB column to its tile column index
++        if (i >= pps->col_bd[j + 1])
++            j++;
++        pps->col_idxX[i] = j;
++    }
++
++    /**
++     * 6.5
++     */
++    pic_area_in_ctbs     = sps->ctb_size;
++
++    pps->ctb_addr_rs_to_ts = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->ctb_addr_rs_to_ts));
++    pps->ctb_addr_ts_to_rs = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->ctb_addr_ts_to_rs));
++    pps->tile_id           = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->tile_id));
++    pps->tile_size         = av_malloc_array(pps->num_tile_columns * pps->num_tile_rows, sizeof(*pps->tile_size));
++    pps->tile_pos_ts       = av_malloc_array(pps->num_tile_columns * pps->num_tile_rows, sizeof(*pps->tile_pos_ts));
++    pps->ctb_ts_flags      = av_malloc_array(pic_area_in_ctbs,    sizeof(*pps->ctb_ts_flags));
++    if (!pps->ctb_addr_rs_to_ts || !pps->ctb_addr_ts_to_rs || !pps->ctb_ts_flags ||
++        !pps->tile_id || pps->tile_pos_ts == NULL || pps->tile_size == NULL) {
++        return AVERROR(ENOMEM);
++    }
++
++    memset(pps->ctb_ts_flags, 0, pic_area_in_ctbs * sizeof(*pps->ctb_ts_flags)); // flags below are OR-ed in, so start from zero
++
++    for (ctb_addr_rs = 0; ctb_addr_rs < pic_area_in_ctbs; ctb_addr_rs++) { // build raster-scan <-> tile-scan address maps
++        int tb_x   = ctb_addr_rs % sps->ctb_width;
++        int tb_y   = ctb_addr_rs / sps->ctb_width;
++        int tile_x = 0;
++        int tile_y = 0;
++        int val    = 0;
++
++        for (i = 0; i < pps->num_tile_columns; i++) {
++            if (tb_x < pps->col_bd[i + 1]) {
++                tile_x = i;
++                break;
++            }
++        }
++
++        for (i = 0; i < pps->num_tile_rows; i++) {
++            if (tb_y < pps->row_bd[i + 1]) {
++                tile_y = i;
++                break;
++            }
++        }
++
++        for (i = 0; i < tile_x; i++) // CTBs in the tiles to the left within this tile row
++            val += pps->row_height[tile_y] * pps->column_width[i];
++        for (i = 0; i < tile_y; i++) // CTBs in all tile rows above
++            val += sps->ctb_width * pps->row_height[i];
++
++        val += (tb_y - pps->row_bd[tile_y]) * pps->column_width[tile_x] +
++               tb_x - pps->col_bd[tile_x];
++
++        pps->ctb_addr_rs_to_ts[ctb_addr_rs] = val;
++        pps->ctb_addr_ts_to_rs[val]         = ctb_addr_rs;
++    }
++
++    {
++        uint8_t * pflags = pps->ctb_ts_flags; // walks the flag array in tile-scan order
++        uint16_t * ptid = pps->tile_id;
++
++        for (j = 0, tile_id = 0; j < pps->num_tile_rows; j++)
++        {
++            for (i = 0; i < pps->num_tile_columns; i++, tile_id++)
++            {
++                const unsigned int tile_w = pps->column_width[i];
++
++                pflags[0] |= CTB_TS_FLAGS_CIREQ;
++
++                for (x = 0; x != tile_w; ++x) {
++                    pflags[x] |= CTB_TS_FLAGS_TOT;
++                }
++
++                for (y = pps->row_bd[j]; y < pps->row_bd[j + 1]; y++)
++                {
++                    pflags[0] |= CTB_TS_FLAGS_SOTL;
++
++                    if (pps->entropy_coding_sync_enabled_flag)
++                    {
++                        if (pps->column_width[i] != 1)
++                            pflags[1] |= CTB_TS_FLAGS_CSAVE;
++                        else
++                            pflags[0] |= CTB_TS_FLAGS_CIREQ;
++
++                        if ((pflags[0] & CTB_TS_FLAGS_CIREQ) == 0)
++                            pflags[0] |= CTB_TS_FLAGS_CLOAD;
++                    }
++
++                    for (x = 0; x != tile_w; ++x)
++                        *ptid++ = tile_id;
++
++                    pflags += tile_w; // advance to the next line of this tile; pflags[-1] is now the line's last CTB
++                    pflags[-1] |= CTB_TS_FLAGS_EOTL;
++                    if (i + 1 == pps->num_tile_columns)
++                        pflags[-1] |= CTB_TS_FLAGS_EOL;
++                }
++
++                pflags[-1] |= CTB_TS_FLAGS_EOT;
++            }
++        }
++    }
++
++    {
++        unsigned int ts = 0; // running tile-scan address of the current tile's first CTB
++        for (j = 0; j < pps->num_tile_rows; j++)
++            for (i = 0; i < pps->num_tile_columns; i++)
++            {
++                const unsigned int size = pps->column_width[i] * pps->row_height[j];
++                pps->tile_size[j * pps->num_tile_columns + i] = size;
++                pps->tile_pos_ts[j * pps->num_tile_columns + i] = ts;
++                ts += size;
++            }
++    }
++
++    return 0;
++}
++/* Parse one PPS NAL from gb, derive its tables and store it in ps->pps_list[pps_id]; returns 0 or a negative AVERROR. */
++int ff_hevc_rpi_decode_nal_pps(GetBitContext * const gb, AVCodecContext * const avctx,
++                           HEVCRpiParamSets * const ps)
++{
++    const HEVCRpiSPS *sps = NULL;
++    int i, ret = 0;
++    unsigned int pps_id = 0;
++    ptrdiff_t nal_size;
++    unsigned log2_parallel_merge_level_minus2;
++
++    AVBufferRef *pps_buf;
++    HEVCRpiPPS *pps = av_mallocz(sizeof(*pps));
++
++    if (!pps)
++        return AVERROR(ENOMEM);
++
++    pps_buf = av_buffer_create((uint8_t *)pps, sizeof(*pps), // pps_buf now owns pps; hevc_pps_free releases it
++                               hevc_pps_free, NULL, 0);
++    if (!pps_buf) {
++        av_freep(&pps);
++        return AVERROR(ENOMEM);
++    }
++
++    av_log(avctx, AV_LOG_DEBUG, "Decoding PPS\n");
++
++    nal_size = gb->buffer_end - gb->buffer;
++    if (nal_size > sizeof(pps->data)) {
++        av_log(avctx, AV_LOG_WARNING, "Truncating likely oversized PPS "
++               "(%"PTRDIFF_SPECIFIER" > %"SIZE_SPECIFIER")\n",
++               nal_size, sizeof(pps->data));
++        pps->data_size = sizeof(pps->data);
++    } else {
++        pps->data_size = nal_size;
++    }
++    memcpy(pps->data, gb->buffer, pps->data_size); // keep a (possibly truncated) raw copy of the NAL bytes
++
++    // Default values
++    pps->loop_filter_across_tiles_enabled_flag = 1;
++    pps->num_tile_columns                      = 1;
++    pps->num_tile_rows                         = 1;
++    pps->uniform_spacing_flag                  = 1;
++    pps->disable_dbf                           = 0;
++    pps->beta_offset                           = 0;
++    pps->tc_offset                             = 0;
++    pps->log2_max_transform_skip_block_size    = 2;
++
++    // Coded parameters
++    pps_id = get_ue_golomb_long(gb);
++    if (pps_id >= HEVC_MAX_PPS_COUNT) {
++        av_log(avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", pps_id);
++        ret = AVERROR_INVALIDDATA;
++        goto err;
++    }
++    pps->sps_id = get_ue_golomb_long(gb);
++    if (pps->sps_id >= HEVC_MAX_SPS_COUNT) {
++        av_log(avctx, AV_LOG_ERROR, "SPS id out of range: %d\n", pps->sps_id);
++        ret = AVERROR_INVALIDDATA;
++        goto err;
++    }
++    if (!ps->sps_list[pps->sps_id]) {
++        av_log(avctx, AV_LOG_ERROR, "SPS %u does not exist.\n", pps->sps_id);
++        ret = AVERROR_INVALIDDATA;
++        goto err;
++    }
++    sps = (HEVCRpiSPS *)ps->sps_list[pps->sps_id]->data; // the referenced SPS bounds many of the checks below
++
++    pps->dependent_slice_segments_enabled_flag = get_bits1(gb);
++    pps->output_flag_present_flag              = get_bits1(gb);
++    pps->num_extra_slice_header_bits           = get_bits(gb, 3);
++
++    pps->sign_data_hiding_flag = get_bits1(gb);
++
++    pps->cabac_init_present_flag = get_bits1(gb);
++
++    pps->num_ref_idx_l0_default_active = get_ue_golomb_long(gb) + 1;
++    if (pps->num_ref_idx_l0_default_active < 1 || pps->num_ref_idx_l0_default_active > 15) {
++        av_log(avctx, AV_LOG_ERROR, "pps->num_ref_idx_l0_default_active invalid\n");
++        ret = AVERROR_INVALIDDATA;
++        goto err;
++    }
++    pps->num_ref_idx_l1_default_active = get_ue_golomb_long(gb) + 1;
++    if (pps->num_ref_idx_l1_default_active < 1 || pps->num_ref_idx_l1_default_active > 15) {
++        av_log(avctx, AV_LOG_ERROR, "pps->num_ref_idx_l1_default_active invalid\n");
++        ret = AVERROR_INVALIDDATA;
++        goto err;
++    }
++
++    pps->pic_init_qp_minus26 = get_se_golomb(gb);
++    if (pps->pic_init_qp_minus26 > 25 || pps->pic_init_qp_minus26 < -(26 + sps->qp_bd_offset)) {
++        av_log(avctx, AV_LOG_ERROR,
++               "init_qp_minus26 %d is outside the valid range "
++               "[%d, %d].\n",
++               pps->pic_init_qp_minus26,
++               -(26 + sps->qp_bd_offset), 25);
++        ret = AVERROR_INVALIDDATA;
++        goto err;
++    }
++
++    pps->constrained_intra_pred_flag = get_bits1(gb);
++    pps->transform_skip_enabled_flag = get_bits1(gb);
++
++    pps->cu_qp_delta_enabled_flag = get_bits1(gb);
++    pps->log2_min_cu_qp_delta_size = sps->log2_ctb_size; // value used when cu_qp_delta is not enabled
++    if (pps->cu_qp_delta_enabled_flag)
++    {
++        const unsigned int diff_cu_qp_delta_depth = get_ue_golomb_long(gb);
++
++        if (diff_cu_qp_delta_depth > sps->log2_diff_max_min_coding_block_size) {
++            av_log(avctx, AV_LOG_ERROR, "diff_cu_qp_delta_depth %d is invalid\n",
++                   diff_cu_qp_delta_depth);
++            ret = AVERROR_INVALIDDATA;
++            goto err;
++        }
++
++        pps->log2_min_cu_qp_delta_size = sps->log2_ctb_size - diff_cu_qp_delta_depth;
++    }
++
++    pps->cb_qp_offset = get_se_golomb(gb);
++    if (pps->cb_qp_offset < -12 || pps->cb_qp_offset > 12) {
++        av_log(avctx, AV_LOG_ERROR, "pps_cb_qp_offset out of range: %d\n",
++               pps->cb_qp_offset);
++        ret = AVERROR_INVALIDDATA;
++        goto err;
++    }
++    pps->cr_qp_offset = get_se_golomb(gb);
++    if (pps->cr_qp_offset < -12 || pps->cr_qp_offset > 12) {
++        av_log(avctx, AV_LOG_ERROR, "pps_cr_qp_offset out of range: %d\n",
++               pps->cr_qp_offset);
++        ret = AVERROR_INVALIDDATA;
++        goto err;
++    }
++    pps->pic_slice_level_chroma_qp_offsets_present_flag = get_bits1(gb);
++
++    pps->weighted_pred_flag   = get_bits1(gb);
++    pps->weighted_bipred_flag = get_bits1(gb);
++
++    pps->transquant_bypass_enable_flag    = get_bits1(gb);
++    pps->tiles_enabled_flag               = get_bits1(gb);
++    pps->entropy_coding_sync_enabled_flag = get_bits1(gb);
++
++    if (pps->tiles_enabled_flag) {
++        pps->num_tile_columns = get_ue_golomb_long(gb) + 1;
++        pps->num_tile_rows    = get_ue_golomb_long(gb) + 1;
++        if (pps->num_tile_columns <= 0 ||
++            pps->num_tile_columns >= sps->width) { // NOTE(review): bound is the pixel width, not sps->ctb_width - confirm intended
++            av_log(avctx, AV_LOG_ERROR, "num_tile_columns_minus1 out of range: %d\n",
++                   pps->num_tile_columns - 1);
++            ret = AVERROR_INVALIDDATA;
++            goto err;
++        }
++        if (pps->num_tile_rows <= 0 ||
++            pps->num_tile_rows >= sps->height) { // NOTE(review): bound is the pixel height, not sps->ctb_height - confirm intended
++            av_log(avctx, AV_LOG_ERROR, "num_tile_rows_minus1 out of range: %d\n",
++                   pps->num_tile_rows - 1);
++            ret = AVERROR_INVALIDDATA;
++            goto err;
++        }
++
++        pps->column_width = av_malloc_array(pps->num_tile_columns, sizeof(*pps->column_width));
++        pps->row_height   = av_malloc_array(pps->num_tile_rows,    sizeof(*pps->row_height));
++        if (!pps->column_width || !pps->row_height) {
++            ret = AVERROR(ENOMEM);
++            goto err;
++        }
++
++        pps->uniform_spacing_flag = get_bits1(gb);
++        if (!pps->uniform_spacing_flag) {
++            uint64_t sum = 0;
++            for (i = 0; i < pps->num_tile_columns - 1; i++) {
++                pps->column_width[i] = get_ue_golomb_long(gb) + 1;
++                sum                 += pps->column_width[i];
++            }
++            if (sum >= sps->ctb_width) {
++                av_log(avctx, AV_LOG_ERROR, "Invalid tile widths.\n");
++                ret = AVERROR_INVALIDDATA;
++                goto err;
++            }
++            pps->column_width[pps->num_tile_columns - 1] = sps->ctb_width - sum; // last column takes the remaining CTBs
++
++            sum = 0;
++            for (i = 0; i < pps->num_tile_rows - 1; i++) {
++                pps->row_height[i] = get_ue_golomb_long(gb) + 1;
++                sum               += pps->row_height[i];
++            }
++            if (sum >= sps->ctb_height) {
++                av_log(avctx, AV_LOG_ERROR, "Invalid tile heights.\n");
++                ret = AVERROR_INVALIDDATA;
++                goto err;
++            }
++            pps->row_height[pps->num_tile_rows - 1] = sps->ctb_height - sum; // last row takes the remaining CTBs
++        }
++        pps->loop_filter_across_tiles_enabled_flag = get_bits1(gb);
++    }
++
++    pps->seq_loop_filter_across_slices_enabled_flag = get_bits1(gb);
++
++    pps->deblocking_filter_control_present_flag = get_bits1(gb);
++    if (pps->deblocking_filter_control_present_flag) {
++        pps->deblocking_filter_override_enabled_flag = get_bits1(gb);
++        pps->disable_dbf                             = get_bits1(gb);
++        if (!pps->disable_dbf) {
++            int beta_offset_div2 = get_se_golomb(gb);
++            int tc_offset_div2   = get_se_golomb(gb) ;
++            if (beta_offset_div2 < -6 || beta_offset_div2 > 6) {
++                av_log(avctx, AV_LOG_ERROR, "pps_beta_offset_div2 out of range: %d\n",
++                       beta_offset_div2);
++                ret = AVERROR_INVALIDDATA;
++                goto err;
++            }
++            if (tc_offset_div2 < -6 || tc_offset_div2 > 6) {
++                av_log(avctx, AV_LOG_ERROR, "pps_tc_offset_div2 out of range: %d\n",
++                       tc_offset_div2);
++                ret = AVERROR_INVALIDDATA;
++                goto err;
++            }
++            pps->beta_offset = 2 * beta_offset_div2;
++            pps->tc_offset   = 2 *   tc_offset_div2;
++        }
++    }
++
++    pps->scaling_list_data_present_flag = get_bits1(gb);
++    if (pps->scaling_list_data_present_flag) {
++        set_default_scaling_list_data(&pps->scaling_list);
++        ret = scaling_list_data(gb, avctx, &pps->scaling_list, sps);
++        if (ret < 0)
++            goto err;
++    }
++    pps->lists_modification_present_flag = get_bits1(gb);
++    log2_parallel_merge_level_minus2     = get_ue_golomb_long(gb);
++    if (log2_parallel_merge_level_minus2 > sps->log2_ctb_size) {
++        av_log(avctx, AV_LOG_ERROR, "log2_parallel_merge_level_minus2 out of range: %d\n",
++               log2_parallel_merge_level_minus2);
++        ret = AVERROR_INVALIDDATA;
++        goto err;
++    }
++    pps->log2_parallel_merge_level       = log2_parallel_merge_level_minus2 + 2;
++
++    pps->slice_header_extension_present_flag = get_bits1(gb);
++
++    if (get_bits1(gb)) { // pps_extension_present_flag
++        int pps_range_extensions_flag = get_bits1(gb);
++        skip_bits(gb, 7); // pps_extension_7bits
++        if (sps->ptl.general_ptl.profile_idc == FF_PROFILE_HEVC_REXT && pps_range_extensions_flag) {
++            if ((ret = pps_range_extensions(gb, avctx, pps, sps)) < 0)
++                goto err;
++        }
++    }
++
++    ret = setup_pps(avctx, pps, sps); // build the derived tile/CTB tables from the parsed fields
++    if (ret < 0)
++        goto err;
++
++    if (get_bits_left(gb) < 0) {
++        av_log(avctx, AV_LOG_ERROR,
++               "Overread PPS by %d bits\n", -get_bits_left(gb));
++        ret = AVERROR_INVALIDDATA;
++        goto err;
++    }
++
++    remove_pps(ps, pps_id);
++    ps->pps_list[pps_id] = pps_buf; // the list takes over the only reference
++
++    return 0;
++
++err:
++    av_buffer_unref(&pps_buf); // drops the only reference: hevc_pps_free frees pps and its arrays
++    return ret;
++}
++/* Return the full picture order count built from poc_lsb and the previous POC pocTid0, handling LSB wrap-around. */
++int ff_hevc_rpi_compute_poc(const HEVCRpiSPS *sps, int pocTid0, int poc_lsb, int nal_unit_type)
++{
++    int max_poc_lsb  = 1 << sps->log2_max_poc_lsb;
++    int prev_poc_lsb = pocTid0 % max_poc_lsb;
++    int prev_poc_msb = pocTid0 - prev_poc_lsb;
++    int poc_msb;
++
++    if (poc_lsb < prev_poc_lsb && prev_poc_lsb - poc_lsb >= max_poc_lsb / 2) // LSB dropped by half the range or more: wrapped forwards
++        poc_msb = prev_poc_msb + max_poc_lsb;
++    else if (poc_lsb > prev_poc_lsb && poc_lsb - prev_poc_lsb > max_poc_lsb / 2) // LSB rose by more than half the range: wrapped backwards
++        poc_msb = prev_poc_msb - max_poc_lsb;
++    else
++        poc_msb = prev_poc_msb;
++
++    // For BLA picture types, POCmsb is set to 0.
++    if (nal_unit_type == HEVC_NAL_BLA_W_LP   ||
++        nal_unit_type == HEVC_NAL_BLA_W_RADL ||
++        nal_unit_type == HEVC_NAL_BLA_N_LP)
++        poc_msb = 0;
++
++    return poc_msb + poc_lsb;
++}
+--- /dev/null
++++ b/libavcodec/rpi_hevc_ps.h
+@@ -0,0 +1,449 @@
++/*
++ * HEVC parameter set parsing
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVCODEC_RPI_HEVC_PS_H
++#define AVCODEC_RPI_HEVC_PS_H
++
++#include <stdint.h>
++
++#include "libavutil/buffer.h"
++#include "libavutil/pixfmt.h"
++#include "libavutil/rational.h"
++
++#include "avcodec.h"
++#include "get_bits.h"
++#include "hevc.h"
++
++typedef struct ShortTermRPS {  // short-term reference picture set (RPS)
++    unsigned int num_negative_pics;
++    int num_delta_pocs;  // presumably the number of valid entries in delta_poc[]/used[] - confirm
++    int rps_idx_num_delta_pocs;
++    int32_t delta_poc[32];
++    uint8_t used[32];  // parallel to delta_poc[] (same length)
++} ShortTermRPS;
++
++typedef struct LongTermRPS {  // long-term reference picture set (RPS)
++    int     poc[32];
++    uint8_t used[32];  // parallel to poc[] (same length)
++    uint8_t nb_refs;  // presumably the number of valid entries in poc[]/used[] - confirm
++} LongTermRPS;
++
++typedef struct RpiSliceHeader {
++    unsigned int pps_id;
++
++    /// address (in raster order) of the first block in the current slice segment
++    unsigned int   slice_segment_addr;
++    /// address (in raster order) of the first block in the current slice
++    unsigned int   slice_addr;
++
++    enum HEVCSliceType slice_type;
++
++    int pic_order_cnt_lsb;
++
++    uint8_t first_slice_in_pic_flag;
++    uint8_t dependent_slice_segment_flag;
++    uint8_t pic_output_flag;
++    uint8_t colour_plane_id;
++
++    // RPS coded in the slice header itself is stored here (presumably in slice_rps below - confirm)
++    int short_term_ref_pic_set_sps_flag;
++    int short_term_ref_pic_set_size;
++    ShortTermRPS slice_rps;
++    const ShortTermRPS *short_term_rps;
++    int long_term_ref_pic_set_size;
++    LongTermRPS long_term_rps;
++    unsigned int list_entry_lx[2][32];
++
++    uint8_t rpl_modification_flag[2];
++    uint8_t no_output_of_prior_pics_flag;
++    uint8_t slice_temporal_mvp_enabled_flag;
++
++    unsigned int nb_refs[2];
++
++    uint8_t slice_sample_adaptive_offset_flag[3];
++    uint8_t mvd_l1_zero_flag;
++
++    uint8_t cabac_init_flag;
++    uint8_t disable_deblocking_filter_flag; ///< slice_header_disable_deblocking_filter_flag
++    uint8_t slice_loop_filter_across_slices_enabled_flag;
++    uint8_t collocated_list;
++
++    uint8_t no_dblk_boundary_flags;
++
++    unsigned int collocated_ref_idx;
++
++    int slice_qp_delta;
++    int slice_cb_qp_offset;  // -12, +12
++    int slice_cr_qp_offset;  // -12, +12
++
++    uint8_t cu_chroma_qp_offset_enabled_flag;
++
++    int beta_offset;    ///< beta_offset_div2 * 2
++    int tc_offset;      ///< tc_offset_div2 * 2
++
++    unsigned int max_num_merge_cand; ///< 5 - 5_minus_max_num_merge_cand
++
++    unsigned *entry_point_offset;
++    int * offset;
++    int * size;
++    int num_entry_point_offsets;
++    int offsets_allocated;
++
++    uint8_t offload_wpp;
++    uint8_t offload_tiles;
++
++    int8_t slice_qp;
++
++    uint8_t luma_log2_weight_denom;
++    uint8_t chroma_log2_weight_denom;
++
++    int16_t luma_weight_l0[16];     // -128, +255
++    int16_t luma_offset_l0[16];
++    int16_t chroma_weight_l0[16][2];
++    int16_t chroma_offset_l0[16][2];
++
++    int16_t luma_weight_l1[16];
++    int16_t luma_offset_l1[16];
++    int16_t chroma_weight_l1[16][2];
++    int16_t chroma_offset_l1[16][2];
++
++} RpiSliceHeader;
++
++typedef struct HEVCRpiWindow { // window given as four edge offsets; the SPS output_window is copied into AVFrame crop_* by ff_hevc_rpi_set_new_ref
++    uint16_t left_offset;
++    uint16_t right_offset;
++    uint16_t top_offset;
++    uint16_t bottom_offset;
++} HEVCRpiWindow;
++
++typedef struct VUI { // VUI fields; embedded in HEVCRpiSPS as sps->vui
++    AVRational sar;
++
++    int overscan_info_present_flag;
++    int overscan_appropriate_flag;
++
++    int video_signal_type_present_flag;
++    int video_format;
++    int video_full_range_flag;
++    int colour_description_present_flag;
++    uint8_t colour_primaries;
++    uint8_t transfer_characteristic;
++    uint8_t matrix_coeffs;
++
++    int chroma_loc_info_present_flag;
++    int chroma_sample_loc_type_top_field;
++    int chroma_sample_loc_type_bottom_field;
++    int neutra_chroma_indication_flag; // sic: identifier spelling kept as-is (presumably "neutral")
++
++    int field_seq_flag;
++    int frame_field_info_present_flag; // when set, the picture timing SEI parser reads pic_struct and the bits after it
++
++    int default_display_window_flag;
++    HEVCRpiWindow def_disp_win;
++
++    int vui_timing_info_present_flag;
++    uint32_t vui_num_units_in_tick;
++    uint32_t vui_time_scale;
++    int vui_poc_proportional_to_timing_flag;
++    int vui_num_ticks_poc_diff_one_minus1;
++    int vui_hrd_parameters_present_flag;
++
++    int bitstream_restriction_flag;
++    int tiles_fixed_structure_flag;
++    int motion_vectors_over_pic_boundaries_flag;
++    int restricted_ref_pic_lists_flag;
++    int min_spatial_segmentation_idc;
++    int max_bytes_per_pic_denom;
++    int max_bits_per_min_cu_denom;
++    int log2_max_mv_length_horizontal;
++    int log2_max_mv_length_vertical;
++} VUI;
++
++typedef struct PTLCommon { // profile / tier / level fields; PTL uses this type for both its general and per-sub-layer entries
++    uint8_t profile_space;
++    uint8_t tier_flag;
++    uint8_t profile_idc;
++    uint8_t profile_compatibility_flag[32];
++    uint8_t level_idc;
++    uint8_t progressive_source_flag;
++    uint8_t interlaced_source_flag;
++    uint8_t non_packed_constraint_flag;
++    uint8_t frame_only_constraint_flag;
++} PTLCommon;
++
++typedef struct PTL { // one general PTLCommon plus one per sub-layer; embedded in both HEVCRpiVPS and HEVCRpiSPS
++    PTLCommon general_ptl;
++    PTLCommon sub_layer_ptl[HEVC_MAX_SUB_LAYERS];
++
++    uint8_t sub_layer_profile_present_flag[HEVC_MAX_SUB_LAYERS]; // indexed by sub-layer, like sub_layer_ptl[]
++    uint8_t sub_layer_level_present_flag[HEVC_MAX_SUB_LAYERS];   // indexed by sub-layer, like sub_layer_ptl[]
++} PTL;
++
++typedef struct HEVCRpiVPS { // VPS fields; the active one is reached through HEVCRpiParamSets.vps
++    uint8_t vps_temporal_id_nesting_flag;
++    int vps_max_layers;
++    int vps_max_sub_layers; ///< vps_max_temporal_layers_minus1 + 1
++
++    PTL ptl;
++    int vps_sub_layer_ordering_info_present_flag;
++    unsigned int vps_max_dec_pic_buffering[HEVC_MAX_SUB_LAYERS];
++    unsigned int vps_num_reorder_pics[HEVC_MAX_SUB_LAYERS];
++    unsigned int vps_max_latency_increase[HEVC_MAX_SUB_LAYERS];
++    int vps_max_layer_id;
++    int vps_num_layer_sets; ///< vps_num_layer_sets_minus1 + 1
++    uint8_t vps_timing_info_present_flag;
++    uint32_t vps_num_units_in_tick;
++    uint32_t vps_time_scale;
++    uint8_t vps_poc_proportional_to_timing_flag;
++    int vps_num_ticks_poc_diff_one; ///< vps_num_ticks_poc_diff_one_minus1 + 1
++    int vps_num_hrd_parameters;
++
++    uint8_t data[4096]; // NOTE(review): presumably a raw copy of the parameter-set bytes - confirm against the parser
++    int data_size;      // NOTE(review): presumably the number of valid bytes in data[] - confirm
++} HEVCRpiVPS;
++
++typedef struct ScalingList { // scaling-list storage; embedded in both HEVCRpiSPS and HEVCRpiPPS
++    /* This is a little wasteful, since sizeID 0 only needs 8 coeffs,
++     * and size ID 3 only has 2 arrays, not 6. */
++    uint8_t sl[4][6][64]; // [sizeID][array index][coefficient]
++    uint8_t sl_dc[2][6];  // NOTE(review): presumably DC values for the two largest sizeIDs - confirm
++} ScalingList;
++
++typedef struct HEVCRpiSPS { // SPS fields; the active one is reached through HEVCRpiParamSets.sps
++    unsigned vps_id;
++    uint8_t chroma_format_idc;
++    uint8_t separate_colour_plane_flag;
++
++    HEVCRpiWindow output_window; // copied into AVFrame crop_left/right/top/bottom by ff_hevc_rpi_set_new_ref
++
++    HEVCRpiWindow pic_conf_win;
++
++    uint16_t wp_offset_half_range;  // WpOffsetHalfRange
++
++    uint8_t bit_depth; // generate_missing_ref fills samples with 1 << (bit_depth - 1)
++
++//    int bit_depth_chroma;  // We only support luma bit depth == chroma bit depth
++    uint8_t pixel_shift; // 0: missing refs are filled bytewise; non-zero: filled with 16-bit writes
++    enum AVPixelFormat pix_fmt;
++
++    unsigned int log2_max_poc_lsb; // find_ref_idx masks POCs with (1 << log2_max_poc_lsb) - 1
++
++    int max_sub_layers; // temporal_layer[max_sub_layers - 1] is the entry consulted for output/bumping decisions
++    struct {
++        int max_dec_pic_buffering; // threshold compared with the DPB occupancy in ff_hevc_rpi_bump_frame
++        int num_reorder_pics;      // ff_hevc_rpi_output_frame holds output while pending frames <= this
++        int max_latency_increase;
++    } temporal_layer[HEVC_MAX_SUB_LAYERS];
++    uint8_t temporal_id_nesting_flag;
++
++    uint8_t scaling_list_enable_flag;
++    ScalingList scaling_list;
++
++    unsigned int nb_st_rps;
++    ShortTermRPS st_rps[HEVC_MAX_SHORT_TERM_REF_PIC_SETS];
++
++    uint8_t amp_enabled_flag;
++    uint8_t sao_enabled;
++
++    uint8_t long_term_ref_pics_present_flag;
++    uint16_t lt_ref_pic_poc_lsb_sps[HEVC_MAX_LONG_TERM_REF_PICS];
++    uint8_t used_by_curr_pic_lt_sps_flag[HEVC_MAX_LONG_TERM_REF_PICS];
++    uint8_t num_long_term_ref_pics_sps;
++
++    struct {
++        uint8_t bit_depth;
++        uint8_t bit_depth_chroma;
++        uint8_t log2_min_pcm_cb_size;
++        uint8_t log2_max_pcm_cb_size;
++        uint8_t loop_filter_disable_flag;
++    } pcm;
++    char sps_temporal_mvp_enabled_flag;
++//    char sps_strong_intra_smoothing_enable_flag;  -> intra_filters_disable
++
++    uint8_t log2_min_cb_size;  // 3..6
++    uint8_t log2_diff_max_min_coding_block_size;
++    uint8_t log2_min_tb_size;  // 2..5
++    uint8_t log2_max_trafo_size;
++    uint8_t log2_ctb_size;     // 4..6
++//    unsigned int log2_min_pu_size;  // 2..5 (min_cb_size - 1)
++#define LOG2_MIN_PU_SIZE 2
++#define LOG2_MIN_CU_SIZE 3
++
++    uint8_t max_transform_hierarchy_depth_inter;
++    uint8_t max_transform_hierarchy_depth_intra;
++
++    char transform_skip_rotation_enabled_flag;
++    char transform_skip_context_enabled_flag;
++    char implicit_rdpcm_enabled_flag;
++    char explicit_rdpcm_enabled_flag;
++//    char intra_smoothing_disabled_flag;  -> intra_filters_disable
++    char high_precision_offsets_enabled_flag;
++    char persistent_rice_adaptation_enabled_flag;
++
++    uint8_t intra_filters_disable;
++
++    /// coded frame dimension in various units
++    int width;
++    int height;
++    int ctb_width;
++    int ctb_height;
++    int ctb_size;   // Pic size in CTBs not size of a CTB
++    int min_cb_width;
++    int min_cb_height;
++    int min_tb_width;
++    int min_tb_height;
++    int min_pu_width;
++    int min_pu_height;
++    int pcm_width;
++    int pcm_height;
++    int tb_mask;
++
++    int hshift[3]; // per-plane right shift applied to width (see generate_missing_ref)
++    int vshift[3]; // per-plane right shift applied to height (see generate_missing_ref)
++
++    int qp_bd_offset;
++
++    uint8_t data[4096]; // NOTE(review): presumably a raw copy of the parameter-set bytes - confirm against the parser
++    int data_size;      // NOTE(review): presumably the number of valid bytes in data[] - confirm
++
++    VUI vui;
++    PTL ptl;
++} HEVCRpiSPS;
++
++#define CTB_TS_FLAGS_SOTL       (1U << 0)       // X start of tile line
++#define CTB_TS_FLAGS_EOTL       (1U << 1)       // Last CTB of a tile line
++#define CTB_TS_FLAGS_EOL        (1U << 2)       // Last CTB of a complete line
++#define CTB_TS_FLAGS_EOT        (1U << 3)       // Last CTB of a tile
++#define CTB_TS_FLAGS_CSAVE      (1U << 4)       // NOTE(review): undocumented; presumably "save CABAC state" - confirm
++#define CTB_TS_FLAGS_CIREQ      (1U << 5)       // Cabac init request
++#define CTB_TS_FLAGS_TOT        (1U << 6)       // CTB on top row of a tile
++#define CTB_TS_FLAGS_CLOAD      (1U << 7)       // NOTE(review): undocumented; presumably "load CABAC state" - confirm
++
++typedef struct HEVCRpiPPS { // PPS fields; the active one is reached through HEVCRpiParamSets.pps
++    unsigned int sps_id; ///< seq_parameter_set_id
++
++    uint8_t sign_data_hiding_flag;
++
++    uint8_t cabac_init_present_flag;
++
++    int num_ref_idx_l0_default_active; ///< num_ref_idx_l0_default_active_minus1 + 1
++    int num_ref_idx_l1_default_active; ///< num_ref_idx_l1_default_active_minus1 + 1
++    int pic_init_qp_minus26;
++
++    uint8_t constrained_intra_pred_flag;
++    uint8_t transform_skip_enabled_flag;
++
++    uint8_t cu_qp_delta_enabled_flag;
++    uint8_t log2_min_cu_qp_delta_size;
++    int cb_qp_offset;   // -12..12
++    int cr_qp_offset;   // -12..12
++    const uint8_t * qp_dblk_x[3];
++    const int8_t * qp_bd_x[3];
++
++    uint8_t pic_slice_level_chroma_qp_offsets_present_flag;
++    uint8_t weighted_pred_flag;
++    uint8_t weighted_bipred_flag;
++    uint8_t output_flag_present_flag;
++    uint8_t transquant_bypass_enable_flag;
++
++    uint8_t dependent_slice_segments_enabled_flag;
++    uint8_t tiles_enabled_flag;
++    uint8_t entropy_coding_sync_enabled_flag;
++
++    uint8_t tile_wpp_inter_disable;
++    int num_tile_columns;   ///< num_tile_columns_minus1 + 1
++    int num_tile_rows;      ///< num_tile_rows_minus1 + 1
++    uint8_t uniform_spacing_flag;
++    uint8_t loop_filter_across_tiles_enabled_flag;
++
++    uint8_t seq_loop_filter_across_slices_enabled_flag;
++
++    uint8_t deblocking_filter_control_present_flag;
++    uint8_t deblocking_filter_override_enabled_flag;
++    uint8_t disable_dbf;
++    int beta_offset;    ///< beta_offset_div2 * 2
++    int tc_offset;      ///< tc_offset_div2 * 2
++
++    uint8_t scaling_list_data_present_flag;
++    ScalingList scaling_list;
++
++    uint8_t lists_modification_present_flag;
++    int log2_parallel_merge_level; ///< log2_parallel_merge_level_minus2 + 2
++    int num_extra_slice_header_bits;
++    uint8_t slice_header_extension_present_flag;
++    uint8_t log2_max_transform_skip_block_size;
++    uint8_t cross_component_prediction_enabled_flag;
++    uint8_t chroma_qp_offset_list_enabled_flag;
++    uint8_t diff_cu_chroma_qp_offset_depth;
++    uint8_t chroma_qp_offset_list_len_minus1;
++    int8_t  cb_qp_offset_list[6];
++    int8_t  cr_qp_offset_list[6];
++    uint8_t log2_sao_offset_scale_luma;
++    uint8_t log2_sao_offset_scale_chroma;
++
++    // Inferred parameters
++    uint16_t *column_width;  ///< ColumnWidth
++    uint16_t *row_height;    ///< RowHeight
++    uint16_t *col_bd;        ///< ColBd
++    uint16_t *row_bd;        ///< RowBd
++    uint16_t *col_idxX;
++
++    // We can limit these to uint16_t given our other size limits
++    uint16_t *ctb_addr_rs_to_ts; ///< CtbAddrRSToTS
++    uint16_t *ctb_addr_ts_to_rs; ///< CtbAddrTSToRS
++    uint16_t *tile_id;           ///< TileId
++    uint16_t *tile_pos_ts;       ///< TilePosRS (NOTE(review): member name says _ts - confirm which scan order is meant)
++    uint16_t *tile_size;         ///< TileSize
++    uint8_t * ctb_ts_flags;      // NOTE(review): presumably holds the CTB_TS_FLAGS_* bits per CTB - confirm
++
++    uint8_t data[4096]; // NOTE(review): presumably a raw copy of the parameter-set bytes - confirm against the parser
++    int data_size;      // NOTE(review): presumably the number of valid bytes in data[] - confirm
++} HEVCRpiPPS;
++
++typedef struct HEVCRpiParamSets { // parameter-set store: active VPS/SPS/PPS pointers plus per-id buffer lists
++    /* currently active parameter sets */
++    const HEVCRpiVPS *vps;
++    const HEVCRpiSPS *sps; // may be NULL: output/bump code tests s->ps.sps before dereferencing it
++    const HEVCRpiPPS *pps;
++
++    AVBufferRef *vps_list[HEVC_MAX_VPS_COUNT];
++    AVBufferRef *sps_list[HEVC_MAX_SPS_COUNT]; // entry may be NULL; ->data is cast to HEVCRpiSPS * by the SEI parser
++    AVBufferRef *pps_list[HEVC_MAX_PPS_COUNT];
++} HEVCRpiParamSets;
++
++int ff_hevc_rpi_decode_nal_vps(GetBitContext *gb, AVCodecContext *avctx,
++                           HEVCRpiParamSets *ps); // NOTE(review): body not in this header; presumably fills ps->vps_list - confirm
++int ff_hevc_rpi_decode_nal_sps(GetBitContext *gb, AVCodecContext *avctx,
++                           HEVCRpiParamSets *ps, int apply_defdispwin); // NOTE(review): presumably fills ps->sps_list - confirm
++int ff_hevc_rpi_decode_nal_pps(GetBitContext *gb, AVCodecContext *avctx,
++                           HEVCRpiParamSets *ps); // NOTE(review): presumably fills ps->pps_list - confirm
++
++int ff_hevc_rpi_decode_short_term_rps(GetBitContext *gb, AVCodecContext *avctx,
++                                  ShortTermRPS *rps, const HEVCRpiSPS *sps, int is_slice_header); // NOTE(review): presumably writes the parsed set to *rps - confirm
++
++int ff_hevc_rpi_encode_nal_vps(HEVCRpiVPS *vps, unsigned int id,
++                           uint8_t *buf, int buf_size); // NOTE(review): return value convention not visible here - confirm
++
++/**
++ * Compute POC of the current frame and return it.
++ */
++int ff_hevc_rpi_compute_poc(const HEVCRpiSPS *sps, int pocTid0, int poc_lsb, int nal_unit_type);
++
++#endif /* AVCODEC_RPI_HEVC_PS_H */
+--- /dev/null
++++ b/libavcodec/rpi_hevc_refs.c
+@@ -0,0 +1,485 @@
++/*
++ * HEVC video decoder
++ *
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ * Copyright (C) 2012 - 2013 Gildas Cocherel
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/avassert.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/rpi_sand_fns.h"
++#include "internal.h"
++#include "thread.h"
++#include "hevc.h"
++#include "rpi_hevcdec.h"
++
++void ff_hevc_rpi_unref_frame(HEVCRpiContext *s, HEVCRpiFrame *frame, int flags) // clears `flags` on `frame`; releases its buffers once no flag is left
++{
++    /* frame->frame can be NULL if context init failed */
++    if (!frame->frame || !frame->frame->buf[0]) // slot holds no picture: nothing to do
++        return;
++
++    frame->flags &= ~flags;
++    if (!frame->flags) { // every flag is now clear, so the slot can be recycled
++        ff_thread_release_buffer(s->avctx, &frame->tf);
++
++        av_buffer_unref(&frame->col_mvf_buf);  // OK if already NULL
++        frame->col_mvf = NULL; // alloc_frame points this into col_mvf_buf->data, so drop it as well
++
++        frame->collocated_ref = NULL;
++    }
++}
++
++void ff_hevc_rpi_clear_refs(HEVCRpiContext *s) // drops the short- and long-term reference flags from every DPB slot
++{
++    int i;
++    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++)
++        ff_hevc_rpi_unref_frame(s, &s->DPB[i], // other flags (e.g. OUTPUT) are left set, so such frames survive
++                            HEVC_FRAME_FLAG_SHORT_REF |
++                            HEVC_FRAME_FLAG_LONG_REF);
++}
++
++void ff_hevc_rpi_flush_dpb(HEVCRpiContext *s) // releases every allocated DPB slot
++{
++    int i;
++    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++)
++        ff_hevc_rpi_unref_frame(s, &s->DPB[i], ~0); // ~0 clears all flags, which forces the release path
++}
++
++static HEVCRpiFrame *alloc_frame(HEVCRpiContext * const s) // takes the first free DPB slot and gives it buffers; NULL on failure or full DPB
++{
++    int i, ret;
++    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
++        HEVCRpiFrame * const frame = &s->DPB[i];
++        if (frame->frame->buf[0]) // slot already holds a picture (note: frame->frame itself is not NULL-checked here)
++            continue;
++
++        ret = ff_thread_get_buffer(s->avctx, &frame->tf,
++                                   AV_GET_BUFFER_FLAG_REF);
++        if (ret < 0) // the error code is not propagated; callers only see NULL
++            return NULL;
++
++        frame->col_mvf = NULL;
++        frame->col_mvf_buf = NULL;
++        if (s->used_for_ref && !s->is_irap) // a col_mvf buffer is only taken for non-IRAP pictures used for reference
++        {
++            frame->col_mvf_buf = av_buffer_pool_get(s->col_mvf_pool);
++            if (!frame->col_mvf_buf)
++                goto fail;
++            frame->col_mvf = (ColMvField *)frame->col_mvf_buf->data;
++        }
++
++        frame->frame->top_field_first  = s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD;
++        frame->frame->interlaced_frame = (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_TOP_FIELD) || (s->sei.picture_timing.picture_struct == AV_PICTURE_STRUCTURE_BOTTOM_FIELD);
++
++        return frame;
++
++fail: // only reachable by goto from the col_mvf allocation above, for the slot just taken
++        ff_hevc_rpi_unref_frame(s, frame, ~0); // clears all flags, so the buffer obtained above is released
++        return NULL;
++    }
++    av_log(s->avctx, AV_LOG_ERROR, "Error allocating frame, DPB full.\n");
++    return NULL;
++}
++
++int ff_hevc_rpi_set_new_ref(HEVCRpiContext *s, AVFrame **frame, int poc) // allocates the DPB slot for the picture with `poc`; returns 0 or a negative AVERROR
++{
++    HEVCRpiFrame *ref;
++    int i;
++
++    /* check that this POC doesn't already exist */
++    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
++        HEVCRpiFrame *frame = &s->DPB[i]; // shadows the `frame` parameter inside this loop only
++
++        if (frame->frame->buf[0] && frame->sequence == s->seq_decode &&
++            frame->poc == poc) {
++            av_log(s->avctx, AV_LOG_ERROR, "Duplicate POC in a sequence: %d.\n",
++                   poc);
++            return AVERROR_INVALIDDATA;
++        }
++    }
++
++    ref = alloc_frame(s);
++    if (!ref) // alloc_frame reports every failure as NULL, so all of them map to ENOMEM here
++        return AVERROR(ENOMEM);
++
++    *frame = ref->frame; // hand the AVFrame back to the caller
++    s->ref = ref;        // s->ref now designates the picture being decoded
++
++    if (s->sh.pic_output_flag)
++        ref->flags = HEVC_FRAME_FLAG_OUTPUT | HEVC_FRAME_FLAG_SHORT_REF;
++    else
++        ref->flags = HEVC_FRAME_FLAG_SHORT_REF;
++
++    ref->poc      = poc;
++    ref->sequence = s->seq_decode;
++    ref->frame->crop_left   = s->ps.sps->output_window.left_offset; // s->ps.sps is dereferenced without a NULL check here
++    ref->frame->crop_right  = s->ps.sps->output_window.right_offset;
++    ref->frame->crop_top    = s->ps.sps->output_window.top_offset;
++    ref->frame->crop_bottom = s->ps.sps->output_window.bottom_offset;
++
++    return 0;
++}
++
++int ff_hevc_rpi_output_frame(HEVCRpiContext *s, AVFrame *out, int flush) // returns 1 if a frame was referenced into `out`, 0 if none, <0 on error
++{
++    do {
++        int nb_output = 0;
++        int min_poc   = INT_MAX;
++        int i, min_idx, ret; // min_idx is only read when nb_output != 0, in which case the scan below has set it
++
++        if (s->sh.no_output_of_prior_pics_flag == 1 && s->no_rasl_output_flag == 1) {
++            for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
++                HEVCRpiFrame *frame = &s->DPB[i];
++                if (!(frame->flags & HEVC_FRAME_FLAG_BUMPING) && frame->poc != s->poc &&
++                        frame->sequence == s->seq_output) {
++                    ff_hevc_rpi_unref_frame(s, frame, HEVC_FRAME_FLAG_OUTPUT); // discard pending output of this earlier frame
++                }
++            }
++        }
++
++        for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) { // count frames awaiting output and find the one with the smallest POC
++            HEVCRpiFrame *frame = &s->DPB[i];
++            if ((frame->flags & HEVC_FRAME_FLAG_OUTPUT) &&
++                frame->sequence == s->seq_output) {
++                nb_output++;
++                if (frame->poc < min_poc || nb_output == 1) { // nb_output == 1 accepts the first candidate even if its POC is INT_MAX
++                    min_poc = frame->poc;
++                    min_idx = i;
++                }
++            }
++        }
++
++        /* wait for more frames before output */
++        if (!flush && s->seq_output == s->seq_decode && s->ps.sps &&
++            nb_output <= s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].num_reorder_pics)
++            return 0;
++
++        if (nb_output) {
++            HEVCRpiFrame *frame = &s->DPB[min_idx];
++            if (frame->frame->format == AV_PIX_FMT_VIDEOTOOLBOX && frame->frame->buf[0]->size == 1) // such a frame is not output; the call reports "nothing yet"
++                return 0;
++
++            ret = av_frame_ref(out, frame->frame);
++            if (frame->flags & HEVC_FRAME_FLAG_BUMPING)
++                ff_hevc_rpi_unref_frame(s, frame, HEVC_FRAME_FLAG_OUTPUT | HEVC_FRAME_FLAG_BUMPING);
++            else
++                ff_hevc_rpi_unref_frame(s, frame, HEVC_FRAME_FLAG_OUTPUT);
++            if (ret < 0) // checked only after the flags were dropped, so a failed ref still consumes the frame's output slot
++                return ret;
++            av_log(s->avctx, AV_LOG_DEBUG,
++                   "Output frame with POC %d.\n", frame->poc);
++            return 1;
++        }
++
++        if (s->seq_output != s->seq_decode) // nothing left in this output sequence: step to the next one and rescan
++            s->seq_output = (s->seq_output + 1) & 0xff; // sequence counter wraps at 256
++        else
++            break;
++    } while (1);
++
++    return 0;
++}
++
++void ff_hevc_rpi_bump_frame(HEVCRpiContext *s) // when the DPB is at its limit, flags the oldest output-pending frames with BUMPING
++{
++    int dpb = 0;
++    int min_poc = INT_MAX;
++    int i;
++
++    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) { // count in-use frames of the output sequence, excluding the current POC
++        HEVCRpiFrame *frame = &s->DPB[i];
++        if ((frame->flags) &&
++            frame->sequence == s->seq_output &&
++            frame->poc != s->poc) {
++            dpb++;
++        }
++    }
++
++    if (s->ps.sps && dpb >= s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering) {
++        for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
++            HEVCRpiFrame *frame = &s->DPB[i];
++            if ((frame->flags) &&
++                frame->sequence == s->seq_output &&
++                frame->poc != s->poc) {
++                if (frame->flags == HEVC_FRAME_FLAG_OUTPUT && frame->poc < min_poc) { // `==`: only frames whose sole flag is OUTPUT are candidates
++                    min_poc = frame->poc;
++                }
++            }
++        }
++
++        for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
++            HEVCRpiFrame *frame = &s->DPB[i];
++            if (frame->flags & HEVC_FRAME_FLAG_OUTPUT &&
++                frame->sequence == s->seq_output &&
++                frame->poc <= min_poc) { // if no candidate was found, min_poc is still INT_MAX and every OUTPUT frame matches
++                frame->flags |= HEVC_FRAME_FLAG_BUMPING;
++            }
++        }
++
++        dpb--; // has no effect: dpb is a local that is not read after this point
++    }
++}
++
++static int init_slice_rpl(HEVCRpiContext *s) // points s->refPicList at the rpl_tab entry of the current slice
++{
++    if (s->slice_idx >= s->rpl_tab_size) // slice index outside the table
++        return AVERROR_INVALIDDATA;
++
++    s->refPicList = s->rpl_tab[s->slice_idx].refPicList + 0;
++    return 0;
++}
++
++int ff_hevc_rpi_slice_rpl(HEVCRpiContext *s) // builds reference list L0 (and L1 for B slices) for the current slice; 0 or a negative AVERROR
++{
++    RpiSliceHeader *sh = &s->sh;
++
++    uint8_t nb_list = sh->slice_type == HEVC_SLICE_B ? 2 : 1; // any non-B slice type gets one list
++    uint8_t list_idx;
++    int i, j, ret;
++
++    ret = init_slice_rpl(s);
++    if (ret < 0)
++        return ret;
++
++    if (!(s->rps[ST_CURR_BEF].nb_refs + s->rps[ST_CURR_AFT].nb_refs +
++          s->rps[LT_CURR].nb_refs)) { // no candidate at all; this also keeps the while loop below from spinning forever
++        av_log(s->avctx, AV_LOG_ERROR, "Zero refs in the frame RPS.\n");
++        return AVERROR_INVALIDDATA;
++    }
++
++    for (list_idx = 0; list_idx < nb_list; list_idx++) {
++        RefPicList  rpl_tmp = { { 0 } };
++        RefPicList *rpl     = &s->refPicList[list_idx];
++
++        /* The order of the elements is
++         * ST_CURR_BEF - ST_CURR_AFT - LT_CURR for the L0 and
++         * ST_CURR_AFT - ST_CURR_BEF - LT_CURR for the L1 */
++        int cand_lists[3] = { list_idx ? ST_CURR_AFT : ST_CURR_BEF,
++                              list_idx ? ST_CURR_BEF : ST_CURR_AFT,
++                              LT_CURR };
++
++        /* concatenate the candidate lists for the current frame */
++        while (rpl_tmp.nb_refs < sh->nb_refs[list_idx]) { // candidates are repeated cyclically until enough entries exist
++            for (i = 0; i < FF_ARRAY_ELEMS(cand_lists); i++) {
++                RefPicList *rps = &s->rps[cand_lists[i]];
++                for (j = 0; j < rps->nb_refs && rpl_tmp.nb_refs < HEVC_MAX_REFS; j++) { // NOTE(review): termination relies on sh->nb_refs[] <= HEVC_MAX_REFS - presumably validated by the slice header parser
++                    rpl_tmp.list[rpl_tmp.nb_refs]       = rps->list[j];
++                    rpl_tmp.ref[rpl_tmp.nb_refs]        = rps->ref[j];
++                    rpl_tmp.isLongTerm[rpl_tmp.nb_refs] = i == 2; // cand_lists[2] is LT_CURR
++                    rpl_tmp.nb_refs++;
++                }
++            }
++        }
++
++        /* reorder the references if necessary */
++        if (sh->rpl_modification_flag[list_idx]) {
++            for (i = 0; i < sh->nb_refs[list_idx]; i++) {
++                int idx = sh->list_entry_lx[list_idx][i];
++
++                if (idx >= rpl_tmp.nb_refs) {
++                    av_log(s->avctx, AV_LOG_ERROR, "Invalid reference index.\n");
++                    return AVERROR_INVALIDDATA;
++                }
++
++                rpl->list[i]       = rpl_tmp.list[idx];
++                rpl->ref[i]        = rpl_tmp.ref[idx];
++                rpl->isLongTerm[i] = rpl_tmp.isLongTerm[idx];
++                rpl->nb_refs++; // NOTE(review): increments whatever rpl->nb_refs already held - presumably zeroed by the caller; confirm
++            }
++        } else {
++            memcpy(rpl, &rpl_tmp, sizeof(*rpl));
++            rpl->nb_refs = FFMIN(rpl->nb_refs, sh->nb_refs[list_idx]); // trim surplus entries added by the last concatenation round
++        }
++
++        if (sh->collocated_list == list_idx &&
++            sh->collocated_ref_idx < rpl->nb_refs) // an out-of-range index silently leaves collocated_ref untouched
++            s->ref->collocated_ref = rpl->ref[sh->collocated_ref_idx];
++    }
++
++    return 0;
++}
++
++static HEVCRpiFrame *find_ref_idx(HEVCRpiContext *s, int poc) // finds an allocated DPB frame of the current decode sequence matching `poc`; NULL if none
++{
++    int i;
++    int LtMask = (1 << s->ps.sps->log2_max_poc_lsb) - 1; // mask keeping the low log2_max_poc_lsb bits of a POC
++
++    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) { // pass 1: compare the masked (LSB) part of each frame's POC
++        HEVCRpiFrame *ref = &s->DPB[i];
++        if (ref->frame->buf[0] && (ref->sequence == s->seq_decode)) {
++            if ((ref->poc & LtMask) == poc)
++                return ref;
++        }
++    }
++
++    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) { // pass 2: compare the full POC (the LSB test here repeats pass 1)
++        HEVCRpiFrame *ref = &s->DPB[i];
++        if (ref->frame->buf[0] && ref->sequence == s->seq_decode) {
++            if (ref->poc == poc || (ref->poc & LtMask) == poc)
++                return ref;
++        }
++    }
++
++    if (s->nal_unit_type != HEVC_NAL_CRA_NUT && !IS_BLA(s)) // the miss is not logged for CRA and BLA pictures
++        av_log(s->avctx, AV_LOG_ERROR,
++               "Could not find ref with POC %d\n", poc);
++    return NULL;
++}
++
++static void mark_ref(HEVCRpiFrame *frame, int flag) // replaces the frame's short/long reference marking with `flag`
++{
++    frame->flags &= ~(HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF); // drop both reference bits, keep all others
++    frame->flags |= flag; // flag == 0 therefore means "not a reference"
++}
++
++static HEVCRpiFrame *generate_missing_ref(HEVCRpiContext *s, int poc) // allocates a stand-in frame for a missing reference, filled with 1 << (bit_depth - 1)
++{
++    HEVCRpiFrame *frame;
++    int i, x, y;
++
++    frame = alloc_frame(s);
++    if (!frame)
++        return NULL;
++
++    if (!s->ps.sps->pixel_shift) { // pixel_shift == 0: fill every buffer bytewise over its whole size
++        for (i = 0; frame->frame->buf[i]; i++)
++            memset(frame->frame->buf[i]->data, 1 << (s->ps.sps->bit_depth - 1),
++                   frame->frame->buf[i]->size);
++    } else { // otherwise write one 16-bit value per sample position
++        for (i = 0; frame->frame->data[i]; i++)
++            for (y = 0; y < (s->ps.sps->height >> s->ps.sps->vshift[i]); y++)
++                for (x = 0; x < (s->ps.sps->width >> s->ps.sps->hshift[i]); x++) {
++                    AV_WN16(frame->frame->data[i] + y * frame_stride1(frame->frame, 1) + 2 * x, // NOTE(review): stride argument is 1 for every plane i - confirm this is intended
++                            1 << (s->ps.sps->bit_depth - 1));
++                }
++    }
++
++    frame->poc      = poc;
++    frame->sequence = s->seq_decode;
++    frame->flags    = 0; // add_candidate_ref sets the reference flag right after this returns
++
++    ff_hevc_rpi_progress_set_all_done(frame);
++
++    return frame;
++}
++
++/* add a reference with the given poc to the list and mark it as used in DPB */
++static int add_candidate_ref(HEVCRpiContext *s, RefPicList *list,
++                             int poc, int ref_flag) // ref_flag: HEVC_FRAME_FLAG_SHORT_REF or _LONG_REF; returns 0 or a negative AVERROR
++{
++    HEVCRpiFrame *ref = find_ref_idx(s, poc);
++
++    if (ref == s->ref || list->nb_refs >= HEVC_MAX_REFS) // reject a self-reference or a full list
++        return AVERROR_INVALIDDATA;
++
++    if (!ref) { // not in the DPB: substitute a generated frame
++        ref = generate_missing_ref(s, poc);
++        if (!ref)
++            return AVERROR(ENOMEM);
++    }
++
++    list->list[list->nb_refs] = ref->poc; // the frame's own POC is stored, which can differ from `poc` after an LSB-only match
++    list->ref[list->nb_refs]  = ref;
++    list->nb_refs++;
++
++    mark_ref(ref, ref_flag);
++    return 0;
++}
++
++int ff_hevc_rpi_frame_rps(HEVCRpiContext *s) // rebuilds s->rps[] from the slice header's RPS and re-marks DPB frames; 0 or a negative AVERROR
++{
++    const ShortTermRPS *short_rps = s->sh.short_term_rps;
++    const LongTermRPS  *long_rps  = &s->sh.long_term_rps;
++    RefPicList               *rps = s->rps;
++    int i, ret = 0;
++
++    if (!short_rps) { // no short-term RPS: only lists 0 and 1 are emptied and no frame marking is touched
++        rps[0].nb_refs = rps[1].nb_refs = 0;
++        return 0;
++    }
++
++    /* clear the reference flags on all frames except the current one */
++    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
++        HEVCRpiFrame *frame = &s->DPB[i];
++
++        if (frame == s->ref)
++            continue;
++
++        mark_ref(frame, 0);
++    }
++
++    for (i = 0; i < NB_RPS_TYPE; i++)
++        rps[i].nb_refs = 0;
++
++    /* add the short refs */
++    for (i = 0; i < short_rps->num_delta_pocs; i++) {
++        int poc = s->poc + short_rps->delta_poc[i];
++        int list;
++
++        if (!short_rps->used[i])
++            list = ST_FOLL;
++        else if (i < short_rps->num_negative_pics)
++            list = ST_CURR_BEF;
++        else
++            list = ST_CURR_AFT;
++
++        ret = add_candidate_ref(s, &rps[list], poc, HEVC_FRAME_FLAG_SHORT_REF);
++        if (ret < 0)
++            goto fail;
++    }
++
++    /* add the long refs */
++    for (i = 0; i < long_rps->nb_refs; i++) {
++        int poc  = long_rps->poc[i]; // used as given, not relative to s->poc
++        int list = long_rps->used[i] ? LT_CURR : LT_FOLL;
++
++        ret = add_candidate_ref(s, &rps[list], poc, HEVC_FRAME_FLAG_LONG_REF);
++        if (ret < 0)
++            goto fail;
++    }
++
++fail: // also reached by fall-through on success, with ret == 0
++    /* release any frames that are now unused */
++    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++)
++        ff_hevc_rpi_unref_frame(s, &s->DPB[i], 0); // flags == 0 clears nothing; it only releases frames whose flags are already 0
++
++    return ret;
++}
++
++int ff_hevc_rpi_frame_nb_refs(HEVCRpiContext *s) // returns how many short- plus long-term RPS entries have their `used` flag set
++{
++    int ret = 0;
++    int i;
++    const ShortTermRPS *rps = s->sh.short_term_rps;
++    LongTermRPS *long_rps   = &s->sh.long_term_rps;
++
++    if (rps) {
++        for (i = 0; i < rps->num_negative_pics; i++)
++            ret += !!rps->used[i];
++        for (; i < rps->num_delta_pocs; i++) // continues from where the first loop stopped; same counting rule
++            ret += !!rps->used[i];
++    }
++
++    if (long_rps) { // always true: long_rps is the address of a struct member
++        for (i = 0; i < long_rps->nb_refs; i++)
++            ret += !!long_rps->used[i];
++    }
++    return ret;
++}
+--- /dev/null
++++ b/libavcodec/rpi_hevc_sei.c
+@@ -0,0 +1,368 @@
++/*
++ * HEVC Supplementary Enhancement Information messages
++ *
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ * Copyright (C) 2012 - 2013 Gildas Cocherel
++ * Copyright (C) 2013 Vittorio Giovara
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "golomb.h"
++#include "rpi_hevc_ps.h"
++#include "rpi_hevc_sei.h"
++
++static int decode_nal_sei_decoded_picture_hash(HEVCSEIPictureHash *s, GetBitContext *gb) // stores MD5 hashes, skips CRC/checksum payloads; always returns 0
++{
++    int cIdx, i;
++    uint8_t hash_type;
++    //uint16_t picture_crc;
++    //uint32_t picture_checksum;
++    hash_type = get_bits(gb, 8);
++
++    for (cIdx = 0; cIdx < 3/*((s->sps->chroma_format_idc == 0) ? 1 : 3)*/; cIdx++) { // always three components; the chroma-format-dependent count is commented out
++        if (hash_type == 0) {
++            s->is_md5 = 1;
++            for (i = 0; i < 16; i++) // 16 bytes per component
++                s->md5[cIdx][i] = get_bits(gb, 8);
++        } else if (hash_type == 1) {
++            // picture_crc = get_bits(gb, 16);
++            skip_bits(gb, 16);
++        } else if (hash_type == 2) {
++            // picture_checksum = get_bits_long(gb, 32);
++            skip_bits(gb, 32);
++        } // any other hash_type consumes no further bits
++    }
++    return 0;
++}
++
++static int decode_nal_sei_mastering_display_info(HEVCSEIMasteringDisplay *s, GetBitContext *gb) // reads 3 primaries, white point and luminance range; always returns 0
++{
++    int i;
++    // Mastering primaries
++    for (i = 0; i < 3; i++) {
++        s->display_primaries[i][0] = get_bits(gb, 16); // first 16-bit value of the pair
++        s->display_primaries[i][1] = get_bits(gb, 16); // second 16-bit value of the pair
++    }
++    // White point (x, y)
++    s->white_point[0] = get_bits(gb, 16);
++    s->white_point[1] = get_bits(gb, 16);
++
++    // Max and min luminance of mastering display
++    s->max_luminance = get_bits_long(gb, 32);
++    s->min_luminance = get_bits_long(gb, 32);
++
++    // As this SEI message comes before the first frame that references it,
++    // initialize the flag to 2 and decrement on IRAP access unit so it
++    // persists for the coded video sequence (e.g., between two IRAPs)
++    s->present = 2;
++    return 0;
++}
++
++static int decode_nal_sei_content_light_info(HEVCSEIContentLight *s, GetBitContext *gb) // reads two 16-bit light-level values; always returns 0
++{
++    // Max and average light levels
++    s->max_content_light_level     = get_bits_long(gb, 16);
++    s->max_pic_average_light_level = get_bits_long(gb, 16);
++    // As this SEI message comes before the first frame that references it,
++    // initialize the flag to 2 and decrement on IRAP access unit so it
++    // persists for the coded video sequence (e.g., between two IRAPs)
++    s->present = 2;
++    return  0;
++}
++
++static int decode_nal_sei_frame_packing_arrangement(HEVCSEIFramePacking *s, GetBitContext *gb) // parses the frame packing SEI into *s; always returns 0
++{
++    get_ue_golomb_long(gb);             // frame_packing_arrangement_id
++    s->present = !get_bits1(gb);        // present is set when the bit read is 0 (presumably a cancel flag)
++
++    if (s->present) {
++        s->arrangement_type               = get_bits(gb, 7);
++        s->quincunx_subsampling           = get_bits1(gb);
++        s->content_interpretation_type    = get_bits(gb, 6);
++
++        // spatial_flipping_flag, frame0_flipped_flag, field_views_flag
++        skip_bits(gb, 3);
++        s->current_frame_is_frame0_flag = get_bits1(gb);
++        // frame0_self_contained_flag, frame1_self_contained_flag
++        skip_bits(gb, 2);
++
++        if (!s->quincunx_subsampling && s->arrangement_type != 5)
++            skip_bits(gb, 16);  // frame[01]_grid_position_[xy]
++        skip_bits(gb, 8);       // frame_packing_arrangement_reserved_byte
++        skip_bits1(gb);         // frame_packing_arrangement_persistence_flag
++    }
++    skip_bits1(gb);             // upsampled_aspect_ratio_flag
++    return 0;
++}
++
++static int decode_nal_sei_display_orientation(HEVCSEIDisplayOrientation *s, GetBitContext *gb)
++{   // Parses a display orientation payload into *s; always returns 0.
++    s->present = !get_bits1(gb);  // a set bit here means no orientation data follows
++
++    if (s->present) {
++        s->hflip = get_bits1(gb);     // hor_flip
++        s->vflip = get_bits1(gb);     // ver_flip
++
++        s->anticlockwise_rotation = get_bits(gb, 16);  // raw 16-bit field, stored without scaling
++        skip_bits1(gb);     // display_orientation_persistence_flag
++    }
++
++    return 0;
++}
++
++static int decode_nal_sei_pic_timing(HEVCSEIContext *s, GetBitContext *gb, const HEVCRpiParamSets *ps,
++                                     void *logctx, int size)
++{
++    HEVCSEIPictureTiming *const timing = &s->picture_timing;
++    const HEVCRpiSPS *active_sps;
++
++    if (!ps->sps_list[s->active_seq_parameter_set_id])
++        return AVERROR(ENOMEM);     // no SPS stored under the active id
++    active_sps = (const HEVCRpiSPS *)ps->sps_list[s->active_seq_parameter_set_id]->data;
++
++    if (active_sps->vui.frame_field_info_present_flag) {
++        timing->picture_struct = AV_PICTURE_STRUCTURE_UNKNOWN;
++        switch (get_bits(gb, 4)) {      // pic_struct
++        case 2: case 10: case 12:
++            av_log(logctx, AV_LOG_DEBUG, "BOTTOM Field\n");
++            timing->picture_struct = AV_PICTURE_STRUCTURE_BOTTOM_FIELD;
++            break;
++        case 1: case 9: case 11:
++            av_log(logctx, AV_LOG_DEBUG, "TOP Field\n");
++            timing->picture_struct = AV_PICTURE_STRUCTURE_TOP_FIELD;
++            break;
++        }
++        skip_bits(gb, 4);   // source_scan_type (2), duplicate_flag (1), 1 further bit
++        size--;             // the eight bits consumed above are one payload byte
++    }
++    skip_bits_long(gb, 8 * size);
++
++    return 0;
++}
++
++static int decode_registered_user_data_closed_caption(HEVCSEIA53Caption *s, GetBitContext *gb,
++                                                      int size)
++{   // Appends the caption bytes of a type-0x3 user data payload to s->a53_caption; 0 or <0 on error.
++    int flag;
++    int user_data_type_code;
++    int cc_count;
++
++    if (size < 3)
++       return AVERROR(EINVAL);
++
++    user_data_type_code = get_bits(gb, 8);
++    if (user_data_type_code == 0x3) { // only type 0x3 is parsed; any other type is skipped below
++        skip_bits(gb, 1); // reserved
++
++        flag = get_bits(gb, 1); // process_cc_data_flag
++        if (flag) {
++            skip_bits(gb, 1);
++            cc_count = get_bits(gb, 5); // number of 3-byte entries that follow
++            skip_bits(gb, 8); // reserved
++            size -= 2; // the two bytes read after the type code
++
++            if (cc_count && size >= cc_count * 3) {
++                const uint64_t new_size = (s->a53_caption_size + cc_count
++                                           * UINT64_C(3)); // 64-bit so the sum cannot wrap
++                int i, ret;
++
++                if (new_size > INT_MAX) // a53_caption_size is an int
++                    return AVERROR(EINVAL);
++
++                /* Allow merging of the cc data from two fields. */
++                ret = av_reallocp(&s->a53_caption, new_size);
++                if (ret < 0)
++                    return ret;
++
++                for (i = 0; i < cc_count; i++) { // three bytes per entry, appended in stream order
++                    s->a53_caption[s->a53_caption_size++] = get_bits(gb, 8);
++                    s->a53_caption[s->a53_caption_size++] = get_bits(gb, 8);
++                    s->a53_caption[s->a53_caption_size++] = get_bits(gb, 8);
++                }
++                skip_bits(gb, 8); // marker_bits
++            }
++        }
++    } else {
++        int i;
++        for (i = 0; i < size - 1; i++) // size - 1: the type code byte is already consumed
++            skip_bits(gb, 8);
++    }
++
++    return 0;
++}
++
++static int decode_nal_sei_user_data_registered_itu_t_t35(HEVCSEIContext *s, GetBitContext *gb,
++                                                         int size)
++{   // Reads the T.35 header, forwards 'GA94' payloads to the caption parser and skips all others.
++    uint32_t country_code;
++    uint32_t user_identifier;
++
++    if (size < 7)   // country code (1 byte) + provider code (2) + user identifier (4)
++        return AVERROR(EINVAL);
++    size -= 7;
++
++    country_code = get_bits(gb, 8);
++    if (country_code == 0xFF) {
++        if (size < 1)   // the extension byte must fit too, or size would go negative
++            return AVERROR(EINVAL);
++        skip_bits(gb, 8);
++        size--;
++    }
++
++    skip_bits(gb, 16);  // provider code, unused
++    user_identifier = get_bits_long(gb, 32);
++
++    switch (user_identifier) {
++        case MKBETAG('G', 'A', '9', '4'):
++            return decode_registered_user_data_closed_caption(&s->a53_caption, gb, size);
++        default:
++            skip_bits_long(gb, size * 8);
++            break;
++    }
++    return 0;
++}
++
++static int decode_nal_sei_active_parameter_sets(HEVCSEIContext *s, GetBitContext *gb, void *logctx)
++{   // Stores the first listed SPS id in s->active_seq_parameter_set_id; every other field is discarded.
++    int num_sps_ids_minus1;
++    int i;
++    unsigned active_seq_parameter_set_id;
++
++    get_bits(gb, 4); // active_video_parameter_set_id
++    get_bits(gb, 1); // self_contained_cvs_flag
++    get_bits(gb, 1); // no_parameter_set_update_flag
++    num_sps_ids_minus1 = get_ue_golomb_long(gb); // num_sps_ids_minus1
++
++    if (num_sps_ids_minus1 < 0 || num_sps_ids_minus1 > 15) {
++        av_log(logctx, AV_LOG_ERROR, "num_sps_ids_minus1 %d invalid\n", num_sps_ids_minus1);
++        return AVERROR_INVALIDDATA;
++    }
++
++    active_seq_parameter_set_id = get_ue_golomb_long(gb);
++    if (active_seq_parameter_set_id >= HEVC_MAX_SPS_COUNT) { // the id later indexes ps->sps_list
++        av_log(logctx, AV_LOG_ERROR, "active_parameter_set_id %u invalid\n", active_seq_parameter_set_id);
++        return AVERROR_INVALIDDATA;
++    }
++    s->active_seq_parameter_set_id = active_seq_parameter_set_id;
++
++    for (i = 1; i <= num_sps_ids_minus1; i++)
++        get_ue_golomb_long(gb); // active_seq_parameter_set_id[i]
++
++    return 0;
++}
++
++static int decode_nal_sei_alternative_transfer(HEVCSEIAlternativeTransfer *s, GetBitContext *gb)
++{
++    s->preferred_transfer_characteristics = get_bits(gb, 8);   // one 8-bit field
++    s->present = 1;                                            // mark the message as seen
++    return 0;
++}
++
++static int decode_nal_sei_prefix(GetBitContext *gb, void *logctx, HEVCSEIContext *s, const HEVCRpiParamSets *ps,
++                                 int type, int size)
++{   // Routes one prefix SEI payload of the given type to its parser and returns that parser's result.
++    switch (type) {
++    case 256:  // Mismatched value from HM 8.1
++        return decode_nal_sei_decoded_picture_hash(&s->picture_hash, gb);
++    case HEVC_SEI_TYPE_FRAME_PACKING:
++        return decode_nal_sei_frame_packing_arrangement(&s->frame_packing, gb);
++    case HEVC_SEI_TYPE_DISPLAY_ORIENTATION:
++        return decode_nal_sei_display_orientation(&s->display_orientation, gb);
++    case HEVC_SEI_TYPE_PICTURE_TIMING:
++        return decode_nal_sei_pic_timing(s, gb, ps, logctx, size);
++    case HEVC_SEI_TYPE_MASTERING_DISPLAY_INFO:
++        return decode_nal_sei_mastering_display_info(&s->mastering_display, gb);
++    case HEVC_SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO:
++        return decode_nal_sei_content_light_info(&s->content_light, gb);
++    case HEVC_SEI_TYPE_ACTIVE_PARAMETER_SETS:
++        return decode_nal_sei_active_parameter_sets(s, gb, logctx);
++    case HEVC_SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35:
++        return decode_nal_sei_user_data_registered_itu_t_t35(s, gb, size);
++    case HEVC_SEI_TYPE_ALTERNATIVE_TRANSFER_CHARACTERISTICS:
++        return decode_nal_sei_alternative_transfer(&s->alternative_transfer, gb);
++    default:    // any other type: log it and step over its size bytes
++        av_log(logctx, AV_LOG_DEBUG, "Skipped PREFIX SEI %d\n", type);
++        skip_bits_long(gb, 8 * size);
++        return 0;
++    }
++}
++
++static int decode_nal_sei_suffix(GetBitContext *gb, void *logctx, HEVCSEIContext *s,
++                                 int type, int size)
++{   // Suffix SEI: only the decoded picture hash is parsed; every other type is skipped.
++    switch (type) {
++    case HEVC_SEI_TYPE_DECODED_PICTURE_HASH:
++        return decode_nal_sei_decoded_picture_hash(&s->picture_hash, gb);
++    default:    // log the type and step over its size bytes
++        av_log(logctx, AV_LOG_DEBUG, "Skipped SUFFIX SEI %d\n", type);
++        skip_bits_long(gb, 8 * size);
++        return 0;
++    }
++}
++
++static int decode_nal_sei_message(GetBitContext * const gb, void * const logctx, HEVCSEIContext * const s,
++                                  const HEVCRpiParamSets * const ps, const int nal_unit_type)
++{   // Reads one SEI message header (type, size) and dispatches the payload; 0 or <0 on error.
++    int payload_type = 0;
++    int payload_size = 0;
++    int byte = 0xFF;
++    av_log(logctx, AV_LOG_DEBUG, "Decoding SEI\n");
++
++    while (byte == 0xFF) {  // type is the sum of bytes, each 0xFF meaning "more follows"
++        if (get_bits_left(gb) < 16 || payload_type > INT_MAX - 255)
++            return AVERROR_INVALIDDATA;
++        byte          = get_bits(gb, 8);
++        payload_type += byte;
++    }
++    byte = 0xFF;
++    while (byte == 0xFF) {  // size uses the same 0xFF-extended encoding
++        if (get_bits_left(gb) < 8 + 8LL*payload_size)
++            return AVERROR_INVALIDDATA;
++        byte          = get_bits(gb, 8);
++        payload_size += byte;
++    }
++    if (get_bits_left(gb) < 8LL * payload_size)   // the final size must fit in the data that is left
++        return AVERROR_INVALIDDATA;
++    if (nal_unit_type == HEVC_NAL_SEI_PREFIX)
++        return decode_nal_sei_prefix(gb, logctx, s, ps, payload_type, payload_size);
++    return decode_nal_sei_suffix(gb, logctx, s, payload_type, payload_size); /* any other NAL type is treated as suffix */
++}
++
++static int more_rbsp_data(GetBitContext *gb)
++{
++    return !(get_bits_left(gb) <= 0 || show_bits(gb, 8) == 0x80);   // 0 once data is exhausted or the next byte is 0x80
++}
++
++int ff_hevc_rpi_decode_nal_sei(GetBitContext *gb, void *logctx, HEVCSEIContext *s,
++                           const HEVCRpiParamSets *ps, int type)
++{
++    // Parse messages back to back; the first failing message aborts with its error code.
++    for (;;) {
++        const int err = decode_nal_sei_message(gb, logctx, s, ps, type);
++        if (err < 0)
++            return err;
++        if (!more_rbsp_data(gb))
++            return 1;
++    }
++}
++
++void ff_hevc_rpi_reset_sei(HEVCSEIContext *s)
++{
++    av_freep(&s->a53_caption.a53_caption);     // drop any caption bytes gathered so far
++    s->a53_caption.a53_caption_size = 0;
++}
+--- /dev/null
++++ b/libavcodec/rpi_hevc_sei.h
+@@ -0,0 +1,135 @@
++/*
++ * HEVC Supplementary Enhancement Information messages
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVCODEC_RPI_HEVC_SEI_H
++#define AVCODEC_RPI_HEVC_SEI_H
++
++#include <stdint.h>
++
++#include "libavutil/md5.h"
++
++#include "get_bits.h"
++
++/**
++ * SEI payload type values; the parser only handles some of them and skips the rest.
++ */
++typedef enum {
++    HEVC_SEI_TYPE_BUFFERING_PERIOD                     = 0,
++    HEVC_SEI_TYPE_PICTURE_TIMING                       = 1,
++    HEVC_SEI_TYPE_PAN_SCAN_RECT                        = 2,
++    HEVC_SEI_TYPE_FILLER_PAYLOAD                       = 3,
++    HEVC_SEI_TYPE_USER_DATA_REGISTERED_ITU_T_T35       = 4,
++    HEVC_SEI_TYPE_USER_DATA_UNREGISTERED               = 5,
++    HEVC_SEI_TYPE_RECOVERY_POINT                       = 6,
++    HEVC_SEI_TYPE_SCENE_INFO                           = 9,
++    HEVC_SEI_TYPE_FULL_FRAME_SNAPSHOT                  = 15,
++    HEVC_SEI_TYPE_PROGRESSIVE_REFINEMENT_SEGMENT_START = 16,
++    HEVC_SEI_TYPE_PROGRESSIVE_REFINEMENT_SEGMENT_END   = 17,
++    HEVC_SEI_TYPE_FILM_GRAIN_CHARACTERISTICS           = 19,
++    HEVC_SEI_TYPE_POST_FILTER_HINT                     = 22,
++    HEVC_SEI_TYPE_TONE_MAPPING_INFO                    = 23,
++    HEVC_SEI_TYPE_FRAME_PACKING                        = 45,
++    HEVC_SEI_TYPE_DISPLAY_ORIENTATION                  = 47,
++    HEVC_SEI_TYPE_SOP_DESCRIPTION                      = 128,
++    HEVC_SEI_TYPE_ACTIVE_PARAMETER_SETS                = 129,
++    HEVC_SEI_TYPE_DECODING_UNIT_INFO                   = 130,
++    HEVC_SEI_TYPE_TEMPORAL_LEVEL0_INDEX                = 131,
++    HEVC_SEI_TYPE_DECODED_PICTURE_HASH                 = 132, // also accepted as 256 in prefix NALs
++    HEVC_SEI_TYPE_SCALABLE_NESTING                     = 133,
++    HEVC_SEI_TYPE_REGION_REFRESH_INFO                  = 134,
++    HEVC_SEI_TYPE_MASTERING_DISPLAY_INFO               = 137,
++    HEVC_SEI_TYPE_CONTENT_LIGHT_LEVEL_INFO             = 144,
++    HEVC_SEI_TYPE_ALTERNATIVE_TRANSFER_CHARACTERISTICS = 147,
++} HEVC_SEI_Type;
++
++typedef struct HEVCSEIPictureHash {
++    uint8_t       md5[3][16];  // presumably one 16-byte digest per colour plane - TODO confirm
++    uint8_t is_md5;            // presumably nonzero when md5[] holds MD5 digests - TODO confirm
++} HEVCSEIPictureHash;
++
++typedef struct HEVCSEIFramePacking {
++    int present;                        // inverse of the flag bit that follows the arrangement id
++    int arrangement_type;               // 7-bit field
++    int content_interpretation_type;    // 6-bit field
++    int quincunx_subsampling;           // 1-bit flag
++    int current_frame_is_frame0_flag;   // 1-bit flag
++} HEVCSEIFramePacking;
++
++typedef struct HEVCSEIDisplayOrientation {
++    int present;                 // inverse of the payload's leading flag bit
++    int anticlockwise_rotation;  // raw 16-bit field from the payload
++    int hflip, vflip;            // hor_flip / ver_flip bits
++} HEVCSEIDisplayOrientation;
++
++typedef struct HEVCSEIPictureTiming {
++    int picture_struct;  // AV_PICTURE_STRUCTURE_UNKNOWN, _TOP_FIELD or _BOTTOM_FIELD
++} HEVCSEIPictureTiming;
++
++typedef struct HEVCSEIA53Caption {
++    int a53_caption_size;  // number of bytes currently stored in a53_caption
++    uint8_t *a53_caption;  // buffer grown with av_reallocp() while parsing, released by ff_hevc_rpi_reset_sei()
++} HEVCSEIA53Caption;
++
++typedef struct HEVCSEIMasteringDisplay {
++    int present;                       // set to 2 when the message is parsed
++    uint16_t display_primaries[3][2];  // two 16-bit values per primary
++    uint16_t white_point[2];           // white point (x, y), 16 bits each
++    uint32_t max_luminance;            // 32-bit field from the payload
++    uint32_t min_luminance;            // 32-bit field from the payload
++} HEVCSEIMasteringDisplay;
++
++typedef struct HEVCSEIContentLight {
++    int present;                           // set to 2 when the message is parsed
++    uint16_t max_content_light_level;      // first 16-bit field of the payload
++    uint16_t max_pic_average_light_level;  // second 16-bit field of the payload
++} HEVCSEIContentLight;
++
++typedef struct HEVCSEIAlternativeTransfer {
++    int present;                             // set to 1 when the message is parsed
++    int preferred_transfer_characteristics;  // 8-bit field from the payload
++} HEVCSEIAlternativeTransfer;
++
++typedef struct HEVCSEIContext {  // state collected from the SEI messages parsed so far
++    HEVCSEIPictureHash picture_hash;
++    HEVCSEIFramePacking frame_packing;
++    HEVCSEIDisplayOrientation display_orientation;
++    HEVCSEIPictureTiming picture_timing;
++    HEVCSEIA53Caption a53_caption;
++    HEVCSEIMasteringDisplay mastering_display;
++    HEVCSEIContentLight content_light;
++    int active_seq_parameter_set_id;  // from the active parameter sets SEI; indexes sps_list for picture timing
++    HEVCSEIAlternativeTransfer alternative_transfer;
++} HEVCSEIContext;
++
++struct HEVCRpiParamSets;
++
++int ff_hevc_rpi_decode_nal_sei(GetBitContext *gb, void *logctx, HEVCSEIContext *s,  /* parses every message in one SEI NAL */
++                           const struct HEVCRpiParamSets *ps, int type);  /* type: NAL unit type; returns 1, or <0 on error */
++
++/**
++ * Reset SEI values that are stored on the Context.
++ * e.g. Caption data that was extracted during NAL
++ * parsing.
++ *
++ * @param s SEI context whose A53 caption buffer is freed and emptied.
++ */
++void ff_hevc_rpi_reset_sei(HEVCSEIContext *s);
++
++#endif /* AVCODEC_RPI_HEVC_SEI_H */
+--- /dev/null
++++ b/libavcodec/rpi_hevc_shader.c
+@@ -0,0 +1,1537 @@
++#include "rpi_hevc_shader.h"
++
++#ifdef _MSC_VER
++   #include <stdint.h>
++   /* cast through uintptr_t to avoid warnings */
++   #define POINTER_TO_UINT(X) ((unsigned int)(uintptr_t)(X))
++#else
++   #define POINTER_TO_UINT(X) ((unsigned int)(X))
++#endif
++
++#ifdef __cplusplus
++extern "C" { /* the types are probably wrong... */
++#endif
++#ifdef __cplusplus
++}
++#endif
++
++#ifdef _MSC_VER
++__declspec(align(8))
++#elif defined(__GNUC__)
++__attribute__((aligned(8)))
++#endif
++unsigned int ff_hevc_rpi_shader[] = {
++// ::mc_setup_c_q0
++// ::mc_start
++/* [0x00000000] */ 0x0000000c, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_setup_c_qn
++/* [0x00000008] */ 0x95801ff6, 0xd0025900, // mov tmurs, 1                  ; mov ra0, unif
++/* [0x00000010] */ 0xaaaaff00, 0xe6020827, // mov r0, [0,2,0,2,0,2,0,2,1,3,1,3,1,3,1,3]
++/* [0x00000018] */ 0x9181e1f6, 0xd00250d8, // shl rb_ef, r0, i_shift30      ; mov ra_base, unif
++/* [0x00000020] */ 0x0d801dc0, 0xd0020827, // sub r0, unif, 1
++/* [0x00000028] */ 0x119c11c0, 0xd00216a7, // shl rb_max_x, r0, v_x_shift
++/* [0x00000030] */ 0x0d801dc0, 0xd00217a7, // sub rb_max_y, unif, 1
++/* [0x00000038] */ 0xff800100, 0xe0020527, // mov ra_kff800100, 0xff800100
++/* [0x00000040] */ 0x000000ff, 0xe0021627, // mov rb_pmask, v_pmask
++/* [0x00000048] */ 0x001000ff, 0xe00205e7, // mov ra_blk_height_pmax, ((1 << v_bit_depth) - 1) | (v_blk_height << 16)
++/* [0x00000050] */ 0x00004000, 0xe00217e7, // mov rb_fir_off_h, (FIR_OFFSET << (v_bit_depth - 8))
++/* [0x00000058] */ 0x4000000e, 0xe0020667, // mov ra_fir_off_val_wt_den_p7, (FIR_OFFSET << 16) | (DENOM + 15 - v_bit_depth)
++/* [0x00000060] */ 0x95803ff6, 0x10024754, // mov ra_ef, rb_ef              ; mov rb_xpitch, unif
++/* [0x00000068] */ 0x15827d80, 0x10021427, // mov rb_pitch, unif
++/* [0x00000070] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0)
++/* [0x00000078] */ 0x0c9d03c0, 0x10021667, // add rb_dma1_base, r1, rb_pitch
++/* [0x00000080] */ 0x14981f80, 0xd0020827, // and r0, 1, elem_num
++/* [0x00000088] */ 0x409c5007, 0xd00049e0, // nop                           ; mul24 r0, r0, 5
++/* [0x00000090] */ 0x0c9a7180, 0x100210a7, // add rb_elem_x, r0, elem_num
++/* [0x00000098] */ 0x11001dc0, 0xd4020827, // shl r0, ra0.16b, v_x_shift
++/* [0x000000a0] */ 0x0c9c21c0, 0x10020827, // add r0, r0, rb_elem_x
++/* [0x000000a8] */ 0x930001f6, 0xd2225811, // max r0, r0, 0                 ; mov ra_y, ra0.16a
++/* [0x000000b0] */ 0x129da1c0, 0x10020827, // min r0, r0, rb_max_x
++/* [0x000000b8] */ 0x119c31c0, 0xd0220567, // shl ra_xshift_next, r0, 3
++/* [0x000000c0] */ 0x149dc1c0, 0xd0020827, // and r0, r0, -4
++/* [0x000000c8] */ 0x0d510dc0, 0x18020867, // sub r1, ra_k0, rb_pitch
++/* [0x000000d0] */ 0x149e7040, 0x10020867, // and r1, r0, r1
++/* [0x000000d8] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x000000e0] */ 0x8c827076, 0x10025800, // add r0, r0, r1                ; mov ra0, unif
++/* [0x000000e8] */ 0x0c627c00, 0x10020627, // add ra_base, ra_base, r0
++/* [0x000000f0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num
++/* [0x000000f8] */ 0x0f9c25c0, 0xd0020867, // asr r1, r2, 2
++/* [0x00000100] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6
++/* [0x00000108] */ 0x149c35c0, 0xd0020827, // and r0, r2, 3
++/* [0x00000110] */ 0x159e7040, 0x10020827, // or  r0, r0, r1
++/* [0x00000118] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0))
++/* [0x00000120] */ 0x0c9e7040, 0x10021727, // add r_vpm, r0, r1
++/* [0x00000128] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0))
++/* [0x00000130] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5
++/* [0x00000138] */ 0x0c9e7040, 0x100216e7, // add r_dma, r0, r1
++/* [0x00000140] */ 0x11001dc0, 0xd4020827, // shl r0, ra0.16b, v_x_shift
++/* [0x00000148] */ 0x8c0021f6, 0x12125811, // add r0, r0, rb_elem_x         ; mov ra_y2, ra0.16a
++/* [0x00000150] */ 0x938001f6, 0xd002480f, // max r0, r0, 0                 ; mov rb_base2, unif
++/* [0x00000158] */ 0x129da1c0, 0x10020827, // min r0, r0, rb_max_x
++/* [0x00000160] */ 0x119c31c0, 0xd0021067, // shl rb_xshift2_next, r0, 3
++/* [0x00000168] */ 0x149dc1c0, 0xd0020827, // and r0, r0, -4
++/* [0x00000170] */ 0x0d510dc0, 0x18020867, // sub r1, ra_k0, rb_pitch
++/* [0x00000178] */ 0x949c307f, 0xd0024863, // and r1, r0, r1                ; mov r3, PREREAD
++/* [0x00000180] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00000188] */ 0x8c467076, 0x12024822, // add r0, r0, r1                ; mov r2, ra_y2
++/* [0x00000190] */ 0x8c44fe36, 0x140253e0, // add rb_base2, rb_base2, r0    ; mov r0, ra_y
++// :1
++/* [0x00000198] */ 0x0d9c17c0, 0xd00228e7, // sub.setf r3, r3, 1
++/* [0x000001a0] */ 0x139c01c0, 0xd0020867, // max r1, r0, 0
++/* [0x000001a8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_max_y
++/* [0x000001b0] */ 0x4c51018f, 0x1a024821, // add r0, r0, ra_k1             ; mul24 r1, r1, rb_pitch
++/* [0x000001b8] */ 0x8c627c40, 0x10225e11, // add t0s, ra_base, r1          ; mov ra_y, r0
++/* [0x000001c0] */ 0x139c05c0, 0xd0020867, // max r1, r2, 0
++/* [0x000001c8] */ 0xffffffb0, 0xf03809e7, // brr.anynz -, r:1b
++/* [0x000001d0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_max_y
++/* [0x000001d8] */ 0x4c51058f, 0x1a0248a1, // add r2, r2, ra_k1             ; mul24 r1, r1, rb_pitch
++/* [0x000001e0] */ 0x8c9cfe52, 0x10125f11, // add t1s, rb_base2, r1         ; mov ra_y2, r2
++/* [0x000001e8] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x000001f0] */ 0x00000000, 0xe0024104, // mov ra4, 0                    ; mov rb4, 0
++/* [0x000001f8] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00000200] */ 0x00000000, 0xe0024145, // mov ra5, 0                    ; mov rb5, 0
++/* [0x00000208] */ 0x00000000, 0xe0024186, // mov ra6, 0                    ; mov rb6, 0
++/* [0x00000210] */ 0x00000000, 0xe00241c7, // mov ra7, 0                    ; mov rb7, 0
++// ::mc_filter_c_p
++/* [0x00000218] */ 0x9581cff6, 0x10025c42, // mov vw_setup, rb_vpm_init     ; mov ra2, unif
++/* [0x00000220] */ 0x8c803ff6, 0x100269e3, // add.setf -, rb_ef, rb_ef      ; mov r3, unif
++/* [0x00000228] */ 0xf1081dc0, 0xd4024825, // shl r0, ra2.16b, v_x_shift    ; v8subs r5rep, r0, r0
++/* [0x00000230] */ 0x8c8021f6, 0x10025810, // add r0, r0, rb_elem_x         ; mov ra_width_height, unif
++/* [0x00000238] */ 0x8d810bf6, 0x10025840, // sub r1, r5, rb_pitch          ; mov ra0, unif
++/* [0x00000240] */ 0x93567176, 0x14024800, // max r0, r0, r5                ; mov vrx_xshift, vrx_xshift_next
++/* [0x00000248] */ 0x9209a1f6, 0x12225813, // min r0, r0, rb_max_x          ; mov vra_y_next, ra2.16a
++/* [0x00000250] */ 0x119c31c0, 0xd0220567, // shl vrx_xshift_next, r0, 3
++/* [0x00000258] */ 0x149dc1c0, 0xd0020827, // and r0, r0, -4
++/* [0x00000260] */ 0x54402077, 0xd4024862, // and r1, r0, r1                ; mul24 r2, ra_width, v_x_mul
++/* [0x00000268] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00000270] */ 0x8c827076, 0x10025803, // add r0, r0, r1                ; mov ra3, unif
++/* [0x00000278] */ 0x8c427636, 0x120246a1, // add vrx_base_next, r3, r0     ; mov r1, ra_height
++/* [0x00000280] */ 0x8d819eb6, 0x10025756, // sub rb_dma1, rb_dma1_base, r2 ; mov ra_wt_off_mul_l0, unif
++/* [0x00000288] */ 0x8c5dc3ce, 0xdc025461, // add rb_i_tmu, r1, (3-4) - PREREAD ; v8min r1, r1, ra_blk_height
++/* [0x00000290] */ 0x8c81f3f6, 0xd0039496, // add rb_lcount, r1, (3-4)      ; mov.ifc ra_wt_off_mul_l0, unif
++/* [0x00000298] */ 0x918073f6, 0xd002581c, // shl r0, r1, v_dma_h_shift     ; mov ra_dest, unif
++/* [0x000002a0] */ 0x8c6670b6, 0x14024822, // add r0, r0, r2                ; mov r2, ra_fir_off_val
++/* [0x000002a8] */ 0x910d01f6, 0xdc02480a, // shl r0, r0, v_dma_wh_shift    ; mov rb10, ra3.8c
++/* [0x000002b0] */ 0x8c59b1f6, 0x140246e1, // add ra_dma0, r0, rb_dma0_base ; mov r1, ra_wt_off_l0
++/* [0x000002b8] */ 0x5158c3d6, 0xd2024860, // shl r1, r1, i_wt_den_p5       ; mul24 r0, r2, ra_wt_mul_l0
++/* [0x000002c0] */ 0x8d667236, 0x14025320, // sub rb_wt_off, r1, r0         ; mov r0, ra_kmul_add
++/* [0x000002c8] */ 0x8c59cc3f, 0xd21245a5, // add ra_wt_mul_l0, ra_wt_mul_l0, r0 ; mov r5rep, -4
++/* [0x000002d0] */ 0x950e0dbf, 0x1e0252de, // mov rb11, ra3.8d              ; mov ra_link, unif
++// :1
++/* [0x000002d8] */ 0x8d151bf6, 0xa00269c4, // sub.setf -, r5, rb_i_tmu      ; mov rb4, ra5                  ; ldtmu0
++/* [0x000002e0] */ 0x8e4c09f6, 0x140288a3, // shr r2, r4, vrx_xshift        ; mov.ifz  r3, vra_y_next
++/* [0x000002e8] */ 0x8e4485f6, 0xd402c863, // shr r1, r2, v_v_shift         ; mov.ifnz r3, vra_y
++/* [0x000002f0] */ 0x8c683ff6, 0x1002b9d8, // add.setf -, rb_ef, rb_ef      ; mov.ifz  vra_base, vrx_base_next
++/* [0x000002f8] */ 0x8c531789, 0xda224460, // add vra_y, r3, ra_k1          ; mov      r0, r1 << 15
++/* [0x00000300] */ 0x9353f792, 0xd803c8e1, // max r3, r3, ra_k0             ; mov.ifnc r1, r2 << 1
++/* [0x00000308] */ 0x929de7d2, 0x1003c8e0, // min r3, r3, rb_max_y          ; mov.ifnc r0, r2
++/* [0x00000310] */ 0x545d039f, 0x12024863, // and r1, r1, ra_pmax           ; mul24 r3, r3, rb_pitch
++/* [0x00000318] */ 0x8c618cc7, 0x10024e20, // add vr_txs, vra_base, r3      ; v8min r0, r0, rb_pmask
++/* [0x00000320] */ 0x4c001bf0, 0xd8025963, // add r5rep, r5, 1              ; mul24      r3, ra0.8a,       r0
++/* [0x00000328] */ 0x4d01fef1, 0x1e0248a3, // sub r2, rb_fir_off_h, r3      ; mul24      r3, ra0.8d,       r1
++/* [0x00000330] */ 0x4d03e4f0, 0xda0248a3, // sub r2, r2, r3                ; mul24      r3, ra0.8b << 2,  r0 << 2  @ "mul_used", 0
++/* [0x00000338] */ 0x40034031, 0xda0109e3, // nop                           ; mul24.ifn  r3, ra0.8b << 12, r1 << 12 @ "mul_used", 0
++/* [0x00000340] */ 0x4c03c4f0, 0xdc0248a3, // add r2, r2, r3                ; mul24      r3, ra0.8c << 4,  r0 << 4  @ "mul_used", 0
++/* [0x00000348] */ 0x4c032b71, 0xdc0329e3, // add.setf -, r5, r5            ; mul24.ifn  r3, ra0.8c << 14, r1 << 14 @ "mul_used", 0
++/* [0x00000350] */ 0xffffff68, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00000358] */ 0x4c1ca4f7, 0x100248a0, // add r2, r2, r3                ; mul24 r0, ra7, rb10
++/* [0x00000360] */ 0x550c6ffe, 0x1a024161, // mov ra5, rb6                  ; mul24 r1, rb6, ra3.8b
++/* [0x00000368] */ 0x8f1c05f6, 0xd00241c6, // asr ra7, r2, v_bit_depth - 8  ; mov rb6, ra7
++/* [0x00000370] */ 0x4c0c423e, 0x18024860, // add r1, r1, r0                ; mul24 r0, rb4, ra3.8a
++/* [0x00000378] */ 0x4d1cb237, 0x10024860, // sub r1, r1, r0                ; mul24 r0, ra7, rb11
++/* [0x00000380] */ 0x0d9e7200, 0x10020867, // sub r1, r1, r0
++/* [0x00000388] */ 0x8f5c63f6, 0xdc024863, // asr r1, r1, 6                 ; mov r3, ra_blk_height
++/* [0x00000390] */ 0x4d592bce, 0x120269e0, // sub.setf -, r5, rb_lcount     ; mul24 r0, r1, ra_wt_mul_l0
++/* [0x00000398] */ 0x4c64c1ce, 0x14024821, // add r0, r0, rb_wt_off         ; mul24 r1, r1, ra_kmul_add
++/* [0x000003a0] */ 0xed427073, 0x12024860, // sub r1, r0, r1                ; v8subs r0, ra_height, r3
++/* [0x000003a8] */ 0xffffff10, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x000003b0] */ 0x0f9cd3c0, 0xd0020867, // asr r1, r1, i_wt_den_p6
++/* [0x000003b8] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x000003c0] */ 0x5351039f, 0x18024c22, // max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++/* [0x000003c8] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0        ; mov vw_setup, ra_dma0
++/* [0x000003d0] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x000003d8] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3                ; mov vw_setup, rb_dma1
++/* [0x000003e0] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3                ; mov vw_addr, ra_dest
++/* [0x000003e8] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x000003f0] */ 0xfffffec8, 0xf0f809e7, // brr -, r:1b
++/* [0x000003f8] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x00000400] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x00000408] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_filter_c_p_l1
++/* [0x00000410] */ 0x9581cff6, 0x10025c42, // mov vw_setup, rb_vpm_init     ; mov ra2, unif
++/* [0x00000418] */ 0x8c803ff6, 0x100269e3, // add.setf -, rb_ef, rb_ef      ; mov r3, unif
++/* [0x00000420] */ 0xf1081dc0, 0xd4024825, // shl r0, ra2.16b, v_x_shift    ; v8subs r5rep, r0, r0
++/* [0x00000428] */ 0x8c8021f6, 0x10025810, // add r0, r0, rb_elem_x         ; mov ra_width_height, unif
++/* [0x00000430] */ 0x8d810bf6, 0x10025840, // sub r1, r5, rb_pitch          ; mov ra0, unif
++/* [0x00000438] */ 0x939c117f, 0x10125815, // max r0, r0, r5                ; mov vrx_xshift, vrx_xshift_next
++/* [0x00000440] */ 0x9209a1f6, 0x12125813, // min r0, r0, rb_max_x          ; mov vra_y_next, ra2.16a
++/* [0x00000448] */ 0x119c31c0, 0xd0021067, // shl vrx_xshift_next, r0, 3
++/* [0x00000450] */ 0x149dc1c0, 0xd0020827, // and r0, r0, -4
++/* [0x00000458] */ 0x54402077, 0xd4024862, // and r1, r0, r1                ; mul24 r2, ra_width, v_x_mul
++/* [0x00000460] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00000468] */ 0x8c827076, 0x10025803, // add r0, r0, r1                ; mov ra3, unif
++/* [0x00000470] */ 0x8c427636, 0x120254e1, // add vrx_base_next, r3, r0     ; mov r1, ra_height
++/* [0x00000478] */ 0x8d819eb6, 0x10025756, // sub rb_dma1, rb_dma1_base, r2 ; mov ra_wt_off_mul_l0, unif
++/* [0x00000480] */ 0x8c5dc3ce, 0xdc025461, // add rb_i_tmu, r1, (3-4) - PREREAD ; v8min r1, r1, ra_blk_height
++/* [0x00000488] */ 0x8c81f3f6, 0xd0039496, // add rb_lcount, r1, (3-4)      ; mov.ifc ra_wt_off_mul_l0, unif
++/* [0x00000490] */ 0x918073f6, 0xd002581c, // shl r0, r1, v_dma_h_shift     ; mov ra_dest, unif
++/* [0x00000498] */ 0x8c6670b6, 0x14024822, // add r0, r0, r2                ; mov r2, ra_fir_off_val
++/* [0x000004a0] */ 0x910d01f6, 0xdc02480a, // shl r0, r0, v_dma_wh_shift    ; mov rb10, ra3.8c
++/* [0x000004a8] */ 0x8c59b1f6, 0x140246e1, // add ra_dma0, r0, rb_dma0_base ; mov r1, ra_wt_off_l0
++/* [0x000004b0] */ 0x5158c3d6, 0xd2024860, // shl r1, r1, i_wt_den_p5       ; mul24 r0, r2, ra_wt_mul_l0
++/* [0x000004b8] */ 0x8d667236, 0x14025320, // sub rb_wt_off, r1, r0         ; mov r0, ra_kmul_add
++/* [0x000004c0] */ 0x8c59cc3f, 0xd21245a5, // add ra_wt_mul_l0, ra_wt_mul_l0, r0 ; mov r5rep, -4
++/* [0x000004c8] */ 0x950e0dbf, 0x1e0252de, // mov rb11, ra3.8d              ; mov ra_link, unif
++// :1
++/* [0x000004d0] */ 0x8d151bf6, 0xb00269c4, // sub.setf -, r5, rb_i_tmu      ; mov rb4, ra5                  ; ldtmu1
++/* [0x000004d8] */ 0x8e5539bf, 0x1202888f, // shr r2, r4, vrx_xshift        ; mov.ifz  vra_base, vrx_base_next
++/* [0x000004e0] */ 0x8e4485f6, 0xd202c863, // shr r1, r2, v_v_shift         ; mov.ifnz r3, vra_y
++/* [0x000004e8] */ 0x8c4c3ff6, 0x1202a9e3, // add.setf -, rb_ef, rb_ef      ; mov.ifz  r3, vra_y_next
++/* [0x000004f0] */ 0x8c531789, 0xda124460, // add vra_y, r3, ra_k1          ; mov      r0, r1 << 15
++/* [0x000004f8] */ 0x9353f792, 0xd803c8e1, // max r3, r3, ra_k0             ; mov.ifnc r1, r2 << 1
++/* [0x00000500] */ 0x929de7d2, 0x1003c8e0, // min r3, r3, rb_max_y          ; mov.ifnc r0, r2
++/* [0x00000508] */ 0x545d039f, 0x12024863, // and r1, r1, ra_pmax           ; mul24 r3, r3, rb_pitch
++/* [0x00000510] */ 0x8c5cfec6, 0x12024f20, // add vr_txs, vra_base, r3      ; v8min r0, r0, ra_pmax
++/* [0x00000518] */ 0x4c001bf0, 0xd8025963, // add r5rep, r5, 1              ; mul24      r3, ra0.8a,       r0
++/* [0x00000520] */ 0x4d01fef1, 0x1e0248a3, // sub r2, rb_fir_off_h, r3      ; mul24      r3, ra0.8d,       r1
++/* [0x00000528] */ 0x4d03e4f0, 0xda0248a3, // sub r2, r2, r3                ; mul24      r3, ra0.8b << 2,  r0 << 2  @ "mul_used", 0
++/* [0x00000530] */ 0x40034031, 0xda0109e3, // nop                           ; mul24.ifn  r3, ra0.8b << 12, r1 << 12 @ "mul_used", 0
++/* [0x00000538] */ 0x4c03c4f0, 0xdc0248a3, // add r2, r2, r3                ; mul24      r3, ra0.8c << 4,  r0 << 4  @ "mul_used", 0
++/* [0x00000540] */ 0x4c032b71, 0xdc0329e3, // add.setf -, r5, r5            ; mul24.ifn  r3, ra0.8c << 14, r1 << 14 @ "mul_used", 0
++/* [0x00000548] */ 0xffffff68, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00000550] */ 0x4c1ca4f7, 0x100248a0, // add r2, r2, r3                ; mul24 r0, ra7, rb10
++/* [0x00000558] */ 0x550c6ffe, 0x1a024161, // mov ra5, rb6                  ; mul24 r1, rb6, ra3.8b
++/* [0x00000560] */ 0x8f1c05f6, 0xd00241c6, // asr ra7, r2, v_bit_depth - 8  ; mov rb6, ra7
++/* [0x00000568] */ 0x4c0c423e, 0x18024860, // add r1, r1, r0                ; mul24 r0, rb4, ra3.8a
++/* [0x00000570] */ 0x4d1cb237, 0x10024860, // sub r1, r1, r0                ; mul24 r0, ra7, rb11
++/* [0x00000578] */ 0x0d9e7200, 0x10020867, // sub r1, r1, r0
++/* [0x00000580] */ 0x8f5c63f6, 0xdc024863, // asr r1, r1, 6                 ; mov r3, ra_blk_height
++/* [0x00000588] */ 0x4d592bce, 0x120269e0, // sub.setf -, r5, rb_lcount     ; mul24 r0, r1, ra_wt_mul_l0
++/* [0x00000590] */ 0x4c64c1ce, 0x14024821, // add r0, r0, rb_wt_off         ; mul24 r1, r1, ra_kmul_add
++/* [0x00000598] */ 0xed427073, 0x12024860, // sub r1, r0, r1                ; v8subs r0, ra_height, r3
++/* [0x000005a0] */ 0xffffff10, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x000005a8] */ 0x0f9cd3c0, 0xd0020867, // asr r1, r1, i_wt_den_p6
++/* [0x000005b0] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x000005b8] */ 0x5351039f, 0x18024c22, // max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++/* [0x000005c0] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0        ; mov vw_setup, ra_dma0
++/* [0x000005c8] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x000005d0] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3                ; mov vw_setup, rb_dma1
++/* [0x000005d8] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3                ; mov vw_addr, ra_dest
++/* [0x000005e0] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x000005e8] */ 0xfffffec8, 0xf0f809e7, // brr -, r:1b
++/* [0x000005f0] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x000005f8] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x00000600] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_filter_c_b
++/* [0x00000608] */ 0x9581cff6, 0x10025c42, // mov vw_setup, rb_vpm_init     ; mov ra2, unif
++/* [0x00000610] */ 0x8c803ff6, 0x100269e3, // add.setf -, rb_ef, rb_ef      ; mov r3, unif
++/* [0x00000618] */ 0xf1081dc9, 0xd4024825, // shl r0, ra2.16b, v_x_shift    ; v8subs r5rep, r1, r1
++/* [0x00000620] */ 0x8c0821f6, 0x12225813, // add r0, r0, rb_elem_x         ; mov ra_y_next, ra2.16a
++/* [0x00000628] */ 0x8d810bf6, 0x10025850, // sub r1, r5, rb_pitch          ; mov ra_width_height, unif
++/* [0x00000630] */ 0x93567176, 0x14125815, // max r0, r0, r5                ; mov ra_xshift, ra_xshift_next
++/* [0x00000638] */ 0x9281a1f6, 0x10025800, // min r0, r0, rb_max_x          ; mov ra0, unif
++/* [0x00000640] */ 0x119c31c0, 0xd0220567, // shl ra_xshift_next, r0, 3
++/* [0x00000648] */ 0x9481c1f6, 0xd0025802, // and r0, r0, -4                ; mov ra2, unif
++/* [0x00000650] */ 0x54402077, 0xd4024862, // and r1, r0, r1                ; mul24 r2, ra_width, v_x_mul
++/* [0x00000658] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00000660] */ 0x8c427076, 0x12024821, // add r0, r0, r1                ; mov r1, ra_height
++/* [0x00000668] */ 0x8c9c163f, 0x10024680, // add ra_base_next, r3, r0      ; mov rb_xshift2, rb_xshift2_next
++/* [0x00000670] */ 0x8d819eb6, 0x10025756, // sub rb_dma1, rb_dma1_base, r2 ; mov ra_wt_off_mul_l0, unif
++/* [0x00000678] */ 0x8c5dc3ce, 0xdc025461, // add rb_i_tmu, r1, (3-4) - PREREAD ; v8min r1, r1, ra_blk_height
++/* [0x00000680] */ 0x8c59f3f6, 0xd4139496, // add rb_lcount, r1, (3-4)      ; mov.ifc ra_wt_mul_l0, ra_wt_off_l0
++/* [0x00000688] */ 0x918073f6, 0xd0025803, // shl r0, r1, v_dma_h_shift     ; mov ra3, unif
++/* [0x00000690] */ 0x8c8270b6, 0x10024823, // add r0, r0, r2                ; mov r3, unif
++/* [0x00000698] */ 0x910d01f6, 0xd2125813, // shl r0, r0, v_dma_wh_shift    ; mov ra_y2_next, ra3.16a
++/* [0x000006a0] */ 0x8c0db1f6, 0x140246e0, // add ra_dma0, r0, rb_dma0_base ; mov r0, ra3.16b
++/* [0x000006a8] */ 0x918011f6, 0xd0025801, // shl r0, r0, v_x_shift         ; mov ra1, unif
++/* [0x000006b0] */ 0x8c8021f6, 0x10025803, // add r0, r0, rb_elem_x         ; mov ra3, unif
++/* [0x000006b8] */ 0x8d810bf6, 0x10025852, // sub r1, r5, rb_pitch          ; mov ra_wt_off_mul_l1, unif
++/* [0x000006c0] */ 0x939de17f, 0x10025809, // max r0, r0, r5                ; mov ra9, rb_max_y
++/* [0x000006c8] */ 0x9265a1f6, 0x14024822, // min r0, r0, rb_max_x          ; mov r2, ra_kmul_add
++/* [0x000006d0] */ 0x119c31c0, 0xd0021067, // shl rb_xshift2_next, r0, 3
++/* [0x000006d8] */ 0x9481c1f6, 0xd0039812, // and r0, r0, -4                ; mov.ifc ra_wt_off_mul_l1, unif
++/* [0x000006e0] */ 0x949dc07f, 0xd0024865, // and r1, r0, r1                ; mov r5rep, -4
++/* [0x000006e8] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x000006f0] */ 0x8c827076, 0x1002581c, // add r0, r0, r1                ; mov ra_dest, unif
++/* [0x000006f8] */ 0x8c667636, 0x140254e0, // add rb_base2_next, r3, r0     ; mov r0, ra_fir_off_val
++/* [0x00000700] */ 0x4c5a7c86, 0x121245a1, // add ra_wt_mul_l0, ra_wt_mul_l0, r2 ; mul24 r1, r0, ra_wt_mul_l0
++/* [0x00000708] */ 0x4c4a7c86, 0x121244a0, // add ra_wt_mul_l1, ra_wt_mul_l1, r2 ; mul24 r0, r0, ra_wt_mul_l1
++/* [0x00000710] */ 0x8c4a7076, 0x14024821, // add r0, r0, r1                ; mov r1, ra_wt_off_l1
++/* [0x00000718] */ 0x910cd3f6, 0xde02484b, // shl r1, r1, i_wt_den_p6       ; mov rb11, ra3.8d
++/* [0x00000720] */ 0x8d827236, 0x1002531e, // sub rb_wt_off, r1, r0         ; mov ra_link, unif
++/* [0x00000728] */ 0x95080ff6, 0x1e024287, // mov ra10, rb_xshift2          ; mov rb7,  ra2.8d
++// :1
++/* [0x00000730] */ 0x0d9d1bc0, 0xa00229e7, // sub.setf -, r5, rb_i_tmu      ; nop                           ; ldtmu0
++/* [0x00000738] */ 0x8e5539bf, 0x1202888f, // shr r2, r4, ra_xshift         ; mov.ifz rb_base2, rb_base2_next
++/* [0x00000740] */ 0x8e4c85f6, 0xd0029851, // shr r1, r2, v_v_shift         ; mov.ifz ra_y_y2, ra_y_y2_next
++/* [0x00000748] */ 0x8c683ff6, 0x1002b9d8, // add.setf -, rb_ef, rb_ef      ; mov.ifz ra_base, ra_base_next
++/* [0x00000750] */ 0x8c441fb6, 0xd4224463, // add ra_y, 1, ra_y             ; mov r3, ra_y
++/* [0x00000758] */ 0x93531789, 0xd80248e0, // max r3, r3, ra_k0             ; mov      r0, r1 << 15
++/* [0x00000760] */ 0x9227f792, 0xd003c8e1, // min r3, r3, ra9               ; mov.ifnc r1, r2 << 1
++/* [0x00000768] */ 0x559d049f, 0x100e4823, // mov.ifnc r0, r2               ; mul24 r3, r3, rb_pitch
++/* [0x00000770] */ 0x8c618cc7, 0x10024e20, // add t0s, ra_base, r3          ; v8min r0, r0, rb_pmask
++/* [0x00000778] */ 0x540183f0, 0x18024862, // and r1, r1, rb_pmask          ; mul24      r2, ra0.8a,       r0
++/* [0x00000780] */ 0x4d01feb1, 0x1e0248a3, // sub r2, rb_fir_off_h, r2      ; mul24      r3, ra0.8d,       r1
++/* [0x00000788] */ 0x4d03e4f0, 0xda0248a3, // sub r2, r2, r3                ; mul24      r3, ra0.8b << 2,  r0 << 2  @ "mul_used", 0
++/* [0x00000790] */ 0x40034031, 0xda0109e3, // nop                           ; mul24.ifn  r3, ra0.8b << 12, r1 << 12 @ "mul_used", 0
++/* [0x00000798] */ 0x4c03c4f0, 0xdc0248a3, // add r2, r2, r3                ; mul24      r3, ra0.8c << 4,  r0 << 4  @ "mul_used", 0
++/* [0x000007a0] */ 0x40032031, 0xdc0109e3, // nop                           ; mul24.ifn  r3, ra0.8c << 14, r1 << 14 @ "mul_used", 0
++/* [0x000007a8] */ 0x4c0854fe, 0xb8025804, // add r0, r2, r3                ; mul24 ra4, rb5, ra2.8a        ; ldtmu1
++/* [0x000007b0] */ 0x8e2869bf, 0x10024885, // shr r2, r4, ra10              ; mov rb5, rb6
++/* [0x000007b8] */ 0x8e4485f6, 0xd2024863, // shr r1, r2, v_v_shift         ; mov r3, ra_y2
++/* [0x000007c0] */ 0x8e1c01f6, 0xd00241c6, // shr ra7, r0, v_bit_depth - 8  ; mov rb6, ra7
++/* [0x000007c8] */ 0x8c531789, 0xda124460, // add ra_y2, r3, ra_k1          ; mov      r0, r1 << 15
++/* [0x000007d0] */ 0x9353f792, 0xd803c8e1, // max r3, r3, ra_k0             ; mov.ifnc r1, r2 << 1
++/* [0x000007d8] */ 0x925de7ce, 0x120248e1, // min r3, r3, rb_max_y          ; v8min r1, r1, ra_pmax
++/* [0x000007e0] */ 0x559d049f, 0x100e4823, // mov.ifnc r0, r2               ; mul24 r3, r3, rb_pitch
++/* [0x000007e8] */ 0x8c5cfec6, 0x12024f20, // add t1s, rb_base2, r3         ; v8min r0, r0, ra_pmax
++/* [0x000007f0] */ 0x4c041bf0, 0xd8025962, // add r5rep, r5, 1              ; mul24      r2, ra1.8a,       r0
++/* [0x000007f8] */ 0x4d05feb1, 0x1e0248a3, // sub r2, rb_fir_off_h, r2      ; mul24      r3, ra1.8d,       r1
++/* [0x00000800] */ 0x4d07e4f0, 0xda0248a3, // sub r2, r2, r3                ; mul24      r3, ra1.8b << 2,  r0 << 2  @ "mul_used", 0
++/* [0x00000808] */ 0x40074031, 0xda0109e3, // nop                           ; mul24.ifn  r3, ra1.8b << 12, r1 << 12 @ "mul_used", 0
++/* [0x00000810] */ 0x4c07c6b0, 0xdc0248a3, // add r2, r3, r2                ; mul24      r3, ra1.8c << 4,  r0 << 4  @ "mul_used", 0
++/* [0x00000818] */ 0x4c072b71, 0xdc0329e3, // add.setf -, r5, r5            ; mul24.ifn  r3, ra1.8c << 14, r1 << 14 @ "mul_used", 0
++/* [0x00000820] */ 0xfffffef0, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00000828] */ 0x4c0c94fe, 0x180248a0, // add r2, r2, r3                ; mul24 r0, rb9,  ra3.8a
++/* [0x00000830] */ 0x550caffe, 0x1a025261, // mov rb9, rb10                 ; mul24 r1, rb10, ra3.8b
++/* [0x00000838] */ 0x8e2c05f6, 0xd00242ca, // shr ra11, r2, v_bit_depth - 8 ; mov rb10, ra11
++/* [0x00000840] */ 0x4d08523e, 0x1a0248a1, // sub r2, r1, r0                ; mul24 r1, rb5,  ra2.8b
++/* [0x00000848] */ 0x8d112bf6, 0x100269e0, // sub.setf -, r5, rb_lcount     ; mov r0, ra4
++/* [0x00000850] */ 0x4d08623e, 0x1c024860, // sub r1, r1, r0                ; mul24 r0, rb6,  ra2.8c
++/* [0x00000858] */ 0x4c1c7237, 0x10024860, // add r1, r1, r0                ; mul24 r0, ra7,  rb7
++/* [0x00000860] */ 0x4d0ca23e, 0x1c024860, // sub r1, r1, r0                ; mul24 r0, rb10, ra3.8c
++/* [0x00000868] */ 0x4c2cb437, 0x100248a0, // add r2, r2, r0                ; mul24 r0, ra11, rb11
++/* [0x00000870] */ 0x0d9e7400, 0x100208a7, // sub r2, r2, r0
++/* [0x00000878] */ 0x0e9c63c0, 0xd0020867, // shr r1, r1, 6
++/* [0x00000880] */ 0x4e5865ce, 0xd20248a0, // shr r2, r2, 6                 ; mul24 r0, r1, ra_wt_mul_l0
++/* [0x00000888] */ 0x4c4a7456, 0x120248a1, // add r2, r2, r1                ; mul24 r1, r2, ra_wt_mul_l1
++/* [0x00000890] */ 0x4c667216, 0x14024862, // add r1, r1, r0                ; mul24 r2, r2, ra_kmul_add
++/* [0x00000898] */ 0x8d5e72b6, 0x1c024863, // sub r1, r1, r2                ; mov r3, ra_blk_height
++/* [0x000008a0] */ 0xec40c3f3, 0x12024860, // add r1, r1, rb_wt_off         ; v8subs r0, ra_height, r3
++/* [0x000008a8] */ 0xfffffe68, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x000008b0] */ 0x0f667380, 0x18020867, // asr r1, r1, ra_wt_den_p7
++/* [0x000008b8] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x000008c0] */ 0x5351039f, 0x18024c22, // max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++/* [0x000008c8] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0        ; mov vw_setup, ra_dma0
++/* [0x000008d0] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x000008d8] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3                ; mov vw_setup, rb_dma1
++/* [0x000008e0] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3                ; mov vw_addr, ra_dest
++/* [0x000008e8] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x000008f0] */ 0xfffffe20, 0xf0f809e7, // brr -, r:1b
++/* [0x000008f8] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x00000900] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x00000908] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_sync_q0
++/* [0x00000910] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00000918] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00000920] */ 0x00000010, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000928] */ 0x00000010, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000930] */ 0x00000010, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000938] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00000940] */ 0x0000001c, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000948] */ 0x00000001, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00000950] */ 0x0000000d, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync_q1
++/* [0x00000958] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00000960] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00000968] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00000970] */ 0x00000000, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00000978] */ 0x00000011, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000980] */ 0x00000002, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync_q2
++/* [0x00000988] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00000990] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00000998] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x000009a0] */ 0x00000000, 0xe80009e7, // mov  dst, srel(i)
++/* [0x000009a8] */ 0x00000012, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x000009b0] */ 0x00000003, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync_q3
++/* [0x000009b8] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x000009c0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x000009c8] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x000009d0] */ 0x00000000, 0xe80009e7, // mov  dst, srel(i)
++/* [0x000009d8] */ 0x00000013, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x000009e0] */ 0x009e7000, 0x100009e7, // nop
++// ::mc_sync_q4
++/* [0x000009e8] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x000009f0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x000009f8] */ 0x00000014, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000a00] */ 0x00000014, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000a08] */ 0x00000014, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000a10] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00000a18] */ 0x0000001d, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000a20] */ 0x00000005, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00000a28] */ 0x0000000e, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync_q5
++/* [0x00000a30] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00000a38] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00000a40] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00000a48] */ 0x00000004, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00000a50] */ 0x00000015, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000a58] */ 0x00000006, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync_q6
++/* [0x00000a60] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00000a68] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00000a70] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00000a78] */ 0x00000004, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00000a80] */ 0x00000016, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000a88] */ 0x00000007, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync_q7
++/* [0x00000a90] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00000a98] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00000aa0] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00000aa8] */ 0x00000004, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00000ab0] */ 0x00000017, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000ab8] */ 0x009e7000, 0x100009e7, // nop
++// ::mc_sync_q8
++/* [0x00000ac0] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00000ac8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00000ad0] */ 0x00000018, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000ad8] */ 0x00000018, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000ae0] */ 0x00000018, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000ae8] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00000af0] */ 0x0000001e, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000af8] */ 0x00000009, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00000b00] */ 0x0000000c, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync_q9
++/* [0x00000b08] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00000b10] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00000b18] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00000b20] */ 0x00000008, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00000b28] */ 0x00000019, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000b30] */ 0x0000000a, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync_q10
++/* [0x00000b38] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00000b40] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00000b48] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00000b50] */ 0x00000008, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00000b58] */ 0x0000001a, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000b60] */ 0x0000000b, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync_q11
++/* [0x00000b68] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00000b70] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00000b78] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00000b80] */ 0x00000008, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00000b88] */ 0x0000001b, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000b90] */ 0x009e7000, 0x100009e7, // nop
++// ::mc_exit_c_qn
++// ::mc_exit_y_qn
++/* [0x00000b98] */ 0x00000002, 0xe00228e7, // mov.setf r3, PREREAD - 1
++// :1
++/* [0x00000ba0] */ 0xffffffe0, 0xf03809e7, // brr.anynz -, r:1b
++/* [0x00000ba8] */ 0x009e7000, 0xa00009e7, // nop                   ; nop           ; ldtmu0
++/* [0x00000bb0] */ 0x009e7000, 0xb00009e7, // nop                   ; nop           ; ldtmu1
++/* [0x00000bb8] */ 0x0d9c17c0, 0xd00228e7, // sub.setf r3, r3, 1
++/* [0x00000bc0] */ 0x159f2fc0, 0x100009e7, // mov  -, vw_wait
++/* [0x00000bc8] */ 0x009e7000, 0x300009e7, // nop                   ; nop           ; thrend
++/* [0x00000bd0] */ 0x009e7000, 0x100009e7, // nop
++/* [0x00000bd8] */ 0x009e7000, 0x100009e7, // nop
++// ::mc_exit_c_q0
++// ::mc_exit_y_q0
++/* [0x00000be0] */ 0x00000002, 0xe00228e7, // mov.setf r3, PREREAD - 1
++// :1
++/* [0x00000be8] */ 0xffffffe0, 0xf03809e7, // brr.anynz -, r:1b
++/* [0x00000bf0] */ 0x009e7000, 0xa00009e7, // nop                   ; nop           ; ldtmu0
++/* [0x00000bf8] */ 0x009e7000, 0xb00009e7, // nop                   ; nop           ; ldtmu1
++/* [0x00000c00] */ 0x0d9c17c0, 0xd00228e7, // sub.setf r3, r3, 1
++/* [0x00000c08] */ 0x159f2fc0, 0x100009e7, // mov  -, vw_wait
++/* [0x00000c10] */ 0x0000001c, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00000c18] */ 0x009e7000, 0x300009e7, // nop                   ; nop           ; thrend
++/* [0x00000c20] */ 0x00000001, 0xe00209a7, // mov interrupt, 1
++/* [0x00000c28] */ 0x009e7000, 0x100009e7, // nop
++// ::mc_setup_y_q0
++/* [0x00000c30] */ 0x0000000c, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_setup_y_qn
++/* [0x00000c38] */ 0x95801ff6, 0xd0025900, // mov tmurs, 1                  ; mov ra0, unif
++/* [0x00000c40] */ 0x15827d80, 0x10020267, // mov ra9, unif
++/* [0x00000c48] */ 0x15827d80, 0x10020067, // mov ra1, unif
++/* [0x00000c50] */ 0xaaaaff00, 0xe6020827, // mov r0, [0,2,0,2,0,2,0,2,1,3,1,3,1,3,1,3]
++/* [0x00000c58] */ 0x9181e1f6, 0xd00250cb, // shl rb_ef, r0, i_shift30      ; mov ra11, unif
++/* [0x00000c60] */ 0xff800100, 0xe0020527, // mov ra_kff800100, 0xff800100
++/* [0x00000c68] */ 0x000000ff, 0xe0021627, // mov rb_pmask, v_pmask
++/* [0x00000c70] */ 0x001000ff, 0xe00205e7, // mov ra_blk_height_pmax, ((1 << v_bit_depth) - 1) | (v_blk_height << 16)
++/* [0x00000c78] */ 0x00004000, 0xe00217e7, // mov rb_fir_off_h, (FIR_OFFSET << (v_bit_depth - 8))
++/* [0x00000c80] */ 0x4000000e, 0xe0020667, // mov ra_fir_off_val_wt_den_p7, (FIR_OFFSET << 16) | (DENOM + 15 - v_bit_depth)
++/* [0x00000c88] */ 0x050b0a00, 0xe0021567, // mov rb_y_coeffs_2, 0x050b0a00
++/* [0x00000c90] */ 0x11283a40, 0xe00215a7, // mov rb_y_coeffs_3, 0x11283a40
++/* [0x00000c98] */ 0x0a0b0500, 0xe00215e7, // mov rb_y_coeffs_5, 0x0a0b0500
++/* [0x00000ca0] */ 0x15827d80, 0x100200e7, // mov ra3, unif
++/* [0x00000ca8] */ 0x95803ff6, 0x10024754, // mov ra_ef, rb_ef              ; mov rb_xpitch, unif
++/* [0x00000cb0] */ 0x0d0c1dc0, 0xd40216a7, // sub rb_max_x, ra3.16b, 1
++/* [0x00000cb8] */ 0x0d0c1dc0, 0xd20217a7, // sub rb_max_y, ra3.16a, 1
++/* [0x00000cc0] */ 0x959a0dbf, 0x100248d0, // mov r3, elem_num              ; mov rb_pitch, unif
++/* [0x00000cc8] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0)
++/* [0x00000cd0] */ 0x159d03c0, 0x10021667, // or  rb_dma1_base, r1, rb_pitch
++/* [0x00000cd8] */ 0x0c027cc0, 0x14020827, // add r0, ra0.16b, r3
++/* [0x00000ce0] */ 0x139c01c0, 0xd0020827, // max r0, r0, 0
++/* [0x00000ce8] */ 0x129da1c0, 0x10020827, // min r0, r0, rb_max_x
++/* [0x00000cf0] */ 0x119c31c0, 0xd0220567, // shl ra_xshift_next, r0, 3
++/* [0x00000cf8] */ 0xf49dc1d2, 0xd0024822, // and r0, r0, -4                ; v8subs r2, r2, r2
++/* [0x00000d00] */ 0x0d9d05c0, 0x100208a7, // sub r2, r2, rb_pitch
++/* [0x00000d08] */ 0x149e7080, 0x10020867, // and r1, r0, r2
++/* [0x00000d10] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00000d18] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1
++/* [0x00000d20] */ 0x0c267c00, 0x10020627, // add ra_base, ra9, r0
++/* [0x00000d28] */ 0x0c067cc0, 0x14020827, // add r0, ra1.16b, r3
++/* [0x00000d30] */ 0x139c01c0, 0xd0020827, // max r0, r0, 0
++/* [0x00000d38] */ 0x129da1c0, 0x10020827, // min r0, r0, rb_max_x
++/* [0x00000d40] */ 0x119c31c0, 0xd0021067, // shl rb_xshift2_next, r0, 3
++/* [0x00000d48] */ 0x149dc1c0, 0xd0020827, // and r0, r0, -4
++/* [0x00000d50] */ 0x149e7080, 0x10020867, // and r1, r0, r2
++/* [0x00000d58] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00000d60] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1
++/* [0x00000d68] */ 0x0c2e7c00, 0x100213e7, // add rb_base2, ra11, r0
++/* [0x00000d70] */ 0x80027036, 0x120049e0, // nop                           ; mov r0, ra0.16a
++/* [0x00000d78] */ 0x95043ff6, 0xd20248e2, // mov r3, PREREAD               ; mov r2, ra1.16a
++// :1
++/* [0x00000d80] */ 0x0d9c17c0, 0xd00228e7, // sub.setf r3, r3, 1
++/* [0x00000d88] */ 0x139c01c0, 0xd0020867, // max r1, r0, 0
++/* [0x00000d90] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_max_y
++/* [0x00000d98] */ 0x4c51018f, 0x1a024821, // add r0, r0, ra_k1             ; mul24 r1, r1, rb_pitch
++/* [0x00000da0] */ 0x8c627c40, 0x10225e11, // add t0s, ra_base, r1          ; mov ra_y, r0
++/* [0x00000da8] */ 0x139c05c0, 0xd0020867, // max r1, r2, 0
++/* [0x00000db0] */ 0xffffffb0, 0xf03809e7, // brr.anynz -, r:1b
++/* [0x00000db8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_max_y
++/* [0x00000dc0] */ 0x4c51058f, 0x1a0248a1, // add r2, r2, ra_k1             ; mul24 r1, r1, rb_pitch
++/* [0x00000dc8] */ 0x8c9cfe52, 0x10125f11, // add t1s, rb_base2, r1         ; mov ra_y2, r2
++/* [0x00000dd0] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num
++/* [0x00000dd8] */ 0x0f9c25c0, 0xd0020867, // asr r1, r2, 2
++/* [0x00000de0] */ 0x119c63c0, 0xd0020867, // shl r1, r1, 6
++/* [0x00000de8] */ 0x149c35c0, 0xd0020827, // and r0, r2, 3
++/* [0x00000df0] */ 0x159e7040, 0x10020827, // or  r0, r0, r1
++/* [0x00000df8] */ 0x00004800, 0xe0020867, // mov r1, vpm_setup(0, 4, h8p(0, 0))
++/* [0x00000e00] */ 0x0c9e7040, 0x10021727, // add r_vpm, r0, r1
++/* [0x00000e08] */ 0x80004004, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0))
++/* [0x00000e10] */ 0x119c51c0, 0xd0020827, // shl r0, r0, 5
++/* [0x00000e18] */ 0x0c9e7040, 0x100216e7, // add r_dma, r0, r1
++/* [0x00000e20] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00000e28] */ 0x00000000, 0xe0024208, // mov ra8,  0                   ; mov rb8,  0
++/* [0x00000e30] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00000e38] */ 0x00000000, 0xe0024249, // mov ra9,  0                   ; mov rb9,  0
++/* [0x00000e40] */ 0x00000000, 0xe002428a, // mov ra10, 0                   ; mov rb10, 0
++/* [0x00000e48] */ 0x00000000, 0xe00242cb, // mov ra11, 0                   ; mov rb11, 0
++// :per_block_setup_8
++/* [0x00000e50] */ 0x93567176, 0x14125815, // max r0, r0, r5                ; mov ra_xshift, ra_xshift_next
++/* [0x00000e58] */ 0x129da1c0, 0x10020827, // min r0, r0, rb_max_x
++/* [0x00000e60] */ 0x119c31c0, 0xd0220567, // shl ra_xshift_next, r0, 3
++/* [0x00000e68] */ 0x149dc1c0, 0xd0020827, // and r0, r0, -4
++/* [0x00000e70] */ 0x8d810bf6, 0x1002589a, // sub r2, r5, rb_pitch          ; mov ra_base_next, unif
++/* [0x00000e78] */ 0x940270b6, 0x12225853, // and r1, r0, r2                ; mov ra_y_next, ra0.16a
++/* [0x00000e80] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00000e88] */ 0x8c827076, 0x10025801, // add r0, r0, r1                ; mov ra1, unif
++/* [0x00000e90] */ 0x0c6a7c00, 0x100206a7, // add ra_base_next, ra_base_next, r0
++/* [0x00000e98] */ 0x0c067cc0, 0x14020827, // add r0, ra1.16b, r3
++/* [0x00000ea0] */ 0x93067176, 0x12125813, // max r0, r0, r5                ; mov ra_y2_next, ra1.16a
++/* [0x00000ea8] */ 0x9281a1f6, 0x10024813, // min r0, r0, rb_max_x          ; mov rb_base2_next, unif
++/* [0x00000eb0] */ 0x119c31c0, 0xd0021067, // shl rb_xshift2_next, r0, 3
++/* [0x00000eb8] */ 0x9481c1f6, 0xd0025810, // and r0, r0, -4                ; mov ra_width_height, unif
++/* [0x00000ec0] */ 0x949dc0bf, 0x10024871, // and r1, r0, r2                ; mov vw_setup, rb_vpm_init
++/* [0x00000ec8] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00000ed0] */ 0x4c401077, 0xd4024821, // add r0, r0, r1                ; mul24 r1, ra_width, v_x_mul
++/* [0x00000ed8] */ 0x0c9d3e00, 0x100214e7, // add rb_base2_next, rb_base2_next, r0
++/* [0x00000ee0] */ 0x8d419e76, 0x12025760, // sub rb_dma1, rb_dma1_base, r1 ; mov r0, ra_height
++/* [0x00000ee8] */ 0x8c5dc1c6, 0xdc025460, // add rb_i_tmu, r0, (7-8) - PREREAD ; v8min r0, r0, ra_blk_height
++/* [0x00000ef0] */ 0x0c9df1c0, 0xd00214a7, // add rb_lcount, r0, (7-8)
++/* [0x00000ef8] */ 0x916471f6, 0xd4024823, // shl r0, r0, v_dma_h_shift     ; mov r3, ra_kmul_add
++/* [0x00000f00] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1
++/* [0x00000f08] */ 0x916501f6, 0xd4024822, // shl r0, r0, v_dma_wh_shift    ; mov r2, ra_fir_off_val
++/* [0x00000f10] */ 0x8c81b1f6, 0x100246e0, // add ra_dma0, r0, rb_dma0_base ; mov r0, unif
++/* [0x00000f18] */ 0x918101f6, 0xd00a5816, // shl.ifnn r0, r0, i_shift16    ; mov ra_wt_off_mul_l0, unif
++/* [0x00000f20] */ 0x915031f6, 0xde024205, // shl ra8, r0, 3                ; mov rb5, ra_k255
++/* [0x00000f28] */ 0x01040400, 0xe0020867, // mov r1, 0x01040400
++/* [0x00000f30] */ 0x10227380, 0x1e5200a7, // ror ra2.8b, r1, ra8.8d
++/* [0x00000f38] */ 0x10227380, 0x1c520027, // ror ra0.8b, r1, ra8.8c
++/* [0x00000f40] */ 0x10215f80, 0x1e6200a7, // ror ra2.8c, rb_y_coeffs_2, ra8.8d
++/* [0x00000f48] */ 0x10215f80, 0x1c620027, // ror ra0.8c, rb_y_coeffs_2, ra8.8c
++/* [0x00000f50] */ 0x00010100, 0xe0020867, // mov r1,0x00010100
++/* [0x00000f58] */ 0x902203bf, 0x1e025812, // ror r0, r1, ra8.8d            ; mov ra_wt_off_mul_l1, unif
++/* [0x00000f60] */ 0x90205387, 0x1c424004, // ror ra0.8a, r1, ra8.8c        ; v8min rb4, r0, rb5
++/* [0x00000f68] */ 0x914883f6, 0xd0031856, // shl r1, r1, 8                 ; mov.ifn ra_wt_off_mul_l0, ra_wt_off_mul_l1
++/* [0x00000f70] */ 0x902203bf, 0x1e02581c, // ror r0, r1, ra8.8d            ; mov ra_dest, unif
++/* [0x00000f78] */ 0x90205387, 0x1c72404b, // ror ra1.8d, r1, ra8.8c        ; v8min rb11, r0, rb5
++/* [0x00000f80] */ 0x10216f80, 0x1e7200a7, // ror ra2.8d, rb_y_coeffs_3, ra8.8d
++/* [0x00000f88] */ 0x10216f80, 0x1c720027, // ror ra0.8d, rb_y_coeffs_3, ra8.8c
++/* [0x00000f90] */ 0x10217f80, 0x1e5200e7, // ror ra3.8b, rb_y_coeffs_5, ra8.8d
++/* [0x00000f98] */ 0x10217f80, 0x1c520067, // ror ra1.8b, rb_y_coeffs_5, ra8.8c
++/* [0x00000fa0] */ 0x04040100, 0xe0020867, // mov r1,0x04040100
++/* [0x00000fa8] */ 0x10227380, 0x1e6200e7, // ror ra3.8c, r1, ra8.8d
++/* [0x00000fb0] */ 0x902183bf, 0xdc624065, // ror ra1.8c, r1, ra8.8c        ; mov r5rep, -8
++/* [0x00000fb8] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00000fc0] */ 0x3a281100, 0xe0020867, // mov r1,0x3a281100
++/* [0x00000fc8] */ 0x902203bf, 0x1e02581e, // ror r0, r1, ra8.8d            ; mov ra_link, unif
++/* [0x00000fd0] */ 0x90205387, 0x1c424048, // ror ra1.8a, r1, ra8.8c        ; v8min rb8, r0, rb5
++// ::mc_filter_y_pxx
++/* [0x00000fd8] */ 0xfffffe58, 0xf0f807a7, // brr ra_link, r:per_block_setup_8
++/* [0x00000fe0] */ 0x959a0ff6, 0x10024023, // mov ra0, unif                 ; mov r3, elem_num
++/* [0x00000fe8] */ 0xec9c3fd2, 0x100269e5, // add.setf -, rb_ef, rb_ef      ; v8subs r5rep, r2, r2
++/* [0x00000ff0] */ 0x8c001cff, 0x14024800, // add r0, ra0.16b, r3           ; mov rb_xshift2, rb_xshift2_next
++/* [0x00000ff8] */ 0x1158cdc0, 0xd4020867, // shl r1, ra_wt_off_l0, i_wt_den_p5
++/* [0x00001000] */ 0x4c5a7cd6, 0x121245a0, // add ra_wt_mul_l0, ra_wt_mul_l0, r3 ; mul24 r0, r2, ra_wt_mul_l0
++/* [0x00001008] */ 0x8d9c423f, 0x1042531d, // sub rb_wt_off, r1, r0         ; mov ra_ef.8a, rb4
++// :1
++/* [0x00001010] */ 0x4c745dbe, 0x100279c4, // add.setf -, ra_ef, ra_ef      ; mul24 ra4, rb5, ra_ef
++/* [0x00001018] */ 0x93440dff, 0xd40248a1, // max r2, ra_y, 0               ; mov r1, 0
++/* [0x00001020] */ 0x9251e5f6, 0x1a0248a3, // min r2, r2, rb_max_y          ; mov r3, ra_k1
++/* [0x00001028] */ 0x4c450cd7, 0xa4224462, // add ra_y, ra_y, r3            ; mul24 r2, r2, rb_pitch        ; ldtmu0
++/* [0x00001030] */ 0x8c606cbf, 0x10024e05, // add t0s, ra_base, r2          ; mov rb5,  rb6
++/* [0x00001038] */ 0x8e5479bf, 0x12024806, // shr r0, r4, ra_xshift         ; mov rb6,  rb7
++/* [0x00001040] */ 0x93458c47, 0xb20248a0, // max r2, ra_y2, r1             ; v8min r0, r0, rb_pmask        ; ldtmu1
++/* [0x00001048] */ 0x8e2009f6, 0x10024847, // shr r1, r4, rb_xshift2        ; mov rb7, ra8
++/* [0x00001050] */ 0x925de5ce, 0x120248a1, // min r2, r2, rb_max_y          ; v8min r1, r1, ra_pmax
++/* [0x00001058] */ 0x4c450cd7, 0x12124462, // add ra_y2, ra_y2, r3          ; mul24 r2, r2, rb_pitch
++/* [0x00001060] */ 0x8c24feb6, 0x10025f08, // add t1s, rb_base2, r2         ; mov ra8,  ra9
++/* [0x00001068] */ 0x4c038af1, 0xd8025962, // add r5rep, r5, r3     ; mul24      r2, ra0.8a << 8,  r1 << 8  @ "mul_used", 0
++/* [0x00001070] */ 0x5501fff0, 0x180348e2, // mov r3, rb_fir_off_h  ; mul24.ifnn r2, ra0.8a,       r0
++/* [0x00001078] */ 0x4d03f6b0, 0xda0248a3, // sub r2, r3, r2        ; mul24      r3, ra0.8b << 1,  r0 << 1  @ "mul_used", 0
++/* [0x00001080] */ 0x40037031, 0xda0109e3, // nop                   ; mul24.ifn  r3, ra0.8b << 9,  r1 << 9  @ "mul_used", 0
++/* [0x00001088] */ 0x4c03e4f0, 0xdc0248a3, // add r2, r2, r3        ; mul24      r3, ra0.8c << 2,  r0 << 2  @ "mul_used", 0
++/* [0x00001090] */ 0x40036031, 0xdc0109e3, // nop                   ; mul24.ifn  r3, ra0.8c << 10, r1 << 10 @ "mul_used", 0
++/* [0x00001098] */ 0x4d03d4f0, 0xde0248a3, // sub r2, r2, r3        ; mul24      r3, ra0.8d << 3,  r0 << 3  @ "mul_used", 0
++/* [0x000010a0] */ 0x40035031, 0xde0109e3, // nop                   ; mul24.ifn  r3, ra0.8d << 11, r1 << 11 @ "mul_used", 0
++/* [0x000010a8] */ 0x4c07c4f0, 0xd80248a3, // add r2, r2, r3        ; mul24      r3, ra1.8a << 4,  r0 << 4  @ "mul_used", 0
++/* [0x000010b0] */ 0x40074031, 0xd80109e3, // nop                   ; mul24.ifn  r3, ra1.8a << 12, r1 << 12 @ "mul_used", 0
++/* [0x000010b8] */ 0x4c07b4f0, 0xda0248a3, // add r2, r2, r3        ; mul24      r3, ra1.8b << 5,  r0 << 5  @ "mul_used", 0
++/* [0x000010c0] */ 0x40073031, 0xda0109e3, // nop                   ; mul24.ifn  r3, ra1.8b << 13, r1 << 13 @ "mul_used", 0
++/* [0x000010c8] */ 0x4d07a4f0, 0xdc0248a3, // sub r2, r2, r3        ; mul24      r3, ra1.8c << 6,  r0 << 6  @ "mul_used", 0
++/* [0x000010d0] */ 0x40072031, 0xdc0109e3, // nop                   ; mul24.ifn  r3, ra1.8c << 14, r1 << 14 @ "mul_used", 0
++/* [0x000010d8] */ 0x4c0794f0, 0xde0248a3, // add r2, r2, r3        ; mul24      r3, ra1.8d << 7,  r0 << 7  @ "mul_used", 0
++/* [0x000010e0] */ 0x4c071b71, 0xde0329e3, // add.setf -, r5, r5    ; mul24.ifn  r3, ra1.8d << 15, r1 << 15 @ "mul_used", 0
++/* [0x000010e8] */ 0xffffff08, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x000010f0] */ 0x4d0854fe, 0x1a0248a1, // sub r2, r2, r3                ; mul24 r1, rb5,  ra2.8b
++/* [0x000010f8] */ 0x550caffe, 0x1a024260, // mov ra9,  rb10                ; mul24 r0, rb10, ra3.8b
++/* [0x00001100] */ 0x8f2c05f6, 0xd00242ca, // asr ra11, r2, v_bit_depth - 8 ; mov rb10, ra11
++/* [0x00001108] */ 0x4d08623e, 0x1c024860, // sub r1, r1, r0                ; mul24 r0, rb6,  ra2.8c
++/* [0x00001110] */ 0x4d08723e, 0x1e024860, // sub r1, r1, r0                ; mul24 r0, rb7,  ra2.8d
++/* [0x00001118] */ 0x4c208237, 0x10024860, // add r1, r1, r0                ; mul24 r0, ra8,  rb8
++/* [0x00001120] */ 0x4c0ca23e, 0x1c024860, // add r1, r1, r0                ; mul24 r0, rb10, ra3.8c
++/* [0x00001128] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0                ; mul24 r0, ra11, rb11
++/* [0x00001130] */ 0x8d5d1bf6, 0x1c0269e3, // sub.setf -, r5, rb_i_tmu      ; mov r3, ra_blk_height
++/* [0x00001138] */ 0x8d1133bf, 0x1002884f, // sub r1, r1, ra4               ; mov.ifz rb_base2, rb_base2_next
++/* [0x00001140] */ 0x8d6a7236, 0x10029858, // sub r1, r1, r0                ; mov.ifz ra_base, ra_base_next
++/* [0x00001148] */ 0x8f4c63f6, 0xd0029851, // asr r1, r1, 6                 ; mov.ifz ra_y_y2, ra_y_y2_next
++/* [0x00001150] */ 0x4d592bce, 0x120269e0, // sub.setf -, r5, rb_lcount     ; mul24 r0, r1, ra_wt_mul_l0
++/* [0x00001158] */ 0x4c64c1ce, 0x14024821, // add r0, r0, rb_wt_off         ; mul24 r1, r1, ra_kmul_add
++/* [0x00001160] */ 0xed427073, 0x12024860, // sub r1, r0, r1                ; v8subs r0, ra_height, r3
++/* [0x00001168] */ 0xfffffe88, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00001170] */ 0x0f9cd3c0, 0xd0020867, // asr r1, r1, i_wt_den_p6
++/* [0x00001178] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x00001180] */ 0x5351039f, 0x18024c22, // max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++/* [0x00001188] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0        ; mov vw_setup, ra_dma0
++/* [0x00001190] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x00001198] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3                ; mov vw_setup, rb_dma1
++/* [0x000011a0] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3                ; mov vw_addr, ra_dest
++/* [0x000011a8] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x000011b0] */ 0xfffffe40, 0xf0f809e7, // brr -, r:1b
++/* [0x000011b8] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x000011c0] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x000011c8] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_filter_y_bxx
++/* [0x000011d0] */ 0xfffffc60, 0xf0f807a7, // brr ra_link, r:per_block_setup_8
++/* [0x000011d8] */ 0x959a0ff6, 0x10024023, // mov ra0, unif                 ; mov r3, elem_num
++/* [0x000011e0] */ 0xec9c3fd2, 0x100269e5, // add.setf -, rb_ef, rb_ef      ; v8subs r5rep, r2, r2
++/* [0x000011e8] */ 0x8c001cff, 0x14024800, // add r0, ra0.16b, r3           ; mov rb_xshift2, rb_xshift2_next
++/* [0x000011f0] */ 0x1158ddc0, 0xd4020867, // shl r1, ra_wt_off_l0, i_wt_den_p6
++/* [0x000011f8] */ 0x4c5a7cd6, 0x121245a0, // add ra_wt_mul_l0, ra_wt_mul_l0, r3 ; mul24 r0, r2, ra_wt_mul_l0
++/* [0x00001200] */ 0x4d4a7216, 0x12024860, // sub r1, r1, r0                ; mul24 r0, r2, ra_wt_mul_l1
++/* [0x00001208] */ 0x8d9c423f, 0x1042531d, // sub rb_wt_off, r1, r0         ; mov ra_ef.8a, rb4
++// :1
++/* [0x00001210] */ 0x4c745dbe, 0x100279c4, // add.setf -, ra_ef, ra_ef      ; mul24 ra4, rb5, ra_ef
++/* [0x00001218] */ 0x93440dff, 0xd40248a1, // max r2, ra_y, 0               ; mov r1, 0
++/* [0x00001220] */ 0x9251e5f6, 0x1a0248a3, // min r2, r2, rb_max_y          ; mov r3, ra_k1
++/* [0x00001228] */ 0x4c450cd7, 0xa4224462, // add ra_y, ra_y, r3            ; mul24 r2, r2, rb_pitch        ; ldtmu0
++/* [0x00001230] */ 0x8c606cbf, 0x10024e05, // add t0s, ra_base, r2          ; mov rb5,  rb6
++/* [0x00001238] */ 0x8e5479bf, 0x12024806, // shr r0, r4, ra_xshift         ; mov rb6,  rb7
++/* [0x00001240] */ 0x93458c47, 0xb20248a0, // max r2, ra_y2, r1             ; v8min r0, r0, rb_pmask        ; ldtmu1
++/* [0x00001248] */ 0x8e2009f6, 0x10024847, // shr r1, r4, rb_xshift2        ; mov rb7, ra8
++/* [0x00001250] */ 0x925de5ce, 0x120248a1, // min r2, r2, rb_max_y          ; v8min r1, r1, ra_pmax
++/* [0x00001258] */ 0x4c450cd7, 0x12124462, // add ra_y2, ra_y2, r3          ; mul24 r2, r2, rb_pitch
++/* [0x00001260] */ 0x8c24feb6, 0x10025f08, // add t1s, rb_base2, r2         ; mov ra8,  ra9
++/* [0x00001268] */ 0x4c038af1, 0xd8025962, // add r5rep, r5, r3     ; mul24      r2, ra0.8a << 8,  r1 << 8  @ "mul_used", 0
++/* [0x00001270] */ 0x5501fff0, 0x180348e2, // mov r3, rb_fir_off_h  ; mul24.ifnn r2, ra0.8a,       r0
++/* [0x00001278] */ 0x4d03f6b0, 0xda0248a3, // sub r2, r3, r2        ; mul24      r3, ra0.8b << 1,  r0 << 1  @ "mul_used", 0
++/* [0x00001280] */ 0x40037031, 0xda0109e3, // nop                   ; mul24.ifn  r3, ra0.8b << 9,  r1 << 9  @ "mul_used", 0
++/* [0x00001288] */ 0x4c03e4f0, 0xdc0248a3, // add r2, r2, r3        ; mul24      r3, ra0.8c << 2,  r0 << 2  @ "mul_used", 0
++/* [0x00001290] */ 0x40036031, 0xdc0109e3, // nop                   ; mul24.ifn  r3, ra0.8c << 10, r1 << 10 @ "mul_used", 0
++/* [0x00001298] */ 0x4d03d4f0, 0xde0248a3, // sub r2, r2, r3        ; mul24      r3, ra0.8d << 3,  r0 << 3  @ "mul_used", 0
++/* [0x000012a0] */ 0x40035031, 0xde0109e3, // nop                   ; mul24.ifn  r3, ra0.8d << 11, r1 << 11 @ "mul_used", 0
++/* [0x000012a8] */ 0x4c07c4f0, 0xd80248a3, // add r2, r2, r3        ; mul24      r3, ra1.8a << 4,  r0 << 4  @ "mul_used", 0
++/* [0x000012b0] */ 0x40074031, 0xd80109e3, // nop                   ; mul24.ifn  r3, ra1.8a << 12, r1 << 12 @ "mul_used", 0
++/* [0x000012b8] */ 0x4c07b4f0, 0xda0248a3, // add r2, r2, r3        ; mul24      r3, ra1.8b << 5,  r0 << 5  @ "mul_used", 0
++/* [0x000012c0] */ 0x40073031, 0xda0109e3, // nop                   ; mul24.ifn  r3, ra1.8b << 13, r1 << 13 @ "mul_used", 0
++/* [0x000012c8] */ 0x4d07a4f0, 0xdc0248a3, // sub r2, r2, r3        ; mul24      r3, ra1.8c << 6,  r0 << 6  @ "mul_used", 0
++/* [0x000012d0] */ 0x40072031, 0xdc0109e3, // nop                   ; mul24.ifn  r3, ra1.8c << 14, r1 << 14 @ "mul_used", 0
++/* [0x000012d8] */ 0x4c0794f0, 0xde0248a3, // add r2, r2, r3        ; mul24      r3, ra1.8d << 7,  r0 << 7  @ "mul_used", 0
++/* [0x000012e0] */ 0x4c071b71, 0xde0329e3, // add.setf -, r5, r5    ; mul24.ifn  r3, ra1.8d << 15, r1 << 15 @ "mul_used", 0
++/* [0x000012e8] */ 0xffffff08, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x000012f0] */ 0x4d0854fe, 0x1a0248a1, // sub r2, r2, r3                ; mul24 r1, rb5,  ra2.8b
++/* [0x000012f8] */ 0x550caffe, 0x1a024260, // mov ra9,  rb10                ; mul24 r0, rb10, ra3.8b
++/* [0x00001300] */ 0x8f2c05f6, 0xd00242ca, // asr ra11, r2, v_bit_depth - 8 ; mov rb10, ra11
++/* [0x00001308] */ 0x4d08623e, 0x1c024860, // sub r1, r1, r0                ; mul24 r0, rb6,  ra2.8c
++/* [0x00001310] */ 0x4d08723e, 0x1e024860, // sub r1, r1, r0                ; mul24 r0, rb7,  ra2.8d
++/* [0x00001318] */ 0x4c208237, 0x10024860, // add r1, r1, r0                ; mul24 r0, ra8,  rb8
++/* [0x00001320] */ 0x4c0ca23e, 0x1c024860, // add r1, r1, r0                ; mul24 r0, rb10, ra3.8c
++/* [0x00001328] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0                ; mul24 r0, ra11, rb11
++/* [0x00001330] */ 0x0d127380, 0x10020867, // sub r1, r1, ra4
++/* [0x00001338] */ 0x8d9cc23f, 0x10024862, // sub r1, r1, r0                ; mov r2, rb_wt_off
++/* [0x00001340] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6
++/* [0x00001348] */ 0x4d591bce, 0x120269e0, // sub.setf -, r5, rb_i_tmu      ; mul24 r0, r1, ra_wt_mul_l0
++/* [0x00001350] */ 0x55653fce, 0x140453e1, // mov.ifz rb_base2, rb_base2_next ; mul24 r1, r1, ra_kmul_add
++/* [0x00001358] */ 0x8d4e7076, 0x10029851, // sub r1, r0, r1                ; mov.ifz ra_y_y2, ra_y_y2_next
++/* [0x00001360] */ 0x8d692bf6, 0x1002b9d8, // sub.setf -, r5, rb_lcount     ; mov.ifz ra_base, ra_base_next
++/* [0x00001368] */ 0x8c9f8289, 0xd0024860, // add r1, r1, r2                ; mov r0, r1 << 8
++/* [0x00001370] */ 0x8c5e7236, 0x1c024863, // add r1, r1, r0                ; mov r3, ra_blk_height
++/* [0x00001378] */ 0xfffffe78, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00001380] */ 0x4f65039f, 0x18024862, // asr r1, r1, ra_wt_den_p7      ; mul24 r2, r3, rb_pitch
++/* [0x00001388] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x00001390] */ 0xf34003f3, 0xd2024c20, // max vpm, r1, 0                ; v8subs r0, ra_height, r3
++/* [0x00001398] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0        ; mov vw_setup, ra_dma0
++/* [0x000013a0] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x000013a8] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3                ; mov vw_setup, rb_dma1
++/* [0x000013b0] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3                ; mov vw_addr, ra_dest
++/* [0x000013b8] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x000013c0] */ 0xfffffe30, 0xf0f809e7, // brr -, r:1b
++/* [0x000013c8] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x000013d0] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x000013d8] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_filter_y_p00
++/* [0x000013e0] */ 0x959a0ff6, 0x10024020, // mov ra0, unif                 ; mov r0, elem_num
++/* [0x000013e8] */ 0xf5567dad, 0x14124565, // mov ra_xshift, ra_xshift_next ; v8subs r5rep, r5, r5
++/* [0x000013f0] */ 0x8c020c3f, 0x1402581a, // add r0, ra0.16b, r0           ; mov ra_base_next, unif
++/* [0x000013f8] */ 0x93027176, 0x12225813, // max r0, r0, r5                ; mov ra_y_next, ra0.16a
++/* [0x00001400] */ 0x9281a1f6, 0x10025810, // min r0, r0, rb_max_x          ; mov ra_width_height, unif
++/* [0x00001408] */ 0x119c31c0, 0xd0220567, // shl ra_xshift_next, r0, 3
++/* [0x00001410] */ 0x149dc1c0, 0xd0020827, // and r0, r0, -4
++/* [0x00001418] */ 0x8d810bf6, 0x10025896, // sub r2, r5, rb_pitch          ; mov ra_wt_off_mul_l0, unif
++/* [0x00001420] */ 0x149e7080, 0x10020867, // and r1, r0, r2
++/* [0x00001428] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00001430] */ 0x8c827076, 0x1002581c, // add r0, r0, r1                ; mov ra_dest, unif
++/* [0x00001438] */ 0x8c69cc3f, 0x100246b1, // add ra_base_next, ra_base_next, r0 ; mov vw_setup, rb_vpm_init
++/* [0x00001440] */ 0x11400dc0, 0xd4020867, // shl r1, ra_width, v_x_shift
++/* [0x00001448] */ 0x8d419e76, 0x12025760, // sub rb_dma1, rb_dma1_base, r1 ; mov r0, ra_height
++/* [0x00001450] */ 0x8d5c31c6, 0xdc025460, // sub rb_i_tmu, r0, PREREAD     ; v8min r0, r0, ra_blk_height
++/* [0x00001458] */ 0x919c71c0, 0xd0024812, // shl r0, r0, v_dma_h_shift     ; mov rb_lcount, r0
++/* [0x00001460] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1
++/* [0x00001468] */ 0x1158edc0, 0xd4021327, // shl rb_wt_off, ra_wt_off_l0, DENOM + 7
++/* [0x00001470] */ 0x918101f6, 0xd002581e, // shl r0, r0, v_dma_wh_shift    ; mov ra_link, unif
++/* [0x00001478] */ 0x0c9db1c0, 0x100206e7, // add ra_dma0, r0, rb_dma0_base
++// :1
++/* [0x00001480] */ 0xcd511bee, 0x1a0269e5, // sub.setf -, r5, rb_i_tmu      ; v8adds r5rep, r5, ra_k1
++/* [0x00001488] */ 0x804e7036, 0xa42099d1, // nop                           ; mov.ifz ra_y, ra_y_next       ; ldtmu0
++/* [0x00001490] */ 0x8e5509bf, 0x12024823, // shr r0, r4, ra_xshift         ; mov r3, rb_pitch
++/* [0x00001498] */ 0x13440dc0, 0xd40208a7, // max r2, ra_y, 0
++/* [0x000014a0] */ 0x9269e5f6, 0x10029898, // min r2, r2, rb_max_y          ; mov.ifz ra_base, ra_base_next
++/* [0x000014a8] */ 0x4c441dd3, 0xd4224462, // add ra_y, ra_y, 1             ; mul24 r2, r2, r3
++/* [0x000014b0] */ 0x8c618c87, 0x10024e20, // add t0s, ra_base, r2          ; v8min r0, r0, rb_pmask
++/* [0x000014b8] */ 0x4d592bc6, 0x120269e1, // sub.setf -, r5, rb_lcount     ; mul24 r1, r0, ra_wt_mul_l0
++/* [0x000014c0] */ 0x915c83f6, 0xdc024863, // shl r1, r1, 8                 ; mov r3, ra_blk_height
++/* [0x000014c8] */ 0xec40c3f3, 0x12024860, // add r1, r1, rb_wt_off         ; v8subs r0, ra_height, r3
++/* [0x000014d0] */ 0xffffff90, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x000014d8] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, DENOM + 8
++/* [0x000014e0] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x000014e8] */ 0x5351039f, 0x18024c22, // max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++/* [0x000014f0] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0 ; mov vw_setup, ra_dma0
++/* [0x000014f8] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x00001500] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3        ; mov vw_setup, rb_dma1
++/* [0x00001508] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3        ; mov vw_addr, ra_dest
++/* [0x00001510] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x00001518] */ 0xffffff48, 0xf0f809e7, // brr -, r:1b
++/* [0x00001520] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x00001528] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x00001530] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_filter_y_b00
++/* [0x00001538] */ 0xfffff8f8, 0xf0f807a7, // brr ra_link, r:per_block_setup_8
++/* [0x00001540] */ 0x959a0ff6, 0x10024023, // mov ra0, unif                 ; mov r3, elem_num
++/* [0x00001548] */ 0xec9c3fd2, 0x100269e5, // add.setf -, rb_ef, rb_ef      ; v8subs r5rep, r2, r2
++/* [0x00001550] */ 0x8c001cff, 0x14024800, // add r0, ra0.16b, r3           ; mov rb_xshift2, rb_xshift2_next
++/* [0x00001558] */ 0x00000001, 0xe00208a7, // mov r2, 1
++/* [0x00001560] */ 0x8c591eb6, 0x10025461, // add rb_i_tmu, rb_i_tmu, r2    ; mov r1, ra_wt_off_mul_l0
++/* [0x00001568] */ 0xf158fded, 0xd4025325, // shl rb_wt_off, ra_wt_off_l0, DENOM + 8 ; v8subs r5quad, r5, r5
++/* [0x00001570] */ 0x809f8009, 0xd000d9d6, // nop                           ; mov.ifnz ra_wt_off_mul_l0, r1 << 8
++// :1
++/* [0x00001578] */ 0x0d9d1bc0, 0xb00229e7, // sub.setf -, r5, rb_i_tmu      ; nop                           ; ldtmu1
++/* [0x00001580] */ 0x8e4c09f6, 0xa0029851, // shr r1, r4, rb_xshift2        ; mov.ifz ra_y_y2, ra_y_y2_next ; ldtmu0
++/* [0x00001588] */ 0x8e5509bf, 0x12024823, // shr r0, r4, ra_xshift         ; mov r3, rb_pitch
++/* [0x00001590] */ 0x13440dc0, 0xd40208a7, // max r2, ra_y, 0
++/* [0x00001598] */ 0x9269e5f6, 0x10029898, // min r2, r2, rb_max_y          ; mov.ifz ra_base, ra_base_next
++/* [0x000015a0] */ 0x4c441dd3, 0xd4224462, // add ra_y, ra_y, 1             ; mul24 r2, r2, r3
++/* [0x000015a8] */ 0x8c613cbf, 0x10028e0f, // add t0s, ra_base, r2          ; mov.ifz rb_base2, rb_base2_next
++/* [0x000015b0] */ 0x13440dc0, 0xd20208a7, // max r2, ra_y2, 0
++/* [0x000015b8] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_max_y
++/* [0x000015c0] */ 0x4c441dd3, 0xd2124462, // add ra_y2, ra_y2, 1           ; mul24 r2, r2, r3
++/* [0x000015c8] */ 0x8c5cfe86, 0x12024f20, // add t1s, rb_base2, r2         ; v8min r0, r0, ra_pmax
++/* [0x000015d0] */ 0x545983c6, 0x12024860, // and r1, r1, rb_pmask          ; mul24 r0, r0, ra_wt_mul_l0
++/* [0x000015d8] */ 0x4d492bce, 0x120269e1, // sub.setf -, r5, rb_lcount     ; mul24 r1, r1, ra_wt_mul_l1
++/* [0x000015e0] */ 0xcc52706e, 0x1a024865, // add r1, r0, r1                ; v8adds r5rep, r5, ra_k1
++/* [0x000015e8] */ 0x915c83f6, 0xdc024863, // shl r1, r1, 8                 ; mov r3, ra_blk_height
++/* [0x000015f0] */ 0xec40c3f3, 0x12024860, // add r1, r1, rb_wt_off         ; v8subs r0, ra_height, r3
++/* [0x000015f8] */ 0xffffff60, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00001600] */ 0x0f9d03c0, 0xd0020867, // asr r1, r1, (DENOM + 9) - 32
++/* [0x00001608] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x00001610] */ 0x5351039f, 0x18024c22, // max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++/* [0x00001618] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0        ; mov vw_setup, ra_dma0
++/* [0x00001620] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x00001628] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3                ; mov vw_setup, rb_dma1
++/* [0x00001630] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3                ; mov vw_addr, ra_dest
++/* [0x00001638] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x00001640] */ 0xffffff18, 0xf0f809e7, // brr -, r:1b
++/* [0x00001648] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x00001650] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x00001658] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_setup_c10_q0
++/* [0x00001660] */ 0x0000000c, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_setup_c10_qn
++/* [0x00001668] */ 0x95801ff6, 0xd0025900, // mov tmurs, 1                  ; mov ra0, unif
++/* [0x00001670] */ 0xaaaaff00, 0xe6020827, // mov r0, [0,2,0,2,0,2,0,2,1,3,1,3,1,3,1,3]
++/* [0x00001678] */ 0x9181e1f6, 0xd00250d8, // shl rb_ef, r0, i_shift30      ; mov ra_base, unif
++/* [0x00001680] */ 0x0d801dc0, 0xd0020827, // sub r0, unif, 1
++/* [0x00001688] */ 0x119c21c0, 0xd00216a7, // shl rb_max_x, r0, v_x_shift
++/* [0x00001690] */ 0x0d801dc0, 0xd00217a7, // sub rb_max_y, unif, 1
++/* [0x00001698] */ 0xff800100, 0xe0020527, // mov ra_kff800100, 0xff800100
++/* [0x000016a0] */ 0x0000ffff, 0xe0021627, // mov rb_pmask, v_pmask
++/* [0x000016a8] */ 0x000803ff, 0xe00205e7, // mov ra_blk_height_pmax, ((1 << v_bit_depth) - 1) | (v_blk_height << 16)
++/* [0x000016b0] */ 0x00010000, 0xe00217e7, // mov rb_fir_off_h, (FIR_OFFSET << (v_bit_depth - 8))
++/* [0x000016b8] */ 0x4000000c, 0xe0020667, // mov ra_fir_off_val_wt_den_p7, (FIR_OFFSET << 16) | (DENOM + 15 - v_bit_depth)
++/* [0x000016c0] */ 0x95803ff6, 0x10024754, // mov ra_ef, rb_ef              ; mov rb_xpitch, unif
++/* [0x000016c8] */ 0x15827d80, 0x10021427, // mov rb_pitch, unif
++/* [0x000016d0] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0)
++/* [0x000016d8] */ 0x0c9d03c0, 0x10021667, // add rb_dma1_base, r1, rb_pitch
++/* [0x000016e0] */ 0x14981f80, 0xd0020827, // and r0, 1, elem_num
++/* [0x000016e8] */ 0x409c5007, 0xd00049e0, // nop                           ; mul24 r0, r0, 5
++/* [0x000016f0] */ 0x0c9a7180, 0x10020827, // add r0, r0, elem_num
++/* [0x000016f8] */ 0x0c9e7000, 0x100210a7, // add rb_elem_x, r0, r0
++/* [0x00001700] */ 0x11002dc0, 0xd4020827, // shl r0, ra0.16b, v_x_shift
++/* [0x00001708] */ 0x0c9c21c0, 0x10020827, // add r0, r0, rb_elem_x
++/* [0x00001710] */ 0x930001f6, 0xd2225811, // max r0, r0, 0                 ; mov ra_y, ra0.16a
++/* [0x00001718] */ 0x129da1c0, 0x10020827, // min r0, r0, rb_max_x
++/* [0x00001720] */ 0x00000000, 0xe0224541, // mov ra_xshift_next, 0         ; mov rb_xshift2_next, 0
++/* [0x00001728] */ 0x0d510dc0, 0x18020867, // sub r1, ra_k0, rb_pitch
++/* [0x00001730] */ 0x149e7040, 0x10020867, // and r1, r0, r1
++/* [0x00001738] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00001740] */ 0x8c827076, 0x10025800, // add r0, r0, r1                ; mov ra0, unif
++/* [0x00001748] */ 0x0c627c00, 0x10020627, // add ra_base, ra_base, r0
++/* [0x00001750] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num
++/* [0x00001758] */ 0x0f9c15c0, 0xd0020867, // asr r1, r2, 1
++/* [0x00001760] */ 0x119c43c0, 0xd0020867, // shl r1, r1, 4
++/* [0x00001768] */ 0x149c15c0, 0xd0020827, // and r0, r2, 1
++/* [0x00001770] */ 0x159e7040, 0x10020827, // or  r0, r0, r1
++/* [0x00001778] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0))
++/* [0x00001780] */ 0x0c9e7040, 0x10021727, // add r_vpm, r0, r1
++/* [0x00001788] */ 0x80004002, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h16p(0,0,0))
++/* [0x00001790] */ 0x119c61c0, 0xd0020827, // shl r0, r0, 6
++/* [0x00001798] */ 0x0c9e7040, 0x100216e7, // add r_dma, r0, r1
++/* [0x000017a0] */ 0x11002dc0, 0xd4020827, // shl r0, ra0.16b, v_x_shift
++/* [0x000017a8] */ 0x8c0021f6, 0x12125811, // add r0, r0, rb_elem_x         ; mov ra_y2, ra0.16a
++/* [0x000017b0] */ 0x938001f6, 0xd002480f, // max r0, r0, 0                 ; mov rb_base2, unif
++/* [0x000017b8] */ 0x129da1c0, 0x10020827, // min r0, r0, rb_max_x
++/* [0x000017c0] */ 0x0d510dc0, 0x18020867, // sub r1, ra_k0, rb_pitch
++/* [0x000017c8] */ 0x949c307f, 0xd0024863, // and r1, r0, r1                ; mov r3, PREREAD
++/* [0x000017d0] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x000017d8] */ 0x8c467076, 0x12024822, // add r0, r0, r1                ; mov r2, ra_y2
++/* [0x000017e0] */ 0x8c44fe36, 0x140253e0, // add rb_base2, rb_base2, r0    ; mov r0, ra_y
++// :1
++/* [0x000017e8] */ 0x0d9c17c0, 0xd00228e7, // sub.setf r3, r3, 1
++/* [0x000017f0] */ 0x139c01c0, 0xd0020867, // max r1, r0, 0
++/* [0x000017f8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_max_y
++/* [0x00001800] */ 0x4c51018f, 0x1a024821, // add r0, r0, ra_k1             ; mul24 r1, r1, rb_pitch
++/* [0x00001808] */ 0x8c627c40, 0x10225e11, // add t0s, ra_base, r1          ; mov ra_y, r0
++/* [0x00001810] */ 0x139c05c0, 0xd0020867, // max r1, r2, 0
++/* [0x00001818] */ 0xffffffb0, 0xf03809e7, // brr.anynz -, r:1b
++/* [0x00001820] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_max_y
++/* [0x00001828] */ 0x4c51058f, 0x1a0248a1, // add r2, r2, ra_k1             ; mul24 r1, r1, rb_pitch
++/* [0x00001830] */ 0x8c9cfe52, 0x10125f11, // add t1s, rb_base2, r1         ; mov ra_y2, r2
++/* [0x00001838] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00001840] */ 0x00000000, 0xe0024104, // mov ra4, 0                    ; mov rb4, 0
++/* [0x00001848] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00001850] */ 0x00000000, 0xe0024145, // mov ra5, 0                    ; mov rb5, 0
++/* [0x00001858] */ 0x00000000, 0xe0024186, // mov ra6, 0                    ; mov rb6, 0
++/* [0x00001860] */ 0x00000000, 0xe00241c7, // mov ra7, 0                    ; mov rb7, 0
++// ::mc_filter_c10_p
++/* [0x00001868] */ 0x9581cff6, 0x10025c42, // mov vw_setup, rb_vpm_init     ; mov ra2, unif
++/* [0x00001870] */ 0x8c803ff6, 0x100269e3, // add.setf -, rb_ef, rb_ef      ; mov r3, unif
++/* [0x00001878] */ 0xf1082dc0, 0xd4024825, // shl r0, ra2.16b, v_x_shift    ; v8subs r5rep, r0, r0
++/* [0x00001880] */ 0x8c8021f6, 0x10025810, // add r0, r0, rb_elem_x         ; mov ra_width_height, unif
++/* [0x00001888] */ 0x8d810bf6, 0x10025840, // sub r1, r5, rb_pitch          ; mov ra0, unif
++/* [0x00001890] */ 0x93567176, 0x14024800, // max r0, r0, r5                ; mov vrx_xshift, vrx_xshift_next
++/* [0x00001898] */ 0x9209a1f6, 0x12225813, // min r0, r0, rb_max_x          ; mov vra_y_next, ra2.16a
++/* [0x000018a0] */ 0x54404077, 0xd4024862, // and r1, r0, r1                ; mul24 r2, ra_width, v_x_mul
++/* [0x000018a8] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x000018b0] */ 0x8c827076, 0x10025803, // add r0, r0, r1                ; mov ra3, unif
++/* [0x000018b8] */ 0x8c427636, 0x120246a1, // add vrx_base_next, r3, r0     ; mov r1, ra_height
++/* [0x000018c0] */ 0x8d819eb6, 0x10025756, // sub rb_dma1, rb_dma1_base, r2 ; mov ra_wt_off_mul_l0, unif
++/* [0x000018c8] */ 0x8c5dc3ce, 0xdc025461, // add rb_i_tmu, r1, (3-4) - PREREAD ; v8min r1, r1, ra_blk_height
++/* [0x000018d0] */ 0x8c81f3f6, 0xd0039496, // add rb_lcount, r1, (3-4)      ; mov.ifc ra_wt_off_mul_l0, unif
++/* [0x000018d8] */ 0x918083f6, 0xd002581c, // shl r0, r1, v_dma_h_shift     ; mov ra_dest, unif
++/* [0x000018e0] */ 0x8c6670b6, 0x14024822, // add r0, r0, r2                ; mov r2, ra_fir_off_val
++/* [0x000018e8] */ 0x910cf1f6, 0xdc02480a, // shl r0, r0, v_dma_wh_shift    ; mov rb10, ra3.8c
++/* [0x000018f0] */ 0x8c59b1f6, 0x140246e1, // add ra_dma0, r0, rb_dma0_base ; mov r1, ra_wt_off_l0
++/* [0x000018f8] */ 0x5158a3d6, 0xd2024860, // shl r1, r1, i_wt_den_p5       ; mul24 r0, r2, ra_wt_mul_l0
++/* [0x00001900] */ 0x8d667236, 0x14025320, // sub rb_wt_off, r1, r0         ; mov r0, ra_kmul_add
++/* [0x00001908] */ 0x8c59cc3f, 0xd21245a5, // add ra_wt_mul_l0, ra_wt_mul_l0, r0 ; mov r5rep, -4
++/* [0x00001910] */ 0x950e0dbf, 0x1e0252de, // mov rb11, ra3.8d              ; mov ra_link, unif
++// :1
++/* [0x00001918] */ 0x8d151bf6, 0xa00269c4, // sub.setf -, r5, rb_i_tmu      ; mov rb4, ra5                  ; ldtmu0
++/* [0x00001920] */ 0x8e4c09f6, 0x140288a3, // shr r2, r4, vrx_xshift        ; mov.ifz  r3, vra_y_next
++/* [0x00001928] */ 0x8e4505f6, 0xd402c863, // shr r1, r2, v_v_shift         ; mov.ifnz r3, vra_y
++/* [0x00001930] */ 0x8c683ff6, 0x1002b9d8, // add.setf -, rb_ef, rb_ef      ; mov.ifz  vra_base, vrx_base_next
++/* [0x00001938] */ 0x8c531789, 0xda224460, // add vra_y, r3, ra_k1          ; mov      r0, r1 << 15
++/* [0x00001940] */ 0x9353f792, 0xd803c8e1, // max r3, r3, ra_k0             ; mov.ifnc r1, r2 << 1
++/* [0x00001948] */ 0x929de7d2, 0x1003c8e0, // min r3, r3, rb_max_y          ; mov.ifnc r0, r2
++/* [0x00001950] */ 0x545d039f, 0x12024863, // and r1, r1, ra_pmax           ; mul24 r3, r3, rb_pitch
++/* [0x00001958] */ 0x8c618cc7, 0x10024e20, // add vr_txs, vra_base, r3      ; v8min r0, r0, rb_pmask
++/* [0x00001960] */ 0x4c001bf0, 0xd8025963, // add r5rep, r5, 1              ; mul24      r3, ra0.8a,       r0
++/* [0x00001968] */ 0x4d01fef1, 0x1e0248a3, // sub r2, rb_fir_off_h, r3      ; mul24      r3, ra0.8d,       r1
++/* [0x00001970] */ 0x4d03e4f0, 0xda0248a3, // sub r2, r2, r3                ; mul24      r3, ra0.8b << 2,  r0 << 2  @ "mul_used", 0
++/* [0x00001978] */ 0x40034031, 0xda0109e3, // nop                           ; mul24.ifn  r3, ra0.8b << 12, r1 << 12 @ "mul_used", 0
++/* [0x00001980] */ 0x4c03c4f0, 0xdc0248a3, // add r2, r2, r3                ; mul24      r3, ra0.8c << 4,  r0 << 4  @ "mul_used", 0
++/* [0x00001988] */ 0x4c032b71, 0xdc0329e3, // add.setf -, r5, r5            ; mul24.ifn  r3, ra0.8c << 14, r1 << 14 @ "mul_used", 0
++/* [0x00001990] */ 0xffffff68, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00001998] */ 0x4c1ca4f7, 0x100248a0, // add r2, r2, r3                ; mul24 r0, ra7, rb10
++/* [0x000019a0] */ 0x550c6ffe, 0x1a024161, // mov ra5, rb6                  ; mul24 r1, rb6, ra3.8b
++/* [0x000019a8] */ 0x8f1c25f6, 0xd00241c6, // asr ra7, r2, v_bit_depth - 8  ; mov rb6, ra7
++/* [0x000019b0] */ 0x4c0c423e, 0x18024860, // add r1, r1, r0                ; mul24 r0, rb4, ra3.8a
++/* [0x000019b8] */ 0x4d1cb237, 0x10024860, // sub r1, r1, r0                ; mul24 r0, ra7, rb11
++/* [0x000019c0] */ 0x0d9e7200, 0x10020867, // sub r1, r1, r0
++/* [0x000019c8] */ 0x8f5c63f6, 0xdc024863, // asr r1, r1, 6                 ; mov r3, ra_blk_height
++/* [0x000019d0] */ 0x4d592bce, 0x120269e0, // sub.setf -, r5, rb_lcount     ; mul24 r0, r1, ra_wt_mul_l0
++/* [0x000019d8] */ 0x4c64c1ce, 0x14024821, // add r0, r0, rb_wt_off         ; mul24 r1, r1, ra_kmul_add
++/* [0x000019e0] */ 0xed427073, 0x12024860, // sub r1, r0, r1                ; v8subs r0, ra_height, r3
++/* [0x000019e8] */ 0xffffff10, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x000019f0] */ 0x0f9cb3c0, 0xd0020867, // asr r1, r1, i_wt_den_p6
++/* [0x000019f8] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x00001a00] */ 0x5351039f, 0x18024c22, // max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++/* [0x00001a08] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0        ; mov vw_setup, ra_dma0
++/* [0x00001a10] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x00001a18] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3                ; mov vw_setup, rb_dma1
++/* [0x00001a20] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3                ; mov vw_addr, ra_dest
++/* [0x00001a28] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x00001a30] */ 0xfffffec8, 0xf0f809e7, // brr -, r:1b
++/* [0x00001a38] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x00001a40] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x00001a48] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_filter_c10_p_l1
++/* [0x00001a50] */ 0x9581cff6, 0x10025c42, // mov vw_setup, rb_vpm_init     ; mov ra2, unif
++/* [0x00001a58] */ 0x8c803ff6, 0x100269e3, // add.setf -, rb_ef, rb_ef      ; mov r3, unif
++/* [0x00001a60] */ 0xf1082dc0, 0xd4024825, // shl r0, ra2.16b, v_x_shift    ; v8subs r5rep, r0, r0
++/* [0x00001a68] */ 0x8c8021f6, 0x10025810, // add r0, r0, rb_elem_x         ; mov ra_width_height, unif
++/* [0x00001a70] */ 0x8d810bf6, 0x10025840, // sub r1, r5, rb_pitch          ; mov ra0, unif
++/* [0x00001a78] */ 0x939c117f, 0x10125815, // max r0, r0, r5                ; mov vrx_xshift, vrx_xshift_next
++/* [0x00001a80] */ 0x9209a1f6, 0x12125813, // min r0, r0, rb_max_x          ; mov vra_y_next, ra2.16a
++/* [0x00001a88] */ 0x54404077, 0xd4024862, // and r1, r0, r1                ; mul24 r2, ra_width, v_x_mul
++/* [0x00001a90] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00001a98] */ 0x8c827076, 0x10025803, // add r0, r0, r1                ; mov ra3, unif
++/* [0x00001aa0] */ 0x8c427636, 0x120254e1, // add vrx_base_next, r3, r0     ; mov r1, ra_height
++/* [0x00001aa8] */ 0x8d819eb6, 0x10025756, // sub rb_dma1, rb_dma1_base, r2 ; mov ra_wt_off_mul_l0, unif
++/* [0x00001ab0] */ 0x8c5dc3ce, 0xdc025461, // add rb_i_tmu, r1, (3-4) - PREREAD ; v8min r1, r1, ra_blk_height
++/* [0x00001ab8] */ 0x8c81f3f6, 0xd0039496, // add rb_lcount, r1, (3-4)      ; mov.ifc ra_wt_off_mul_l0, unif
++/* [0x00001ac0] */ 0x918083f6, 0xd002581c, // shl r0, r1, v_dma_h_shift     ; mov ra_dest, unif
++/* [0x00001ac8] */ 0x8c6670b6, 0x14024822, // add r0, r0, r2                ; mov r2, ra_fir_off_val
++/* [0x00001ad0] */ 0x910cf1f6, 0xdc02480a, // shl r0, r0, v_dma_wh_shift    ; mov rb10, ra3.8c
++/* [0x00001ad8] */ 0x8c59b1f6, 0x140246e1, // add ra_dma0, r0, rb_dma0_base ; mov r1, ra_wt_off_l0
++/* [0x00001ae0] */ 0x5158a3d6, 0xd2024860, // shl r1, r1, i_wt_den_p5       ; mul24 r0, r2, ra_wt_mul_l0
++/* [0x00001ae8] */ 0x8d667236, 0x14025320, // sub rb_wt_off, r1, r0         ; mov r0, ra_kmul_add
++/* [0x00001af0] */ 0x8c59cc3f, 0xd21245a5, // add ra_wt_mul_l0, ra_wt_mul_l0, r0 ; mov r5rep, -4
++/* [0x00001af8] */ 0x950e0dbf, 0x1e0252de, // mov rb11, ra3.8d              ; mov ra_link, unif
++// :1
++/* [0x00001b00] */ 0x8d151bf6, 0xb00269c4, // sub.setf -, r5, rb_i_tmu      ; mov rb4, ra5                  ; ldtmu1
++/* [0x00001b08] */ 0x8e5539bf, 0x1202888f, // shr r2, r4, vrx_xshift        ; mov.ifz  vra_base, vrx_base_next
++/* [0x00001b10] */ 0x8e4505f6, 0xd202c863, // shr r1, r2, v_v_shift         ; mov.ifnz r3, vra_y
++/* [0x00001b18] */ 0x8c4c3ff6, 0x1202a9e3, // add.setf -, rb_ef, rb_ef      ; mov.ifz  r3, vra_y_next
++/* [0x00001b20] */ 0x8c531789, 0xda124460, // add vra_y, r3, ra_k1          ; mov      r0, r1 << 15
++/* [0x00001b28] */ 0x9353f792, 0xd803c8e1, // max r3, r3, ra_k0             ; mov.ifnc r1, r2 << 1
++/* [0x00001b30] */ 0x929de7d2, 0x1003c8e0, // min r3, r3, rb_max_y          ; mov.ifnc r0, r2
++/* [0x00001b38] */ 0x545d039f, 0x12024863, // and r1, r1, ra_pmax           ; mul24 r3, r3, rb_pitch
++/* [0x00001b40] */ 0x8c5cfec6, 0x12024f20, // add vr_txs, vra_base, r3      ; v8min r0, r0, ra_pmax
++/* [0x00001b48] */ 0x4c001bf0, 0xd8025963, // add r5rep, r5, 1              ; mul24      r3, ra0.8a,       r0
++/* [0x00001b50] */ 0x4d01fef1, 0x1e0248a3, // sub r2, rb_fir_off_h, r3      ; mul24      r3, ra0.8d,       r1
++/* [0x00001b58] */ 0x4d03e4f0, 0xda0248a3, // sub r2, r2, r3                ; mul24      r3, ra0.8b << 2,  r0 << 2  @ "mul_used", 0
++/* [0x00001b60] */ 0x40034031, 0xda0109e3, // nop                           ; mul24.ifn  r3, ra0.8b << 12, r1 << 12 @ "mul_used", 0
++/* [0x00001b68] */ 0x4c03c4f0, 0xdc0248a3, // add r2, r2, r3                ; mul24      r3, ra0.8c << 4,  r0 << 4  @ "mul_used", 0
++/* [0x00001b70] */ 0x4c032b71, 0xdc0329e3, // add.setf -, r5, r5            ; mul24.ifn  r3, ra0.8c << 14, r1 << 14 @ "mul_used", 0
++/* [0x00001b78] */ 0xffffff68, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00001b80] */ 0x4c1ca4f7, 0x100248a0, // add r2, r2, r3                ; mul24 r0, ra7, rb10
++/* [0x00001b88] */ 0x550c6ffe, 0x1a024161, // mov ra5, rb6                  ; mul24 r1, rb6, ra3.8b
++/* [0x00001b90] */ 0x8f1c25f6, 0xd00241c6, // asr ra7, r2, v_bit_depth - 8  ; mov rb6, ra7
++/* [0x00001b98] */ 0x4c0c423e, 0x18024860, // add r1, r1, r0                ; mul24 r0, rb4, ra3.8a
++/* [0x00001ba0] */ 0x4d1cb237, 0x10024860, // sub r1, r1, r0                ; mul24 r0, ra7, rb11
++/* [0x00001ba8] */ 0x0d9e7200, 0x10020867, // sub r1, r1, r0
++/* [0x00001bb0] */ 0x8f5c63f6, 0xdc024863, // asr r1, r1, 6                 ; mov r3, ra_blk_height
++/* [0x00001bb8] */ 0x4d592bce, 0x120269e0, // sub.setf -, r5, rb_lcount     ; mul24 r0, r1, ra_wt_mul_l0
++/* [0x00001bc0] */ 0x4c64c1ce, 0x14024821, // add r0, r0, rb_wt_off         ; mul24 r1, r1, ra_kmul_add
++/* [0x00001bc8] */ 0xed427073, 0x12024860, // sub r1, r0, r1                ; v8subs r0, ra_height, r3
++/* [0x00001bd0] */ 0xffffff10, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00001bd8] */ 0x0f9cb3c0, 0xd0020867, // asr r1, r1, i_wt_den_p6
++/* [0x00001be0] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x00001be8] */ 0x5351039f, 0x18024c22, // max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++/* [0x00001bf0] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0        ; mov vw_setup, ra_dma0
++/* [0x00001bf8] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x00001c00] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3                ; mov vw_setup, rb_dma1
++/* [0x00001c08] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3                ; mov vw_addr, ra_dest
++/* [0x00001c10] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x00001c18] */ 0xfffffec8, 0xf0f809e7, // brr -, r:1b
++/* [0x00001c20] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x00001c28] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x00001c30] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_filter_c10_b
++/* [0x00001c38] */ 0x9581cff6, 0x10025c42, // mov vw_setup, rb_vpm_init     ; mov ra2, unif
++/* [0x00001c40] */ 0x8c803ff6, 0x100269e3, // add.setf -, rb_ef, rb_ef      ; mov r3, unif
++/* [0x00001c48] */ 0xf1082dc9, 0xd4024825, // shl r0, ra2.16b, v_x_shift    ; v8subs r5rep, r1, r1
++/* [0x00001c50] */ 0x8c0821f6, 0x12225813, // add r0, r0, rb_elem_x         ; mov ra_y_next, ra2.16a
++/* [0x00001c58] */ 0x8d810bf6, 0x10025850, // sub r1, r5, rb_pitch          ; mov ra_width_height, unif
++/* [0x00001c60] */ 0x93567176, 0x14125815, // max r0, r0, r5                ; mov ra_xshift, ra_xshift_next
++/* [0x00001c68] */ 0x9281a1f6, 0x10025800, // min r0, r0, rb_max_x          ; mov ra0, unif
++/* [0x00001c70] */ 0x9481c1f6, 0xd0025802, // and r0, r0, -4                ; mov ra2, unif
++/* [0x00001c78] */ 0x54404077, 0xd4024862, // and r1, r0, r1                ; mul24 r2, ra_width, v_x_mul
++/* [0x00001c80] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00001c88] */ 0x8c427076, 0x12024821, // add r0, r0, r1                ; mov r1, ra_height
++/* [0x00001c90] */ 0x8c9c163f, 0x10024680, // add ra_base_next, r3, r0      ; mov rb_xshift2, rb_xshift2_next
++/* [0x00001c98] */ 0x8d819eb6, 0x10025756, // sub rb_dma1, rb_dma1_base, r2 ; mov ra_wt_off_mul_l0, unif
++/* [0x00001ca0] */ 0x8c5dc3ce, 0xdc025461, // add rb_i_tmu, r1, (3-4) - PREREAD ; v8min r1, r1, ra_blk_height
++/* [0x00001ca8] */ 0x8c59f3f6, 0xd4139496, // add rb_lcount, r1, (3-4)      ; mov.ifc ra_wt_mul_l0, ra_wt_off_l0
++/* [0x00001cb0] */ 0x918083f6, 0xd0025803, // shl r0, r1, v_dma_h_shift     ; mov ra3, unif
++/* [0x00001cb8] */ 0x8c8270b6, 0x10024823, // add r0, r0, r2                ; mov r3, unif
++/* [0x00001cc0] */ 0x910cf1f6, 0xd2125813, // shl r0, r0, v_dma_wh_shift    ; mov ra_y2_next, ra3.16a
++/* [0x00001cc8] */ 0x8c0db1f6, 0x140246e0, // add ra_dma0, r0, rb_dma0_base ; mov r0, ra3.16b
++/* [0x00001cd0] */ 0x918021f6, 0xd0025801, // shl r0, r0, v_x_shift         ; mov ra1, unif
++/* [0x00001cd8] */ 0x8c8021f6, 0x10025803, // add r0, r0, rb_elem_x         ; mov ra3, unif
++/* [0x00001ce0] */ 0x8d810bf6, 0x10025852, // sub r1, r5, rb_pitch          ; mov ra_wt_off_mul_l1, unif
++/* [0x00001ce8] */ 0x939de17f, 0x10025809, // max r0, r0, r5                ; mov ra9, rb_max_y
++/* [0x00001cf0] */ 0x9265a1f6, 0x14024822, // min r0, r0, rb_max_x          ; mov r2, ra_kmul_add
++/* [0x00001cf8] */ 0x9481c1f6, 0xd0039812, // and r0, r0, -4                ; mov.ifc ra_wt_off_mul_l1, unif
++/* [0x00001d00] */ 0x949dc07f, 0xd0024865, // and r1, r0, r1                ; mov r5rep, -4
++/* [0x00001d08] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00001d10] */ 0x8c827076, 0x1002581c, // add r0, r0, r1                ; mov ra_dest, unif
++/* [0x00001d18] */ 0x8c667636, 0x140254e0, // add rb_base2_next, r3, r0     ; mov r0, ra_fir_off_val
++/* [0x00001d20] */ 0x4c5a7c86, 0x121245a1, // add ra_wt_mul_l0, ra_wt_mul_l0, r2 ; mul24 r1, r0, ra_wt_mul_l0
++/* [0x00001d28] */ 0x4c4a7c86, 0x121244a0, // add ra_wt_mul_l1, ra_wt_mul_l1, r2 ; mul24 r0, r0, ra_wt_mul_l1
++/* [0x00001d30] */ 0x8c4a7076, 0x14024821, // add r0, r0, r1                ; mov r1, ra_wt_off_l1
++/* [0x00001d38] */ 0x910cb3f6, 0xde02484b, // shl r1, r1, i_wt_den_p6       ; mov rb11, ra3.8d
++/* [0x00001d40] */ 0x8d827236, 0x1002531e, // sub rb_wt_off, r1, r0         ; mov ra_link, unif
++/* [0x00001d48] */ 0x95080ff6, 0x1e024287, // mov ra10, rb_xshift2          ; mov rb7,  ra2.8d
++// :1
++/* [0x00001d50] */ 0x0d9d1bc0, 0xa00229e7, // sub.setf -, r5, rb_i_tmu      ; nop                           ; ldtmu0
++/* [0x00001d58] */ 0x8e5539bf, 0x1202888f, // shr r2, r4, ra_xshift         ; mov.ifz rb_base2, rb_base2_next
++/* [0x00001d60] */ 0x8e4d05f6, 0xd0029851, // shr r1, r2, v_v_shift         ; mov.ifz ra_y_y2, ra_y_y2_next
++/* [0x00001d68] */ 0x8c683ff6, 0x1002b9d8, // add.setf -, rb_ef, rb_ef      ; mov.ifz ra_base, ra_base_next
++/* [0x00001d70] */ 0x8c441fb6, 0xd4224463, // add ra_y, 1, ra_y             ; mov r3, ra_y
++/* [0x00001d78] */ 0x93531789, 0xd80248e0, // max r3, r3, ra_k0             ; mov      r0, r1 << 15
++/* [0x00001d80] */ 0x9227f792, 0xd003c8e1, // min r3, r3, ra9               ; mov.ifnc r1, r2 << 1
++/* [0x00001d88] */ 0x559d049f, 0x100e4823, // mov.ifnc r0, r2               ; mul24 r3, r3, rb_pitch
++/* [0x00001d90] */ 0x8c618cc7, 0x10024e20, // add t0s, ra_base, r3          ; v8min r0, r0, rb_pmask
++/* [0x00001d98] */ 0x540183f0, 0x18024862, // and r1, r1, rb_pmask          ; mul24      r2, ra0.8a,       r0
++/* [0x00001da0] */ 0x4d01feb1, 0x1e0248a3, // sub r2, rb_fir_off_h, r2      ; mul24      r3, ra0.8d,       r1
++/* [0x00001da8] */ 0x4d03e4f0, 0xda0248a3, // sub r2, r2, r3                ; mul24      r3, ra0.8b << 2,  r0 << 2  @ "mul_used", 0
++/* [0x00001db0] */ 0x40034031, 0xda0109e3, // nop                           ; mul24.ifn  r3, ra0.8b << 12, r1 << 12 @ "mul_used", 0
++/* [0x00001db8] */ 0x4c03c4f0, 0xdc0248a3, // add r2, r2, r3                ; mul24      r3, ra0.8c << 4,  r0 << 4  @ "mul_used", 0
++/* [0x00001dc0] */ 0x40032031, 0xdc0109e3, // nop                           ; mul24.ifn  r3, ra0.8c << 14, r1 << 14 @ "mul_used", 0
++/* [0x00001dc8] */ 0x4c0854fe, 0xb8025804, // add r0, r2, r3                ; mul24 ra4, rb5, ra2.8a        ; ldtmu1
++/* [0x00001dd0] */ 0x8e2869bf, 0x10024885, // shr r2, r4, ra10              ; mov rb5, rb6
++/* [0x00001dd8] */ 0x8e4505f6, 0xd2024863, // shr r1, r2, v_v_shift         ; mov r3, ra_y2
++/* [0x00001de0] */ 0x8e1c21f6, 0xd00241c6, // shr ra7, r0, v_bit_depth - 8  ; mov rb6, ra7
++/* [0x00001de8] */ 0x8c531789, 0xda124460, // add ra_y2, r3, ra_k1          ; mov      r0, r1 << 15
++/* [0x00001df0] */ 0x9353f792, 0xd803c8e1, // max r3, r3, ra_k0             ; mov.ifnc r1, r2 << 1
++/* [0x00001df8] */ 0x925de7ce, 0x120248e1, // min r3, r3, rb_max_y          ; v8min r1, r1, ra_pmax
++/* [0x00001e00] */ 0x559d049f, 0x100e4823, // mov.ifnc r0, r2               ; mul24 r3, r3, rb_pitch
++/* [0x00001e08] */ 0x8c5cfec6, 0x12024f20, // add t1s, rb_base2, r3         ; v8min r0, r0, ra_pmax
++/* [0x00001e10] */ 0x4c041bf0, 0xd8025962, // add r5rep, r5, 1              ; mul24      r2, ra1.8a,       r0
++/* [0x00001e18] */ 0x4d05feb1, 0x1e0248a3, // sub r2, rb_fir_off_h, r2      ; mul24      r3, ra1.8d,       r1
++/* [0x00001e20] */ 0x4d07e4f0, 0xda0248a3, // sub r2, r2, r3                ; mul24      r3, ra1.8b << 2,  r0 << 2  @ "mul_used", 0
++/* [0x00001e28] */ 0x40074031, 0xda0109e3, // nop                           ; mul24.ifn  r3, ra1.8b << 12, r1 << 12 @ "mul_used", 0
++/* [0x00001e30] */ 0x4c07c6b0, 0xdc0248a3, // add r2, r3, r2                ; mul24      r3, ra1.8c << 4,  r0 << 4  @ "mul_used", 0
++/* [0x00001e38] */ 0x4c072b71, 0xdc0329e3, // add.setf -, r5, r5            ; mul24.ifn  r3, ra1.8c << 14, r1 << 14 @ "mul_used", 0
++/* [0x00001e40] */ 0xfffffef0, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00001e48] */ 0x4c0c94fe, 0x180248a0, // add r2, r2, r3                ; mul24 r0, rb9,  ra3.8a
++/* [0x00001e50] */ 0x550caffe, 0x1a025261, // mov rb9, rb10                 ; mul24 r1, rb10, ra3.8b
++/* [0x00001e58] */ 0x8e2c25f6, 0xd00242ca, // shr ra11, r2, v_bit_depth - 8 ; mov rb10, ra11
++/* [0x00001e60] */ 0x4d08523e, 0x1a0248a1, // sub r2, r1, r0                ; mul24 r1, rb5,  ra2.8b
++/* [0x00001e68] */ 0x8d112bf6, 0x100269e0, // sub.setf -, r5, rb_lcount     ; mov r0, ra4
++/* [0x00001e70] */ 0x4d08623e, 0x1c024860, // sub r1, r1, r0                ; mul24 r0, rb6,  ra2.8c
++/* [0x00001e78] */ 0x4c1c7237, 0x10024860, // add r1, r1, r0                ; mul24 r0, ra7,  rb7
++/* [0x00001e80] */ 0x4d0ca23e, 0x1c024860, // sub r1, r1, r0                ; mul24 r0, rb10, ra3.8c
++/* [0x00001e88] */ 0x4c2cb437, 0x100248a0, // add r2, r2, r0                ; mul24 r0, ra11, rb11
++/* [0x00001e90] */ 0x0d9e7400, 0x100208a7, // sub r2, r2, r0
++/* [0x00001e98] */ 0x0e9c63c0, 0xd0020867, // shr r1, r1, 6
++/* [0x00001ea0] */ 0x4e5865ce, 0xd20248a0, // shr r2, r2, 6                 ; mul24 r0, r1, ra_wt_mul_l0
++/* [0x00001ea8] */ 0x4c4a7456, 0x120248a1, // add r2, r2, r1                ; mul24 r1, r2, ra_wt_mul_l1
++/* [0x00001eb0] */ 0x4c667216, 0x14024862, // add r1, r1, r0                ; mul24 r2, r2, ra_kmul_add
++/* [0x00001eb8] */ 0x8d5e72b6, 0x1c024863, // sub r1, r1, r2                ; mov r3, ra_blk_height
++/* [0x00001ec0] */ 0xec40c3f3, 0x12024860, // add r1, r1, rb_wt_off         ; v8subs r0, ra_height, r3
++/* [0x00001ec8] */ 0xfffffe68, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00001ed0] */ 0x0f667380, 0x18020867, // asr r1, r1, ra_wt_den_p7
++/* [0x00001ed8] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x00001ee0] */ 0x5351039f, 0x18024c22, // max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++/* [0x00001ee8] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0        ; mov vw_setup, ra_dma0
++/* [0x00001ef0] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x00001ef8] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3                ; mov vw_setup, rb_dma1
++/* [0x00001f00] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3                ; mov vw_addr, ra_dest
++/* [0x00001f08] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x00001f10] */ 0xfffffe20, 0xf0f809e7, // brr -, r:1b
++/* [0x00001f18] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x00001f20] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x00001f28] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_sync10_q0
++/* [0x00001f30] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00001f38] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00001f40] */ 0x00000010, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00001f48] */ 0x00000010, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00001f50] */ 0x00000010, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00001f58] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00001f60] */ 0x0000001c, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00001f68] */ 0x00000001, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00001f70] */ 0x0000000d, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync10_q1
++/* [0x00001f78] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00001f80] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00001f88] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00001f90] */ 0x00000000, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00001f98] */ 0x00000011, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00001fa0] */ 0x00000002, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync10_q2
++/* [0x00001fa8] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00001fb0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00001fb8] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00001fc0] */ 0x00000000, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00001fc8] */ 0x00000012, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00001fd0] */ 0x00000003, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync10_q3
++/* [0x00001fd8] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00001fe0] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00001fe8] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00001ff0] */ 0x00000000, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00001ff8] */ 0x00000013, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00002000] */ 0x009e7000, 0x100009e7, // nop
++// ::mc_sync10_q4
++/* [0x00002008] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00002010] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00002018] */ 0x00000014, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00002020] */ 0x00000014, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00002028] */ 0x00000014, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00002030] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00002038] */ 0x0000001d, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00002040] */ 0x00000005, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00002048] */ 0x0000000e, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync10_q5
++/* [0x00002050] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00002058] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00002060] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00002068] */ 0x00000004, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00002070] */ 0x00000015, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00002078] */ 0x00000006, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync10_q6
++/* [0x00002080] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00002088] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00002090] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00002098] */ 0x00000004, 0xe80009e7, // mov  dst, srel(i)
++/* [0x000020a0] */ 0x00000016, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x000020a8] */ 0x00000007, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync10_q7
++/* [0x000020b0] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x000020b8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x000020c0] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x000020c8] */ 0x00000004, 0xe80009e7, // mov  dst, srel(i)
++/* [0x000020d0] */ 0x00000017, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x000020d8] */ 0x009e7000, 0x100009e7, // nop
++// ::mc_sync10_q8
++/* [0x000020e0] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x000020e8] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x000020f0] */ 0x00000018, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x000020f8] */ 0x00000018, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00002100] */ 0x00000018, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00002108] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00002110] */ 0x0000001e, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00002118] */ 0x00000009, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00002120] */ 0x0000000c, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync10_q9
++/* [0x00002128] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00002130] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00002138] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00002140] */ 0x00000008, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00002148] */ 0x00000019, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00002150] */ 0x0000000a, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync10_q10
++/* [0x00002158] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00002160] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00002168] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00002170] */ 0x00000008, 0xe80009e7, // mov  dst, srel(i)
++/* [0x00002178] */ 0x0000001a, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x00002180] */ 0x0000000b, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_sync10_q11
++/* [0x00002188] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00002190] */ 0x159f2fc0, 0x100009e7, // mov -, vw_wait
++/* [0x00002198] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x000021a0] */ 0x00000008, 0xe80009e7, // mov  dst, srel(i)
++/* [0x000021a8] */ 0x0000001b, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x000021b0] */ 0x009e7000, 0x100009e7, // nop
++// ::mc_exit_c10_q0
++// ::mc_exit_y10_q0
++/* [0x000021b8] */ 0x00000002, 0xe00228e7, // mov.setf r3, PREREAD - 1
++// :1
++/* [0x000021c0] */ 0xffffffe0, 0xf03809e7, // brr.anynz -, r:1b
++/* [0x000021c8] */ 0x009e7000, 0xa00009e7, // nop                   ; nop           ; ldtmu0
++/* [0x000021d0] */ 0x009e7000, 0xb00009e7, // nop                   ; nop           ; ldtmu1
++/* [0x000021d8] */ 0x0d9c17c0, 0xd00228e7, // sub.setf r3, r3, 1
++/* [0x000021e0] */ 0x159f2fc0, 0x100009e7, // mov  -, vw_wait
++/* [0x000021e8] */ 0x0000001c, 0xe80009e7, // mov  dst, sacq(i)
++/* [0x000021f0] */ 0x009e7000, 0x300009e7, // nop                   ; nop           ; thrend
++/* [0x000021f8] */ 0x00000001, 0xe00209a7, // mov interrupt, 1
++/* [0x00002200] */ 0x009e7000, 0x100009e7, // nop
++// ::mc_exit_c10_qn
++// ::mc_exit_y10_qn
++/* [0x00002208] */ 0x00000002, 0xe00228e7, // mov.setf r3, PREREAD - 1
++// :1
++/* [0x00002210] */ 0xffffffe0, 0xf03809e7, // brr.anynz -, r:1b
++/* [0x00002218] */ 0x009e7000, 0xa00009e7, // nop                   ; nop           ; ldtmu0
++/* [0x00002220] */ 0x009e7000, 0xb00009e7, // nop                   ; nop           ; ldtmu1
++/* [0x00002228] */ 0x0d9c17c0, 0xd00228e7, // sub.setf r3, r3, 1
++/* [0x00002230] */ 0x159f2fc0, 0x100009e7, // mov  -, vw_wait
++/* [0x00002238] */ 0x009e7000, 0x300009e7, // nop                   ; nop           ; thrend
++/* [0x00002240] */ 0x009e7000, 0x100009e7, // nop
++/* [0x00002248] */ 0x009e7000, 0x100009e7, // nop
++// ::mc_setup_y10_q0
++/* [0x00002250] */ 0x0000000c, 0xe80009e7, // mov  dst, srel(i)
++// ::mc_setup_y10_qn
++/* [0x00002258] */ 0x95801ff6, 0xd0025900, // mov tmurs, 1                  ; mov ra0, unif
++/* [0x00002260] */ 0x15827d80, 0x10020267, // mov ra9, unif
++/* [0x00002268] */ 0x15827d80, 0x10020067, // mov ra1, unif
++/* [0x00002270] */ 0xaaaaff00, 0xe6020827, // mov r0, [0,2,0,2,0,2,0,2,1,3,1,3,1,3,1,3]
++/* [0x00002278] */ 0x9181e1f6, 0xd00250cb, // shl rb_ef, r0, i_shift30      ; mov ra11, unif
++/* [0x00002280] */ 0xff800100, 0xe0020527, // mov ra_kff800100, 0xff800100
++/* [0x00002288] */ 0x0000ffff, 0xe0021627, // mov rb_pmask, v_pmask
++/* [0x00002290] */ 0x000803ff, 0xe00205e7, // mov ra_blk_height_pmax, ((1 << v_bit_depth) - 1) | (v_blk_height << 16)
++/* [0x00002298] */ 0x00010000, 0xe00217e7, // mov rb_fir_off_h, (FIR_OFFSET << (v_bit_depth - 8))
++/* [0x000022a0] */ 0x4000000c, 0xe0020667, // mov ra_fir_off_val_wt_den_p7, (FIR_OFFSET << 16) | (DENOM + 15 - v_bit_depth)
++/* [0x000022a8] */ 0x050b0a00, 0xe0021567, // mov rb_y_coeffs_2, 0x050b0a00
++/* [0x000022b0] */ 0x11283a40, 0xe00215a7, // mov rb_y_coeffs_3, 0x11283a40
++/* [0x000022b8] */ 0x0a0b0500, 0xe00215e7, // mov rb_y_coeffs_5, 0x0a0b0500
++/* [0x000022c0] */ 0x15827d80, 0x100200e7, // mov ra3, unif
++/* [0x000022c8] */ 0x95803ff6, 0x10024754, // mov ra_ef, rb_ef              ; mov rb_xpitch, unif
++/* [0x000022d0] */ 0x0d0c1dc0, 0xd4020827, // sub r0, ra3.16b, 1
++/* [0x000022d8] */ 0x119c11c0, 0xd00216a7, // shl rb_max_x, r0, v_x_shift
++/* [0x000022e0] */ 0x0d0c1dc0, 0xd20217a7, // sub rb_max_y, ra3.16a, 1
++/* [0x000022e8] */ 0x959a0dbf, 0x100248d0, // mov r3, elem_num              ; mov rb_pitch, unif
++/* [0x000022f0] */ 0xc0000000, 0xe0020867, // mov r1, vdw_setup_1(0)
++/* [0x000022f8] */ 0x159d03c0, 0x10021667, // or  rb_dma1_base, r1, rb_pitch
++/* [0x00002300] */ 0x0c027cc0, 0x14020827, // add r0, ra0.16b, r3
++/* [0x00002308] */ 0x119c11c0, 0xd0020827, // shl r0, r0, v_x_shift
++/* [0x00002310] */ 0x139c01c0, 0xd0020827, // max r0, r0, 0
++/* [0x00002318] */ 0x129da1c0, 0x10020827, // min r0, r0, rb_max_x
++/* [0x00002320] */ 0x119c31c0, 0xd0220567, // shl ra_xshift_next, r0, 3
++/* [0x00002328] */ 0xf49dc1d2, 0xd0024822, // and r0, r0, -4                ; v8subs r2, r2, r2
++/* [0x00002330] */ 0x0d9d05c0, 0x100208a7, // sub r2, r2, rb_pitch
++/* [0x00002338] */ 0x149e7080, 0x10020867, // and r1, r0, r2
++/* [0x00002340] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00002348] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1
++/* [0x00002350] */ 0x0c267c00, 0x10020627, // add ra_base, ra9, r0
++/* [0x00002358] */ 0x0c067cc0, 0x14020827, // add r0, ra1.16b, r3
++/* [0x00002360] */ 0x119c11c0, 0xd0020827, // shl r0, r0, v_x_shift
++/* [0x00002368] */ 0x139c01c0, 0xd0020827, // max r0, r0, 0
++/* [0x00002370] */ 0x129da1c0, 0x10020827, // min r0, r0, rb_max_x
++/* [0x00002378] */ 0x119c31c0, 0xd0021067, // shl rb_xshift2_next, r0, 3
++/* [0x00002380] */ 0x149dc1c0, 0xd0020827, // and r0, r0, -4
++/* [0x00002388] */ 0x149e7080, 0x10020867, // and r1, r0, r2
++/* [0x00002390] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00002398] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1
++/* [0x000023a0] */ 0x0c2e7c00, 0x100213e7, // add rb_base2, ra11, r0
++/* [0x000023a8] */ 0x80027036, 0x120049e0, // nop                           ; mov r0, ra0.16a
++/* [0x000023b0] */ 0x95043ff6, 0xd20248e2, // mov r3, PREREAD               ; mov r2, ra1.16a
++// :1
++/* [0x000023b8] */ 0x0d9c17c0, 0xd00228e7, // sub.setf r3, r3, 1
++/* [0x000023c0] */ 0x139c01c0, 0xd0020867, // max r1, r0, 0
++/* [0x000023c8] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_max_y
++/* [0x000023d0] */ 0x4c51018f, 0x1a024821, // add r0, r0, ra_k1             ; mul24 r1, r1, rb_pitch
++/* [0x000023d8] */ 0x8c627c40, 0x10225e11, // add t0s, ra_base, r1          ; mov ra_y, r0
++/* [0x000023e0] */ 0x139c05c0, 0xd0020867, // max r1, r2, 0
++/* [0x000023e8] */ 0xffffffb0, 0xf03809e7, // brr.anynz -, r:1b
++/* [0x000023f0] */ 0x129de3c0, 0x10020867, // min r1, r1, rb_max_y
++/* [0x000023f8] */ 0x4c51058f, 0x1a0248a1, // add r2, r2, ra_k1             ; mul24 r1, r1, rb_pitch
++/* [0x00002400] */ 0x8c9cfe52, 0x10125f11, // add t1s, rb_base2, r1         ; mov ra_y2, r2
++/* [0x00002408] */ 0x159e6fc0, 0x100208a7, // mov r2, qpu_num
++/* [0x00002410] */ 0x0f9c15c0, 0xd0020867, // asr r1, r2, 1
++/* [0x00002418] */ 0x119c43c0, 0xd0020867, // shl r1, r1, 4
++/* [0x00002420] */ 0x149c15c0, 0xd0020827, // and r0, r2, 1
++/* [0x00002428] */ 0x159e7040, 0x10020827, // or  r0, r0, r1
++/* [0x00002430] */ 0x00002900, 0xe0020867, // mov r1, vpm_setup(0, 2, h16p(0, 0))
++/* [0x00002438] */ 0x0c9e7040, 0x10021727, // add r_vpm, r0, r1
++/* [0x00002440] */ 0x80004002, 0xe0020867, // mov r1, vdw_setup_0(0, 0, dma_h16p(0,0,0))
++/* [0x00002448] */ 0x119c61c0, 0xd0020827, // shl r0, r0, 6
++/* [0x00002450] */ 0x0c9e7040, 0x100216e7, // add r_dma, r0, r1
++/* [0x00002458] */ 0x15827d80, 0x100207a7, // mov ra_link, unif
++/* [0x00002460] */ 0x00000000, 0xe0024208, // mov ra8,  0                   ; mov rb8,  0
++/* [0x00002468] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00002470] */ 0x00000000, 0xe0024249, // mov ra9,  0                   ; mov rb9,  0
++/* [0x00002478] */ 0x00000000, 0xe002428a, // mov ra10, 0                   ; mov rb10, 0
++/* [0x00002480] */ 0x00000000, 0xe00242cb, // mov ra11, 0                   ; mov rb11, 0
++// :per_block_setup_10
++/* [0x00002488] */ 0x119c11c0, 0xd0020827, // shl r0, r0, v_x_shift
++/* [0x00002490] */ 0x93567176, 0x14125815, // max r0, r0, r5                ; mov ra_xshift, ra_xshift_next
++/* [0x00002498] */ 0x129da1c0, 0x10020827, // min r0, r0, rb_max_x
++/* [0x000024a0] */ 0x119c31c0, 0xd0220567, // shl ra_xshift_next, r0, 3
++/* [0x000024a8] */ 0x149dc1c0, 0xd0020827, // and r0, r0, -4
++/* [0x000024b0] */ 0x8d810bf6, 0x1002589a, // sub r2, r5, rb_pitch          ; mov ra_base_next, unif
++/* [0x000024b8] */ 0x940270b6, 0x12225853, // and r1, r0, r2                ; mov ra_y_next, ra0.16a
++/* [0x000024c0] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x000024c8] */ 0x8c827076, 0x10025801, // add r0, r0, r1                ; mov ra1, unif
++/* [0x000024d0] */ 0x0c6a7c00, 0x100206a7, // add ra_base_next, ra_base_next, r0
++/* [0x000024d8] */ 0x0c067cc0, 0x14020827, // add r0, ra1.16b, r3
++/* [0x000024e0] */ 0x119c11c0, 0xd0020827, // shl r0, r0, v_x_shift
++/* [0x000024e8] */ 0x93067176, 0x12125813, // max r0, r0, r5                ; mov ra_y2_next, ra1.16a
++/* [0x000024f0] */ 0x9281a1f6, 0x10024813, // min r0, r0, rb_max_x          ; mov rb_base2_next, unif
++/* [0x000024f8] */ 0x119c31c0, 0xd0021067, // shl rb_xshift2_next, r0, 3
++/* [0x00002500] */ 0x9481c1f6, 0xd0025810, // and r0, r0, -4                ; mov ra_width_height, unif
++/* [0x00002508] */ 0x949dc0bf, 0x10024871, // and r1, r0, r2                ; mov vw_setup, rb_vpm_init
++/* [0x00002510] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00002518] */ 0x4c402077, 0xd4024821, // add r0, r0, r1                ; mul24 r1, ra_width, v_x_mul
++/* [0x00002520] */ 0x0c9d3e00, 0x100214e7, // add rb_base2_next, rb_base2_next, r0
++/* [0x00002528] */ 0x8d419e76, 0x12025760, // sub rb_dma1, rb_dma1_base, r1 ; mov r0, ra_height
++/* [0x00002530] */ 0x8c5dc1c6, 0xdc025460, // add rb_i_tmu, r0, (7-8) - PREREAD ; v8min r0, r0, ra_blk_height
++/* [0x00002538] */ 0x0c9df1c0, 0xd00214a7, // add rb_lcount, r0, (7-8)
++/* [0x00002540] */ 0x916481f6, 0xd4024823, // shl r0, r0, v_dma_h_shift     ; mov r3, ra_kmul_add
++/* [0x00002548] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1
++/* [0x00002550] */ 0x9164f1f6, 0xd4024822, // shl r0, r0, v_dma_wh_shift    ; mov r2, ra_fir_off_val
++/* [0x00002558] */ 0x8c81b1f6, 0x100246e0, // add ra_dma0, r0, rb_dma0_base ; mov r0, unif
++/* [0x00002560] */ 0x918101f6, 0xd00a5816, // shl.ifnn r0, r0, i_shift16    ; mov ra_wt_off_mul_l0, unif
++/* [0x00002568] */ 0x915031f6, 0xde024205, // shl ra8, r0, 3                ; mov rb5, ra_k255
++/* [0x00002570] */ 0x01040400, 0xe0020867, // mov r1, 0x01040400
++/* [0x00002578] */ 0x10227380, 0x1e5200a7, // ror ra2.8b, r1, ra8.8d
++/* [0x00002580] */ 0x10227380, 0x1c520027, // ror ra0.8b, r1, ra8.8c
++/* [0x00002588] */ 0x10215f80, 0x1e6200a7, // ror ra2.8c, rb_y_coeffs_2, ra8.8d
++/* [0x00002590] */ 0x10215f80, 0x1c620027, // ror ra0.8c, rb_y_coeffs_2, ra8.8c
++/* [0x00002598] */ 0x00010100, 0xe0020867, // mov r1,0x00010100
++/* [0x000025a0] */ 0x902203bf, 0x1e025812, // ror r0, r1, ra8.8d            ; mov ra_wt_off_mul_l1, unif
++/* [0x000025a8] */ 0x90205387, 0x1c424004, // ror ra0.8a, r1, ra8.8c        ; v8min rb4, r0, rb5
++/* [0x000025b0] */ 0x914883f6, 0xd0031856, // shl r1, r1, 8                 ; mov.ifn ra_wt_off_mul_l0, ra_wt_off_mul_l1
++/* [0x000025b8] */ 0x902203bf, 0x1e02581c, // ror r0, r1, ra8.8d            ; mov ra_dest, unif
++/* [0x000025c0] */ 0x90205387, 0x1c72404b, // ror ra1.8d, r1, ra8.8c        ; v8min rb11, r0, rb5
++/* [0x000025c8] */ 0x10216f80, 0x1e7200a7, // ror ra2.8d, rb_y_coeffs_3, ra8.8d
++/* [0x000025d0] */ 0x10216f80, 0x1c720027, // ror ra0.8d, rb_y_coeffs_3, ra8.8c
++/* [0x000025d8] */ 0x10217f80, 0x1e5200e7, // ror ra3.8b, rb_y_coeffs_5, ra8.8d
++/* [0x000025e0] */ 0x10217f80, 0x1c520067, // ror ra1.8b, rb_y_coeffs_5, ra8.8c
++/* [0x000025e8] */ 0x04040100, 0xe0020867, // mov r1,0x04040100
++/* [0x000025f0] */ 0x10227380, 0x1e6200e7, // ror ra3.8c, r1, ra8.8d
++/* [0x000025f8] */ 0x902183bf, 0xdc624065, // ror ra1.8c, r1, ra8.8c        ; mov r5rep, -8
++/* [0x00002600] */ 0x00000000, 0xf0f7c9e7, // bra -, ra_link
++/* [0x00002608] */ 0x3a281100, 0xe0020867, // mov r1,0x3a281100
++/* [0x00002610] */ 0x902203bf, 0x1e02581e, // ror r0, r1, ra8.8d            ; mov ra_link, unif
++/* [0x00002618] */ 0x90205387, 0x1c424048, // ror ra1.8a, r1, ra8.8c        ; v8min rb8, r0, rb5
++// ::mc_filter_y10_pxx
++/* [0x00002620] */ 0xfffffe48, 0xf0f807a7, // brr ra_link, r:per_block_setup_10
++/* [0x00002628] */ 0x959a0ff6, 0x10024023, // mov ra0, unif                 ; mov r3, elem_num
++/* [0x00002630] */ 0xec9c3fd2, 0x100269e5, // add.setf -, rb_ef, rb_ef      ; v8subs r5rep, r2, r2
++/* [0x00002638] */ 0x8c001cff, 0x14024800, // add r0, ra0.16b, r3           ; mov rb_xshift2, rb_xshift2_next
++/* [0x00002640] */ 0x1158adc0, 0xd4020867, // shl r1, ra_wt_off_l0, i_wt_den_p5
++/* [0x00002648] */ 0x4c5a7cd6, 0x121245a0, // add ra_wt_mul_l0, ra_wt_mul_l0, r3 ; mul24 r0, r2, ra_wt_mul_l0
++/* [0x00002650] */ 0x8d9c423f, 0x1042531d, // sub rb_wt_off, r1, r0         ; mov ra_ef.8a, rb4
++// :1
++/* [0x00002658] */ 0x4c745dbe, 0x100279c4, // add.setf -, ra_ef, ra_ef      ; mul24 ra4, rb5, ra_ef
++/* [0x00002660] */ 0x93440dff, 0xd40248a1, // max r2, ra_y, 0               ; mov r1, 0
++/* [0x00002668] */ 0x9251e5f6, 0x1a0248a3, // min r2, r2, rb_max_y          ; mov r3, ra_k1
++/* [0x00002670] */ 0x4c450cd7, 0xa4224462, // add ra_y, ra_y, r3            ; mul24 r2, r2, rb_pitch        ; ldtmu0
++/* [0x00002678] */ 0x8c606cbf, 0x10024e05, // add t0s, ra_base, r2          ; mov rb5,  rb6
++/* [0x00002680] */ 0x8e5479bf, 0x12024806, // shr r0, r4, ra_xshift         ; mov rb6,  rb7
++/* [0x00002688] */ 0x93458c47, 0xb20248a0, // max r2, ra_y2, r1             ; v8min r0, r0, rb_pmask        ; ldtmu1
++/* [0x00002690] */ 0x8e2009f6, 0x10024847, // shr r1, r4, rb_xshift2        ; mov rb7, ra8
++/* [0x00002698] */ 0x925de5ce, 0x120248a1, // min r2, r2, rb_max_y          ; v8min r1, r1, ra_pmax
++/* [0x000026a0] */ 0x4c450cd7, 0x12124462, // add ra_y2, ra_y2, r3          ; mul24 r2, r2, rb_pitch
++/* [0x000026a8] */ 0x8c24feb6, 0x10025f08, // add t1s, rb_base2, r2         ; mov ra8,  ra9
++/* [0x000026b0] */ 0x4c038af1, 0xd8025962, // add r5rep, r5, r3     ; mul24      r2, ra0.8a << 8,  r1 << 8  @ "mul_used", 0
++/* [0x000026b8] */ 0x5501fff0, 0x180348e2, // mov r3, rb_fir_off_h  ; mul24.ifnn r2, ra0.8a,       r0
++/* [0x000026c0] */ 0x4d03f6b0, 0xda0248a3, // sub r2, r3, r2        ; mul24      r3, ra0.8b << 1,  r0 << 1  @ "mul_used", 0
++/* [0x000026c8] */ 0x40037031, 0xda0109e3, // nop                   ; mul24.ifn  r3, ra0.8b << 9,  r1 << 9  @ "mul_used", 0
++/* [0x000026d0] */ 0x4c03e4f0, 0xdc0248a3, // add r2, r2, r3        ; mul24      r3, ra0.8c << 2,  r0 << 2  @ "mul_used", 0
++/* [0x000026d8] */ 0x40036031, 0xdc0109e3, // nop                   ; mul24.ifn  r3, ra0.8c << 10, r1 << 10 @ "mul_used", 0
++/* [0x000026e0] */ 0x4d03d4f0, 0xde0248a3, // sub r2, r2, r3        ; mul24      r3, ra0.8d << 3,  r0 << 3  @ "mul_used", 0
++/* [0x000026e8] */ 0x40035031, 0xde0109e3, // nop                   ; mul24.ifn  r3, ra0.8d << 11, r1 << 11 @ "mul_used", 0
++/* [0x000026f0] */ 0x4c07c4f0, 0xd80248a3, // add r2, r2, r3        ; mul24      r3, ra1.8a << 4,  r0 << 4  @ "mul_used", 0
++/* [0x000026f8] */ 0x40074031, 0xd80109e3, // nop                   ; mul24.ifn  r3, ra1.8a << 12, r1 << 12 @ "mul_used", 0
++/* [0x00002700] */ 0x4c07b4f0, 0xda0248a3, // add r2, r2, r3        ; mul24      r3, ra1.8b << 5,  r0 << 5  @ "mul_used", 0
++/* [0x00002708] */ 0x40073031, 0xda0109e3, // nop                   ; mul24.ifn  r3, ra1.8b << 13, r1 << 13 @ "mul_used", 0
++/* [0x00002710] */ 0x4d07a4f0, 0xdc0248a3, // sub r2, r2, r3        ; mul24      r3, ra1.8c << 6,  r0 << 6  @ "mul_used", 0
++/* [0x00002718] */ 0x40072031, 0xdc0109e3, // nop                   ; mul24.ifn  r3, ra1.8c << 14, r1 << 14 @ "mul_used", 0
++/* [0x00002720] */ 0x4c0794f0, 0xde0248a3, // add r2, r2, r3        ; mul24      r3, ra1.8d << 7,  r0 << 7  @ "mul_used", 0
++/* [0x00002728] */ 0x4c071b71, 0xde0329e3, // add.setf -, r5, r5    ; mul24.ifn  r3, ra1.8d << 15, r1 << 15 @ "mul_used", 0
++/* [0x00002730] */ 0xffffff08, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00002738] */ 0x4d0854fe, 0x1a0248a1, // sub r2, r2, r3                ; mul24 r1, rb5,  ra2.8b
++/* [0x00002740] */ 0x550caffe, 0x1a024260, // mov ra9,  rb10                ; mul24 r0, rb10, ra3.8b
++/* [0x00002748] */ 0x8f2c25f6, 0xd00242ca, // asr ra11, r2, v_bit_depth - 8 ; mov rb10, ra11
++/* [0x00002750] */ 0x4d08623e, 0x1c024860, // sub r1, r1, r0                ; mul24 r0, rb6,  ra2.8c
++/* [0x00002758] */ 0x4d08723e, 0x1e024860, // sub r1, r1, r0                ; mul24 r0, rb7,  ra2.8d
++/* [0x00002760] */ 0x4c208237, 0x10024860, // add r1, r1, r0                ; mul24 r0, ra8,  rb8
++/* [0x00002768] */ 0x4c0ca23e, 0x1c024860, // add r1, r1, r0                ; mul24 r0, rb10, ra3.8c
++/* [0x00002770] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0                ; mul24 r0, ra11, rb11
++/* [0x00002778] */ 0x8d5d1bf6, 0x1c0269e3, // sub.setf -, r5, rb_i_tmu      ; mov r3, ra_blk_height
++/* [0x00002780] */ 0x8d1133bf, 0x1002884f, // sub r1, r1, ra4               ; mov.ifz rb_base2, rb_base2_next
++/* [0x00002788] */ 0x8d6a7236, 0x10029858, // sub r1, r1, r0                ; mov.ifz ra_base, ra_base_next
++/* [0x00002790] */ 0x8f4c63f6, 0xd0029851, // asr r1, r1, 6                 ; mov.ifz ra_y_y2, ra_y_y2_next
++/* [0x00002798] */ 0x4d592bce, 0x120269e0, // sub.setf -, r5, rb_lcount     ; mul24 r0, r1, ra_wt_mul_l0
++/* [0x000027a0] */ 0x4c64c1ce, 0x14024821, // add r0, r0, rb_wt_off         ; mul24 r1, r1, ra_kmul_add
++/* [0x000027a8] */ 0xed427073, 0x12024860, // sub r1, r0, r1                ; v8subs r0, ra_height, r3
++/* [0x000027b0] */ 0xfffffe88, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x000027b8] */ 0x0f9cb3c0, 0xd0020867, // asr r1, r1, i_wt_den_p6
++/* [0x000027c0] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x000027c8] */ 0x5351039f, 0x18024c22, // max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++/* [0x000027d0] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0        ; mov vw_setup, ra_dma0
++/* [0x000027d8] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x000027e0] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3                ; mov vw_setup, rb_dma1
++/* [0x000027e8] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3                ; mov vw_addr, ra_dest
++/* [0x000027f0] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x000027f8] */ 0xfffffe40, 0xf0f809e7, // brr -, r:1b
++/* [0x00002800] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x00002808] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x00002810] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_filter_y10_p00
++/* [0x00002818] */ 0x959a0ff6, 0x10024020, // mov ra0, unif                 ; mov r0, elem_num
++/* [0x00002820] */ 0xf5567dad, 0x14124565, // mov ra_xshift, ra_xshift_next ; v8subs r5rep, r5, r5
++/* [0x00002828] */ 0x8c020c3f, 0x1402581a, // add r0, ra0.16b, r0           ; mov ra_base_next, unif
++/* [0x00002830] */ 0x119c11c0, 0xd0020827, // shl r0, r0, v_x_shift
++/* [0x00002838] */ 0x93027176, 0x12225813, // max r0, r0, r5                ; mov ra_y_next, ra0.16a
++/* [0x00002840] */ 0x9281a1f6, 0x10025810, // min r0, r0, rb_max_x          ; mov ra_width_height, unif
++/* [0x00002848] */ 0x119c31c0, 0xd0220567, // shl ra_xshift_next, r0, 3
++/* [0x00002850] */ 0x149dc1c0, 0xd0020827, // and r0, r0, -4
++/* [0x00002858] */ 0x8d810bf6, 0x10025896, // sub r2, r5, rb_pitch          ; mov ra_wt_off_mul_l0, unif
++/* [0x00002860] */ 0x149e7080, 0x10020867, // and r1, r0, r2
++/* [0x00002868] */ 0x569d404f, 0x10024821, // xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++/* [0x00002870] */ 0x8c827076, 0x1002581c, // add r0, r0, r1                ; mov ra_dest, unif
++/* [0x00002878] */ 0x8c69cc3f, 0x100246b1, // add ra_base_next, ra_base_next, r0 ; mov vw_setup, rb_vpm_init
++/* [0x00002880] */ 0x11401dc0, 0xd4020867, // shl r1, ra_width, v_x_shift
++/* [0x00002888] */ 0x8d419e76, 0x12025760, // sub rb_dma1, rb_dma1_base, r1 ; mov r0, ra_height
++/* [0x00002890] */ 0x8d5c31c6, 0xdc025460, // sub rb_i_tmu, r0, PREREAD     ; v8min r0, r0, ra_blk_height
++/* [0x00002898] */ 0x919c81c0, 0xd0024812, // shl r0, r0, v_dma_h_shift     ; mov rb_lcount, r0
++/* [0x000028a0] */ 0x0c9e7040, 0x10020827, // add r0, r0, r1
++/* [0x000028a8] */ 0x1158edc0, 0xd4021327, // shl rb_wt_off, ra_wt_off_l0, DENOM + 7
++/* [0x000028b0] */ 0x9180f1f6, 0xd002581e, // shl r0, r0, v_dma_wh_shift    ; mov ra_link, unif
++/* [0x000028b8] */ 0x0c9db1c0, 0x100206e7, // add ra_dma0, r0, rb_dma0_base
++// :1
++/* [0x000028c0] */ 0xcd511bee, 0x1a0269e5, // sub.setf -, r5, rb_i_tmu      ; v8adds r5rep, r5, ra_k1
++/* [0x000028c8] */ 0x804e7036, 0xa42099d1, // nop                           ; mov.ifz ra_y, ra_y_next       ; ldtmu0
++/* [0x000028d0] */ 0x8e5509bf, 0x12024823, // shr r0, r4, ra_xshift         ; mov r3, rb_pitch
++/* [0x000028d8] */ 0x13440dc0, 0xd40208a7, // max r2, ra_y, 0
++/* [0x000028e0] */ 0x9269e5f6, 0x10029898, // min r2, r2, rb_max_y          ; mov.ifz ra_base, ra_base_next
++/* [0x000028e8] */ 0x4c441dd3, 0xd4224462, // add ra_y, ra_y, 1             ; mul24 r2, r2, r3
++/* [0x000028f0] */ 0x8c618c87, 0x10024e20, // add t0s, ra_base, r2          ; v8min r0, r0, rb_pmask
++/* [0x000028f8] */ 0x4d592bc6, 0x120269e1, // sub.setf -, r5, rb_lcount     ; mul24 r1, r0, ra_wt_mul_l0
++/* [0x00002900] */ 0x915c83f6, 0xdc024863, // shl r1, r1, 8                 ; mov r3, ra_blk_height
++/* [0x00002908] */ 0xec40c3f3, 0x12024860, // add r1, r1, rb_wt_off         ; v8subs r0, ra_height, r3
++/* [0x00002910] */ 0xffffff90, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00002918] */ 0x0f9cf3c0, 0xd0020867, // asr r1, r1, DENOM + 8
++/* [0x00002920] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x00002928] */ 0x5351039f, 0x18024c22, // max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++/* [0x00002930] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0 ; mov vw_setup, ra_dma0
++/* [0x00002938] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x00002940] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3        ; mov vw_setup, rb_dma1
++/* [0x00002948] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3        ; mov vw_addr, ra_dest
++/* [0x00002950] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x00002958] */ 0xffffff48, 0xf0f809e7, // brr -, r:1b
++/* [0x00002960] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x00002968] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x00002970] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_filter_y10_bxx
++/* [0x00002978] */ 0xfffffaf0, 0xf0f807a7, // brr ra_link, r:per_block_setup_10
++/* [0x00002980] */ 0x959a0ff6, 0x10024023, // mov ra0, unif                 ; mov r3, elem_num
++/* [0x00002988] */ 0xec9c3fd2, 0x100269e5, // add.setf -, rb_ef, rb_ef      ; v8subs r5rep, r2, r2
++/* [0x00002990] */ 0x8c001cff, 0x14024800, // add r0, ra0.16b, r3           ; mov rb_xshift2, rb_xshift2_next
++/* [0x00002998] */ 0x1158bdc0, 0xd4020867, // shl r1, ra_wt_off_l0, i_wt_den_p6
++/* [0x000029a0] */ 0x4c5a7cd6, 0x121245a0, // add ra_wt_mul_l0, ra_wt_mul_l0, r3 ; mul24 r0, r2, ra_wt_mul_l0
++/* [0x000029a8] */ 0x4d4a7216, 0x12024860, // sub r1, r1, r0                ; mul24 r0, r2, ra_wt_mul_l1
++/* [0x000029b0] */ 0x8d9c423f, 0x1042531d, // sub rb_wt_off, r1, r0         ; mov ra_ef.8a, rb4
++// :1
++/* [0x000029b8] */ 0x4c745dbe, 0x100279c4, // add.setf -, ra_ef, ra_ef      ; mul24 ra4, rb5, ra_ef
++/* [0x000029c0] */ 0x93440dff, 0xd40248a1, // max r2, ra_y, 0               ; mov r1, 0
++/* [0x000029c8] */ 0x9251e5f6, 0x1a0248a3, // min r2, r2, rb_max_y          ; mov r3, ra_k1
++/* [0x000029d0] */ 0x4c450cd7, 0xa4224462, // add ra_y, ra_y, r3            ; mul24 r2, r2, rb_pitch        ; ldtmu0
++/* [0x000029d8] */ 0x8c606cbf, 0x10024e05, // add t0s, ra_base, r2          ; mov rb5,  rb6
++/* [0x000029e0] */ 0x8e5479bf, 0x12024806, // shr r0, r4, ra_xshift         ; mov rb6,  rb7
++/* [0x000029e8] */ 0x93458c47, 0xb20248a0, // max r2, ra_y2, r1             ; v8min r0, r0, rb_pmask        ; ldtmu1
++/* [0x000029f0] */ 0x8e2009f6, 0x10024847, // shr r1, r4, rb_xshift2        ; mov rb7, ra8
++/* [0x000029f8] */ 0x925de5ce, 0x120248a1, // min r2, r2, rb_max_y          ; v8min r1, r1, ra_pmax
++/* [0x00002a00] */ 0x4c450cd7, 0x12124462, // add ra_y2, ra_y2, r3          ; mul24 r2, r2, rb_pitch
++/* [0x00002a08] */ 0x8c24feb6, 0x10025f08, // add t1s, rb_base2, r2         ; mov ra8,  ra9
++/* [0x00002a10] */ 0x4c038af1, 0xd8025962, // add r5rep, r5, r3     ; mul24      r2, ra0.8a << 8,  r1 << 8  @ "mul_used", 0
++/* [0x00002a18] */ 0x5501fff0, 0x180348e2, // mov r3, rb_fir_off_h  ; mul24.ifnn r2, ra0.8a,       r0
++/* [0x00002a20] */ 0x4d03f6b0, 0xda0248a3, // sub r2, r3, r2        ; mul24      r3, ra0.8b << 1,  r0 << 1  @ "mul_used", 0
++/* [0x00002a28] */ 0x40037031, 0xda0109e3, // nop                   ; mul24.ifn  r3, ra0.8b << 9,  r1 << 9  @ "mul_used", 0
++/* [0x00002a30] */ 0x4c03e4f0, 0xdc0248a3, // add r2, r2, r3        ; mul24      r3, ra0.8c << 2,  r0 << 2  @ "mul_used", 0
++/* [0x00002a38] */ 0x40036031, 0xdc0109e3, // nop                   ; mul24.ifn  r3, ra0.8c << 10, r1 << 10 @ "mul_used", 0
++/* [0x00002a40] */ 0x4d03d4f0, 0xde0248a3, // sub r2, r2, r3        ; mul24      r3, ra0.8d << 3,  r0 << 3  @ "mul_used", 0
++/* [0x00002a48] */ 0x40035031, 0xde0109e3, // nop                   ; mul24.ifn  r3, ra0.8d << 11, r1 << 11 @ "mul_used", 0
++/* [0x00002a50] */ 0x4c07c4f0, 0xd80248a3, // add r2, r2, r3        ; mul24      r3, ra1.8a << 4,  r0 << 4  @ "mul_used", 0
++/* [0x00002a58] */ 0x40074031, 0xd80109e3, // nop                   ; mul24.ifn  r3, ra1.8a << 12, r1 << 12 @ "mul_used", 0
++/* [0x00002a60] */ 0x4c07b4f0, 0xda0248a3, // add r2, r2, r3        ; mul24      r3, ra1.8b << 5,  r0 << 5  @ "mul_used", 0
++/* [0x00002a68] */ 0x40073031, 0xda0109e3, // nop                   ; mul24.ifn  r3, ra1.8b << 13, r1 << 13 @ "mul_used", 0
++/* [0x00002a70] */ 0x4d07a4f0, 0xdc0248a3, // sub r2, r2, r3        ; mul24      r3, ra1.8c << 6,  r0 << 6  @ "mul_used", 0
++/* [0x00002a78] */ 0x40072031, 0xdc0109e3, // nop                   ; mul24.ifn  r3, ra1.8c << 14, r1 << 14 @ "mul_used", 0
++/* [0x00002a80] */ 0x4c0794f0, 0xde0248a3, // add r2, r2, r3        ; mul24      r3, ra1.8d << 7,  r0 << 7  @ "mul_used", 0
++/* [0x00002a88] */ 0x4c071b71, 0xde0329e3, // add.setf -, r5, r5    ; mul24.ifn  r3, ra1.8d << 15, r1 << 15 @ "mul_used", 0
++/* [0x00002a90] */ 0xffffff08, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00002a98] */ 0x4d0854fe, 0x1a0248a1, // sub r2, r2, r3                ; mul24 r1, rb5,  ra2.8b
++/* [0x00002aa0] */ 0x550caffe, 0x1a024260, // mov ra9,  rb10                ; mul24 r0, rb10, ra3.8b
++/* [0x00002aa8] */ 0x8f2c25f6, 0xd00242ca, // asr ra11, r2, v_bit_depth - 8 ; mov rb10, ra11
++/* [0x00002ab0] */ 0x4d08623e, 0x1c024860, // sub r1, r1, r0                ; mul24 r0, rb6,  ra2.8c
++/* [0x00002ab8] */ 0x4d08723e, 0x1e024860, // sub r1, r1, r0                ; mul24 r0, rb7,  ra2.8d
++/* [0x00002ac0] */ 0x4c208237, 0x10024860, // add r1, r1, r0                ; mul24 r0, ra8,  rb8
++/* [0x00002ac8] */ 0x4c0ca23e, 0x1c024860, // add r1, r1, r0                ; mul24 r0, rb10, ra3.8c
++/* [0x00002ad0] */ 0x4c2cb237, 0x10024860, // add r1, r1, r0                ; mul24 r0, ra11, rb11
++/* [0x00002ad8] */ 0x0d127380, 0x10020867, // sub r1, r1, ra4
++/* [0x00002ae0] */ 0x8d9cc23f, 0x10024862, // sub r1, r1, r0                ; mov r2, rb_wt_off
++/* [0x00002ae8] */ 0x0f9c63c0, 0xd0020867, // asr r1, r1, 6
++/* [0x00002af0] */ 0x4d591bce, 0x120269e0, // sub.setf -, r5, rb_i_tmu      ; mul24 r0, r1, ra_wt_mul_l0
++/* [0x00002af8] */ 0x55653fce, 0x140453e1, // mov.ifz rb_base2, rb_base2_next ; mul24 r1, r1, ra_kmul_add
++/* [0x00002b00] */ 0x8d4e7076, 0x10029851, // sub r1, r0, r1                ; mov.ifz ra_y_y2, ra_y_y2_next
++/* [0x00002b08] */ 0x8d692bf6, 0x1002b9d8, // sub.setf -, r5, rb_lcount     ; mov.ifz ra_base, ra_base_next
++/* [0x00002b10] */ 0x8c9f8289, 0xd0024860, // add r1, r1, r2                ; mov r0, r1 << 8
++/* [0x00002b18] */ 0x8c5e7236, 0x1c024863, // add r1, r1, r0                ; mov r3, ra_blk_height
++/* [0x00002b20] */ 0xfffffe78, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00002b28] */ 0x4f65039f, 0x18024862, // asr r1, r1, ra_wt_den_p7      ; mul24 r2, r3, rb_pitch
++/* [0x00002b30] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x00002b38] */ 0xf34003f3, 0xd2024c20, // max vpm, r1, 0                ; v8subs r0, ra_height, r3
++/* [0x00002b40] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0        ; mov vw_setup, ra_dma0
++/* [0x00002b48] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x00002b50] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3                ; mov vw_setup, rb_dma1
++/* [0x00002b58] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3                ; mov vw_addr, ra_dest
++/* [0x00002b60] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x00002b68] */ 0xfffffe30, 0xf0f809e7, // brr -, r:1b
++/* [0x00002b70] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x00002b78] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x00002b80] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_filter_y10_b00
++/* [0x00002b88] */ 0xfffff8e0, 0xf0f807a7, // brr ra_link, r:per_block_setup_10
++/* [0x00002b90] */ 0x959a0ff6, 0x10024023, // mov ra0, unif                 ; mov r3, elem_num
++/* [0x00002b98] */ 0xec9c3fd2, 0x100269e5, // add.setf -, rb_ef, rb_ef      ; v8subs r5rep, r2, r2
++/* [0x00002ba0] */ 0x8c001cff, 0x14024800, // add r0, ra0.16b, r3           ; mov rb_xshift2, rb_xshift2_next
++/* [0x00002ba8] */ 0x00000001, 0xe00208a7, // mov r2, 1
++/* [0x00002bb0] */ 0x8c591eb6, 0x10025461, // add rb_i_tmu, rb_i_tmu, r2    ; mov r1, ra_wt_off_mul_l0
++/* [0x00002bb8] */ 0xf158fded, 0xd4025325, // shl rb_wt_off, ra_wt_off_l0, DENOM + 8 ; v8subs r5quad, r5, r5
++/* [0x00002bc0] */ 0x809f8009, 0xd000d9d6, // nop                           ; mov.ifnz ra_wt_off_mul_l0, r1 << 8
++// :1
++/* [0x00002bc8] */ 0x0d9d1bc0, 0xb00229e7, // sub.setf -, r5, rb_i_tmu      ; nop                           ; ldtmu1
++/* [0x00002bd0] */ 0x8e4c09f6, 0xa0029851, // shr r1, r4, rb_xshift2        ; mov.ifz ra_y_y2, ra_y_y2_next ; ldtmu0
++/* [0x00002bd8] */ 0x8e5509bf, 0x12024823, // shr r0, r4, ra_xshift         ; mov r3, rb_pitch
++/* [0x00002be0] */ 0x13440dc0, 0xd40208a7, // max r2, ra_y, 0
++/* [0x00002be8] */ 0x9269e5f6, 0x10029898, // min r2, r2, rb_max_y          ; mov.ifz ra_base, ra_base_next
++/* [0x00002bf0] */ 0x4c441dd3, 0xd4224462, // add ra_y, ra_y, 1             ; mul24 r2, r2, r3
++/* [0x00002bf8] */ 0x8c613cbf, 0x10028e0f, // add t0s, ra_base, r2          ; mov.ifz rb_base2, rb_base2_next
++/* [0x00002c00] */ 0x13440dc0, 0xd20208a7, // max r2, ra_y2, 0
++/* [0x00002c08] */ 0x129de5c0, 0x100208a7, // min r2, r2, rb_max_y
++/* [0x00002c10] */ 0x4c441dd3, 0xd2124462, // add ra_y2, ra_y2, 1           ; mul24 r2, r2, r3
++/* [0x00002c18] */ 0x8c5cfe86, 0x12024f20, // add t1s, rb_base2, r2         ; v8min r0, r0, ra_pmax
++/* [0x00002c20] */ 0x545983c6, 0x12024860, // and r1, r1, rb_pmask          ; mul24 r0, r0, ra_wt_mul_l0
++/* [0x00002c28] */ 0x4d492bce, 0x120269e1, // sub.setf -, r5, rb_lcount     ; mul24 r1, r1, ra_wt_mul_l1
++/* [0x00002c30] */ 0xcc52706e, 0x1a024865, // add r1, r0, r1                ; v8adds r5rep, r5, ra_k1
++/* [0x00002c38] */ 0x915c83f6, 0xdc024863, // shl r1, r1, 8                 ; mov r3, ra_blk_height
++/* [0x00002c40] */ 0xec40c3f3, 0x12024860, // add r1, r1, rb_wt_off         ; v8subs r0, ra_height, r3
++/* [0x00002c48] */ 0xffffff60, 0xf06809e7, // brr.anyn -, r:1b
++/* [0x00002c50] */ 0x0f9d03c0, 0xd0020867, // asr r1, r1, (DENOM + 9) - 32
++/* [0x00002c58] */ 0x925f23bf, 0x12020867, // min r1, r1, ra_pmax           ; mov -, vw_wait
++/* [0x00002c60] */ 0x5351039f, 0x18024c22, // max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++/* [0x00002c68] */ 0x956e7036, 0x10126431, // mov.setf ra_height, r0        ; mov vw_setup, ra_dma0
++/* [0x00002c70] */ 0x00000000, 0xf027c9e7, // bra.anyz -, ra_link
++/* [0x00002c78] */ 0x929dd0ff, 0x10024831, // min r0, r0, r3                ; mov vw_setup, rb_dma1
++/* [0x00002c80] */ 0x8d7270f6, 0x10024872, // sub r1, r0, r3                ; mov vw_addr, ra_dest
++/* [0x00002c88] */ 0x119d73c0, 0xd0020867, // shl r1, r1, i_shift23
++/* [0x00002c90] */ 0xffffff18, 0xf0f809e7, // brr -, r:1b
++/* [0x00002c98] */ 0x0c9d2e00, 0x100214a7, // add rb_lcount, rb_lcount, r0
++/* [0x00002ca0] */ 0x0c6e7c40, 0x100206e7, // add ra_dma0, ra_dma0, r1
++/* [0x00002ca8] */ 0x8c71ccbf, 0x10024731, // add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init
++// ::mc_end
++};
++#ifdef __HIGHC__
++#pragma Align_to(8, ff_hevc_rpi_shader)
++#endif
+--- /dev/null
++++ b/libavcodec/rpi_hevc_shader.h
+@@ -0,0 +1,63 @@
++#ifndef rpi_hevc_shader_H
++#define rpi_hevc_shader_H
++
++extern unsigned int ff_hevc_rpi_shader[];  /* QPU code; offsets below count array elements, two elements per QPU instruction */
++
++#define mc_setup_c_q0 (ff_hevc_rpi_shader + 0)
++#define mc_start (ff_hevc_rpi_shader + 0)  /* first instruction of the shader */
++#define mc_setup_c_qn (ff_hevc_rpi_shader + 2)
++#define mc_filter_c_p (ff_hevc_rpi_shader + 134)
++#define mc_filter_c_p_l1 (ff_hevc_rpi_shader + 260)
++#define mc_filter_c_b (ff_hevc_rpi_shader + 386)
++#define mc_sync_q0 (ff_hevc_rpi_shader + 580)
++#define mc_sync_q1 (ff_hevc_rpi_shader + 598)
++#define mc_sync_q2 (ff_hevc_rpi_shader + 610)
++#define mc_sync_q3 (ff_hevc_rpi_shader + 622)
++#define mc_sync_q4 (ff_hevc_rpi_shader + 634)
++#define mc_sync_q5 (ff_hevc_rpi_shader + 652)
++#define mc_sync_q6 (ff_hevc_rpi_shader + 664)
++#define mc_sync_q7 (ff_hevc_rpi_shader + 676)
++#define mc_sync_q8 (ff_hevc_rpi_shader + 688)
++#define mc_sync_q9 (ff_hevc_rpi_shader + 706)
++#define mc_sync_q10 (ff_hevc_rpi_shader + 718)
++#define mc_sync_q11 (ff_hevc_rpi_shader + 730)
++#define mc_exit_c_qn (ff_hevc_rpi_shader + 742)  /* same code as mc_exit_y_qn */
++#define mc_exit_y_qn (ff_hevc_rpi_shader + 742)
++#define mc_exit_c_q0 (ff_hevc_rpi_shader + 760)  /* same code as mc_exit_y_q0 */
++#define mc_exit_y_q0 (ff_hevc_rpi_shader + 760)
++#define mc_setup_y_q0 (ff_hevc_rpi_shader + 780)
++#define mc_setup_y_qn (ff_hevc_rpi_shader + 782)
++#define mc_filter_y_pxx (ff_hevc_rpi_shader + 1014)
++#define mc_filter_y_bxx (ff_hevc_rpi_shader + 1140)
++#define mc_filter_y_p00 (ff_hevc_rpi_shader + 1272)
++#define mc_filter_y_b00 (ff_hevc_rpi_shader + 1358)
++#define mc_setup_c10_q0 (ff_hevc_rpi_shader + 1432)
++#define mc_setup_c10_qn (ff_hevc_rpi_shader + 1434)
++#define mc_filter_c10_p (ff_hevc_rpi_shader + 1562)
++#define mc_filter_c10_p_l1 (ff_hevc_rpi_shader + 1684)
++#define mc_filter_c10_b (ff_hevc_rpi_shader + 1806)
++#define mc_sync10_q0 (ff_hevc_rpi_shader + 1996)
++#define mc_sync10_q1 (ff_hevc_rpi_shader + 2014)
++#define mc_sync10_q2 (ff_hevc_rpi_shader + 2026)
++#define mc_sync10_q3 (ff_hevc_rpi_shader + 2038)
++#define mc_sync10_q4 (ff_hevc_rpi_shader + 2050)
++#define mc_sync10_q5 (ff_hevc_rpi_shader + 2068)
++#define mc_sync10_q6 (ff_hevc_rpi_shader + 2080)
++#define mc_sync10_q7 (ff_hevc_rpi_shader + 2092)
++#define mc_sync10_q8 (ff_hevc_rpi_shader + 2104)
++#define mc_sync10_q9 (ff_hevc_rpi_shader + 2122)
++#define mc_sync10_q10 (ff_hevc_rpi_shader + 2134)
++#define mc_sync10_q11 (ff_hevc_rpi_shader + 2146)
++#define mc_exit_c10_q0 (ff_hevc_rpi_shader + 2158)  /* same code as mc_exit_y10_q0 */
++#define mc_exit_y10_q0 (ff_hevc_rpi_shader + 2158)
++#define mc_exit_c10_qn (ff_hevc_rpi_shader + 2178)  /* same code as mc_exit_y10_qn */
++#define mc_exit_y10_qn (ff_hevc_rpi_shader + 2178)
++#define mc_setup_y10_q0 (ff_hevc_rpi_shader + 2196)
++#define mc_setup_y10_qn (ff_hevc_rpi_shader + 2198)
++#define mc_filter_y10_pxx (ff_hevc_rpi_shader + 2440)
++#define mc_filter_y10_p00 (ff_hevc_rpi_shader + 2566)
++#define mc_filter_y10_bxx (ff_hevc_rpi_shader + 2654)
++#define mc_filter_y10_b00 (ff_hevc_rpi_shader + 2786)
++#define mc_end (ff_hevc_rpi_shader + 2860)  /* one past the last instruction word (0x2cb0 bytes / 4) */
++
++#endif /* rpi_hevc_shader_H */
+--- /dev/null
++++ b/libavcodec/rpi_hevc_shader.qasm
+@@ -0,0 +1,1850 @@
++# Copyright (c) 2017 Raspberry Pi (Trading) Ltd.
++# All rights reserved.
++#
++# Redistribution and use in source and binary forms, with or without
++# modification, are permitted provided that the following conditions are met:
++#     * Redistributions of source code must retain the above copyright
++#       notice, this list of conditions and the following disclaimer.
++#     * Redistributions in binary form must reproduce the above copyright
++#       notice, this list of conditions and the following disclaimer in the
++#       documentation and/or other materials provided with the distribution.
++#     * Neither the name of the copyright holder nor the
++#       names of its contributors may be used to endorse or promote products
++#       derived from this software without specific prior written permission.
++#
++# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++#
++# Written by Peter de Rivaz, John Cox
++
++
++
++# Inter pred asm
++#
++# Logic here should be good to 14 bits without modification
++# but only 8 & 10 are currently instantiated & tested
++# 15 & 16 bits have different shift1, shift2 calc & I also suspect overflow
++# in _p00 & _b00
++
++# The @ "mul_used", 0 annotations that occur by various mul blocks suppress
++# the warning that we are using rotation & ra/rb registers. r0..3 can be
++# rotated through all 16 elems; ra regs can only be rotated through their
++# local 4.  As it happens this is what is wanted here as we do not want the
++# constants from the other half of the calc.
++
++# Number limits in P/B calculation
++#
++# In order to avoid issues with mul24 being an unsigned 24->32 bit multiplier
++# we offset our intermediates s.t. they always end up +ve before the next
++# multiply (may be -ve whilst summing but that doesn't matter).
++#
++# Range calc for up to 14 bits (Y-B pred):
++#
++# denom: [0, 7]
++# bmax = (1 << bits) - 1
++# off: [-(1 << (bits-1)), (1 << (bits-1)) - 1]
++#
++# wt_mul: [-128, 255]
++# wt_off = off * 2 + 1: [-bmax, bmax]
++#
++# pel: [0, bmax]
++# H-filter: [(-22*pel + 88*pel) >> (bits-8) + 0x4000] = [0x2a00, 0x97ff]
++# V-filter: [(-22*hf + 88*hf) >> 6] = [0x580, 0xc28e]
++# mul_t = (V_L0 + V_l1) * (wt_mul + 128): [0, 0x24624e6]
++# mul_t - (V_l0 + V_l1)* 128: [-0xc28e00, 0x18396e4]
++# adj_wt_off = (wt_off << ((denom + 6) - (bits - 8))) - 0x4000 * (wt_mul * 2):
++#  [wt_off << (21 - bits)] - [wt_mul << 15] = [-0x1fffff, 0x1fffff] - [-0x400000, 0x7f8000]
++#
++# This all looks good and is mostly bit depth independent - and as we manage
++# to do unsigned multiplies everywhere (now) this should be good for any bit
++# depth up to 14 (we could probably do 16 - but that requires a few tweaks
++# to the shifts we don't currently have logic for)
++
++# PREREAD is the number of requests that we have sitting in the TMU request
++# queue.
++#
++# There are 8 slots available in the TMU request Q for tm0s requests, but
++# only 4 output FIFO entries and overflow is bad (corruption or crash)
++# (If threaded then only 2 out FIFO entries, but we aren't.)
++# In s/w we are effectively limited to the min vertical read which is >= 4
++# so output FIFO is the limit.
++#
++# As the test for read-next is in the main part of the Luma loop (rather than
++# the preload FIFO part) we are limited to min_luma_height - 1
++# Min_luma_height is 4 so we can only have a preload of 3
++# Beware that min_chroma_height (and width) is 2 so we can't do the same trick
++# in chroma without abandoning preload pretty much entirely (which would be bad)
++#
++# Timing tests vs preload of 4 suggests this doesn't hurt us much
++# Could have preread 4 for Chroma but when tested it didn't help
++
++.set PREREAD,                      3
++
++# Offset added (effectively) at the exit of the H FIR filter
++# This is enough to force the result +ve
++# Is good if it is a power of 2 as that allows for >> without loss
++#
++# Worst case for a single Y FIR is *-22 so we need an offset of 256*22
++# But we need twice offset to survive both H & V = 256*22*2 = 0x2c00
++# Round up to next power of 2
++
++.set FIR_OFFSET,                   0x4000
++
++# Block heights - 8 & 16 are the only numbers we currently support
++
++.set C_BLK_HEIGHT_8,               16      # suffix is the bit-depth variant, not the height: m_setup_c picks this when v_bit_depth <= 8
++.set C_BLK_HEIGHT_16,              8       # presumably the C variant for v_bit_depth > 8 - confirm in m_setup_c
++.set Y_BLK_HEIGHT_8,               16      # presumably the Y counterpart of C_BLK_HEIGHT_8 - confirm in the Y setup macro
++.set Y_BLK_HEIGHT_16,              8       # presumably the Y counterpart of C_BLK_HEIGHT_16 - confirm in the Y setup macro
++
++# QPU counts - depend on block size
++# If we have a 2-byte format & block_size > 8 then can only afford
++# 8 QPUs
++# These numbers must match the numbers in ff_hevc_rpi_shader_cmd.h
++
++.set N_QPU_8,                      12
++.set N_QPU_16,                     12
++
++# Value to add to the weight multiplier to convert it into an unsigned value
++# Should be power of two for convenience
++
++.set LOG2_MUL_ADD,                 14
++.set MUL_ADD,                      (1 << LOG2_MUL_ADD)
++
++# Fixed denom (max that it can be set to)
++.set DENOM,                        7
++
++# register allocation
++#
++
++# ra0-3
++# Used as temp and may be loop filter coeffs (split into .8s)
++# or temp in loop. Check usage on an individual basis.
++
++# ra4-11
++# V FIFO / temp / free
++
++# -- free --                       ra12
++
++# -- free --                       ra13
++
++# -- free --                       ra14
++
++# -- free --                       ra15
++
++# uniform: width:height
++.set ra_width_height,              ra16
++.set ra_width,                     ra16.16b
++.set ra_height,                    ra16.16a
++
++# y:y2 same layout as y_y2_next so we can update both together
++.set ra_y_y2,                      ra17
++.set ra_y2,                        ra17.16a
++.set ra_y,                         ra17.16b
++
++# uniform: L1 weight (U on left, V on right)
++# Only used in Y B
++.set ra_wt_off_mul_l1,             ra18
++.set ra_wt_off_l1,                 ra18.16b
++.set ra_wt_mul_l1,                 ra18.16a
++
++# y_next:y2_next same layout as y_y2 so we can update both together
++.set ra_y_y2_next,                 ra19
++.set ra_y_next,                    ra19.16b
++.set ra_y2_next,                   ra19.16a
++
++# Setup: consts - subdivide a single register
++.set ra_kff800100,                 ra20
++.set ra_k256,                      ra20.16a
++.set ra_k0,                        ra20.8a
++.set ra_k1,                        ra20.8b
++.set ra_k128,                      ra20.8c
++.set ra_k255,                      ra20.8d
++
++# Loop: xshifts
++.set ra_xshift,                    ra21.16a
++.set ra_xshift_next,               ra21.16b
++
++# Loop var: L0 weight (U on left, V on right)
++# _off_ is not used in loop as we want to modify it before use
++.set ra_wt_off_mul_l0,             ra22
++.set ra_wt_mul_l0,                 ra22.16a
++.set ra_wt_off_l0,                 ra22.16b
++
++# Max pel value (for 8 bit we can get away with sat ops but not 9+)
++# * Could merge with rb_pmask. For 10 bit Logically pmask needs 0xff in the
++#   2nd byte   but as the source should never be > 3 there 0x3ff should do
++.set ra_blk_height_pmax,           ra23
++.set ra_pmax,                      ra23.16a
++.set ra_blk_height,                ra23.8c
++# --free --                        ra23.8d
++
++# Loop:  src frame base (L0)
++.set ra_base,                      ra24
++
++# Misc  offsets
++.set ra_fir_off_val_wt_den_p7,     ra25
++.set ra_wt_den_p7,                 ra25.8a
++# -- free --                       ra25.8b
++.set ra_fir_off_val,               ra25.16b
++
++# As it happens these constants are the same
++.if FIR_OFFSET == MUL_ADD
++# Weight multiplier unsigned add
++.set ra_kmul_add,                  ra_fir_off_val
++.else
++.error "FIR_OFFSET != MUL_ADD: Need new register & init"
++.endif
++
++# Loop: next src frame base (L0)
++.set ra_base_next,                 ra26
++
++# Loop: height<<23 + width<<16 + vdw_setup_0
++.set ra_dma0,                      ra27
++
++# Loop: destination address
++.set ra_dest,                      ra28
++
++# Setup: Dup of rb_ef
++# Lo bits are used as Y coeff 0 as that lets us combine test & coeff mul
++# (top bits are ignored by mul24)
++.set ra_ef,                        ra29
++
++# Use an even numbered register as a link register to avoid corrupting flags
++.set ra_link,                      ra30
++
++# -- free --                       ra31
++
++.set rb_xshift2,                   rb0
++.set rb_xshift2_next,              rb1
++
++# C:  (elem & 1) == 0 ? elem * 2 : (elem + 4) * 2
++.set rb_elem_x,                    rb2
++
++# El Flags
++# After adding to self we have el even/odd on nc/c and lo/hi on nn/n
++# Duped into ra_ef as sometimes that is easier to use
++.set rb_ef,                        rb3
++
++# rb4-11
++# Loop: V filter FIFO or V filter coeff
++
++# Loop var: offset to add before shift (round + weighting offsets)
++# Exact value varies by loop
++.set rb_wt_off,                    rb12
++
++# -- free --                       rb13
++
++# -- free --                       rb14
++
++# Loop: src frame base (L1)
++.set rb_base2,                     rb15
++
++# Line pitch (128 for sand128)
++.set rb_pitch,                     rb16
++
++# Loop count - 2 (set up TMU for next xfer)
++.set rb_i_tmu,                     rb17
++
++# Loop count for min(height, 16)
++# Y will reset & loop again if height > 16
++.set rb_lcount,                    rb18
++
++# frame_base2_next
++.set rb_base2_next,                rb19
++
++# Setup: Height of Y+C in sand, (x&mask)*xpitch will give
++# offset to the slice
++.set rb_xpitch,                    rb20
++
++# These 3 consts each save 1 instruction in Y loop setup
++# so whilst they are worthwhile they should be the 1st to die if we need
++# another b reg
++.set rb_y_coeffs_2,                rb21                         # 0x050b0a00
++.set rb_y_coeffs_3,                rb22                         # 0x11283a40
++.set rb_y_coeffs_5,                rb23                         # 0x0a0b0500
++
++# Setup: 0xff (8-bit) / 0xffff (9+ bit)
++.set rb_pmask,                     rb24
++
++# vdw_setup_1(dst_pitch)
++.set rb_dma1_base,                 rb25
++
++# Setup: pic width - 1
++# In bytes so 8 bit luma is (width - 1)*1, 16 bit chroma is (width -1)*4 etc.
++.set rb_max_x,                     rb26
++
++# vdw_setup_0 (depends on QPU number)
++.set rb_dma0_base,                 rb27
++
++# Setup: vw_setup value to reset VPM write pointer
++.set rb_vpm_init,                  rb28
++
++# Loop: vdw_setup_1(dst_pitch-width) = stride
++.set rb_dma1,                      rb29
++
++# Setup: pic_height - 1
++.set rb_max_y,                     rb30
++
++# Setup: FIR H offset
++.set rb_fir_off_h,                 rb31
++
++
++# With shifts only the bottom 5 bits are considered so -16=16, -15=17 etc.
++.set i_shift16,                    -16     # -16 & 31 = 16
++.set i_shift21,                    -11     # -11 & 31 = 21
++.set i_shift23,                     -9     #  -9 & 31 = 23
++.set i_shift30,                     -2     #  -2 & 31 = 30
++
++# Much of the setup code is common between Y & C
++# Macros that express this - obviously these can't be overlapped
++# so are probably unsuitable for loop code
++
++.macro m_calc_dma_regs, v_bit_depth, v_blk_height, r_vpm, r_dma
++  mov r2, qpu_num                      # r2 = number of the QPU running this code
++.if v_bit_depth <= 8
++  # 8 bit version: each QPU gets its own VPM offset built from its QPU number
++  asr r1, r2, 2                        # r1 = qpu_num >> 2
++  shl r1, r1, 6                        # r1 = (qpu_num >> 2) << 6
++  and r0, r2, 3                        # r0 = qpu_num & 3
++  or  r0, r0, r1                       # r0 = per-QPU VPM address offset
++
++  mov r1, vpm_setup(0, 4, h8p(0, 0))   # 4 is stride - stride acts on ADDR which is Y[5:0],B[1:0] for 8 bit
++  add r_vpm, r0, r1  # r_vpm = VPM write setup word for this QPU's 8bit storage
++
++  mov r1, vdw_setup_0(0, 0, dma_h8p(0,0,0)) # height,width added later
++  shl r0, r0, 5                        # presumably aligns the offset with the vdw_setup_0 base field (cf. 16 bit note below) - TODO confirm
++
++.else
++  # 16 bit version
++  # Limited to 8 QPUs if blk height > 8
++  asr r1, r2, 1                        # r1 = qpu_num >> 1
++.if v_blk_height <= 8
++  shl r1, r1, 4                        # r1 = (qpu_num >> 1) << 4
++.else
++  shl r1, r1, 5                        # r1 = (qpu_num >> 1) << 5 (taller blocks need twice the spacing)
++.endif
++  and r0, r2, 1                        # r0 = qpu_num & 1
++  or  r0, r0, r1                       # r0 = per-QPU VPM address offset
++
++  mov r1, vpm_setup(0, 2, h16p(0, 0))   # 2 is stride - stride acts on ADDR
++  add r_vpm, r0, r1                    # r_vpm = VPM write setup word for this QPU's 16bit storage
++
++  # X = H * 8 so the YH from VPMVCD_WR_SETUP[ADDR] drops into
++  # XY VPMVCD_WR_SETUP[VPMBASE] if shifted left 3 (+ 3 for pos of field in reg)
++  mov r1, vdw_setup_0(0, 0, dma_h16p(0,0,0))    # height,width added later
++  shl r0, r0, 6                        # 3 + 3 as explained above
++.endif
++  add r_dma, r0, r1  # r_dma = vdw_setup_0 base word for this QPU's DMA out
++.endm
++
++
++.macro m_setup_q0
++  srel -, 12                           # QPU 0 only: release sem 12 to start the m_sync_q chain
++.endm
++
++# Code start label
++::mc_start
++
++################################################################################
++# mc_setup_c
++#
++# typedef struct qpu_mc_pred_c_s_s {
++#     int16_t y;
++#     int16_t x;
++#     uint32_t base;
++#     uint32_t pic_cw;            // C Width (== Y width / 2)
++#     uint32_t pic_ch;            // C Height (== Y Height / 2)
++#     uint32_t stride2;
++#     uint32_t stride1;
++#     uint32_t wdenom;
++#     int16_t y2;
++#     int16_t x2;
++#     uint32_t base2;
++#     uint32_t next_fn;
++# } qpu_mc_pred_c_s_t;
++
++.macro m_setup_c, v_bit_depth
++
++# Cannot use mul24 on x as x might be -ve, so must use shift
++.if v_bit_depth <= 8
++.set v_x_shift,         1
++.set v_pmask,           0xff
++.set v_blk_height,      C_BLK_HEIGHT_8
++.else
++.set v_x_shift,         2
++.set v_pmask,           0xffff
++.set v_blk_height,      C_BLK_HEIGHT_16
++.endif
++
++  mov tmurs, 1                  ; mov ra0, unif                 # No TMU swap ; x_y
++
++  mov r0, [0,2,0,2,0,2,0,2,1,3,1,3,1,3,1,3]
++  shl rb_ef, r0, i_shift30      ; mov ra_base, unif             # ; ref_c_base
++
++# Read image dimensions
++  sub r0, unif, 1                                               # pic c width
++  shl rb_max_x, r0, v_x_shift                                   # rb_max_x in bytes
++  sub rb_max_y, unif, 1                                         # pic c height
++
++# load constants
++  mov ra_kff800100, 0xff800100
++  mov rb_pmask, v_pmask
++  mov ra_blk_height_pmax, ((1 << v_bit_depth) - 1) | (v_blk_height << 16)
++  mov rb_fir_off_h, (FIR_OFFSET << (v_bit_depth - 8))
++  mov ra_fir_off_val_wt_den_p7, (FIR_OFFSET << 16) | (DENOM + 15 - v_bit_depth)
++
++# get source pitch
++  mov ra_ef, rb_ef              ; mov rb_xpitch, unif           # ; stride2
++  mov rb_pitch, unif                                            # stride1
++  mov r1, vdw_setup_1(0)                                        # [rb_pitch delay] Merged with dst_stride shortly
++  add rb_dma1_base, r1, rb_pitch                                # vdw_setup_1
++
++  and r0, 1, elem_num
++  nop                           ; mul24 r0, r0, 5
++.if v_bit_depth <= 8
++  add rb_elem_x, r0, elem_num
++.else
++  add r0, r0, elem_num
++  add rb_elem_x, r0, r0
++.endif
++
++# Compute base address for first and second access
++# ra_base ends up with t0s base
++# rb_base2 ends up with t1s base
++
++  shl r0, ra0.16b, v_x_shift                                    # [rb_elem_x delay]
++  add r0, r0, rb_elem_x                                         # Add elem no to x to get X for this slice
++  max r0, r0, 0                 ; mov ra_y, ra0.16a             # ; stash Y
++  min r0, r0, rb_max_x
++
++# Get shift
++# Shift will always calculate as 0 for 9+ bit
++# Ideally we can optimize the shift out of the code in these cases but for now
++# it is tidier to leave it in
++.if v_bit_depth <= 8
++  shl ra_xshift_next, r0, 3
++.else
++  mov ra_xshift_next, 0         ; mov rb_xshift2_next, 0
++.endif
++
++# In a single 32 bit word we get 1 or 2 UV pairs so mask bottom bits of xs if we need to
++
++.if v_bit_depth <= 8
++  and r0, r0, -4
++.endif
++  sub r1, ra_k0, rb_pitch
++  and r1, r0, r1
++  xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++  add r0, r0, r1                ; mov ra0, unif                 # ; next_x2_y2
++  add ra_base, ra_base, r0
++
++# Compute part of VPM to use for DMA output
++# * We only get 8 QPUs if 16 bit - maybe reduce height and auto-loop?
++  m_calc_dma_regs v_bit_depth, v_blk_height, rb_vpm_init, rb_dma0_base
++
++# And again for L1, but only worrying about frame2 stuff
++
++# Compute base address for first and second access
++# ra_base ends up with t0s base
++# rb_base2 ends up with t1s base
++
++  shl r0, ra0.16b, v_x_shift
++  add r0, r0, rb_elem_x         ; mov ra_y2, ra0.16a            # Add QPU slice offset
++  max r0, r0, 0                 ; mov rb_base2, unif            # ref_c_base2
++  min r0, r0, rb_max_x
++
++# Get shift (already zero if 9+ bit so ignore)
++.if v_bit_depth <= 8
++  shl rb_xshift2_next, r0, 3
++.endif
++
++# In a single 32 bit word we get 2 UV pairs so mask bottom bit of xs
++
++.if v_bit_depth <= 8
++  and r0, r0, -4
++.endif
++  sub r1, ra_k0, rb_pitch
++  and r1, r0, r1                ; mov r3, PREREAD
++  xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++  add r0, r0, r1                ; mov r2, ra_y2
++  add rb_base2, rb_base2, r0    ; mov r0, ra_y
++
++# Do preloads
++# r0 = ra_y, r2 = ra_y2, r3 = PREREAD
++
++:1
++  sub.setf r3, r3, 1
++  max r1, r0, 0
++  min r1, r1, rb_max_y
++  add r0, r0, ra_k1             ; mul24 r1, r1, rb_pitch
++  add t0s, ra_base, r1          ; mov ra_y, r0
++
++  max r1, r2, 0
++  brr.anynz -, r:1b
++  min r1, r1, rb_max_y
++  add r2, r2, ra_k1             ; mul24 r1, r1, rb_pitch
++  add t1s, rb_base2, r1         ; mov ra_y2, r2
++# >>> .anynz 1b
++
++  mov ra_link, unif                                             # link
++# touch registers to keep simulator happy (and fills in delay slots)
++  mov ra4, 0                    ; mov rb4, 0
++  bra -, ra_link
++  mov ra5, 0                    ; mov rb5, 0
++  mov ra6, 0                    ; mov rb6, 0
++  mov ra7, 0                    ; mov rb7, 0
++# >>> ra_link
++.endm
++
++::mc_setup_c_q0
++  m_setup_q0
++::mc_setup_c_qn
++  m_setup_c 8
++
++################################################################################
++#
++# mc_filter_c_p
++#
++# typedef struct qpu_mc_pred_c_p_s {
++#     int16_t y;
++#     int16_t x;
++#     uint32_t base;
++#     uint16_t h;
++#     uint16_t w;
++#     uint32_t coeffs_x;
++#     uint32_t coeffs_y;
++#     uint32_t wo_u;
++#     uint32_t wo_v;
++#     uint32_t dst_addr_c;
++#     uint32_t next_fn;
++# } qpu_mc_pred_c_p_t;
++
++.macro m_filter_c_p, v_tmu, v_bit_depth
++
++.if v_bit_depth <= 8
++.set v_x_shift,         1
++.set v_x_mul,           2
++.set v_v_shift,         8
++# Shifts to get width & height in the right place in rb_dma0
++.set v_dma_h_shift,     7
++.set v_dma_wh_shift,    i_shift16
++.else
++.set v_x_shift,         2
++.set v_x_mul,           4
++.set v_v_shift,         i_shift16
++# Shifts to get width & height in the right place in rb_dma0
++.set v_dma_h_shift,     8
++.set v_dma_wh_shift,    15
++.endif
++
++.if v_tmu == 0
++.set vrx_xshift,        rb_xshift2              # b side more convenient
++.set vrx_xshift_next,   ra_xshift_next
++.set vra_y_next,        ra_y_next
++.set vrx_base_next,     ra_base_next
++.set vra_y,             ra_y
++.set vra_base,          ra_base
++.set vr_txs,            t0s
++.else
++.set vrx_xshift,        ra_xshift               # a side more convenient
++.set vrx_xshift_next,   rb_xshift2_next
++.set vra_y_next,        ra_y2_next
++.set vrx_base_next,     rb_base2_next
++.set vra_y,             ra_y2
++.set vra_base,          rb_base2
++.set vr_txs,            t1s
++.endif
++
++# denom shift values
++.set i_wt_den_p5,                  (DENOM + 13 - v_bit_depth)
++.set i_wt_den_p6,                  (DENOM + 14 - v_bit_depth)
++
++# per-channel shifts were calculated on the *previous* invocation
++# get base addresses and per-channel shifts for *next* invocation
++  mov vw_setup, rb_vpm_init     ; mov ra2, unif                 # ; x_y
++
++  add.setf -, rb_ef, rb_ef      ; mov r3, unif                  # [ra2 delay] ; base
++
++  shl r0, ra2.16b, v_x_shift    ; v8subs r5rep, r0, r0          # r5 = 0
++  add r0, r0, rb_elem_x         ; mov ra_width_height, unif     # r1=pitch2 mask ; width_height
++  sub r1, r5, rb_pitch          ; mov ra0, unif                 # ; H filter coeffs
++  max r0, r0, r5                ; mov vrx_xshift, vrx_xshift_next
++  min r0, r0, rb_max_x          ; mov vra_y_next, ra2.16a
++
++.if v_bit_depth <= 8
++  shl vrx_xshift_next, r0, 3
++  and r0, r0, -4
++.endif
++  and r1, r0, r1                ; mul24 r2, ra_width, v_x_mul   # r2=w*2 (we are working in pel pairs)  ** x*2 already calced!
++  xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++  add r0, r0, r1                ; mov ra3, unif                 # ; V filter coeffs
++  add vrx_base_next, r3, r0     ; mov r1, ra_height
++
++# set up VPM write
++  sub rb_dma1, rb_dma1_base, r2 ; mov ra_wt_off_mul_l0, unif    # Compute vdw_setup1(dst_pitch-width) ; U offset/weight
++  add rb_i_tmu, r1, (3-4) - PREREAD ; v8min r1, r1, ra_blk_height
++  add rb_lcount, r1, (3-4)      ; mov.ifc ra_wt_off_mul_l0, unif # ; V offset/weight
++
++# Misc final setup...
++
++  shl r0, r1, v_dma_h_shift     ; mov ra_dest, unif             # ; dst_addr
++  add r0, r0, r2                ; mov r2, ra_fir_off_val        # Combine width and height of destination area (r0=h<<v_dma_h_shift, r2=w*v_x_mul)
++  shl r0, r0, v_dma_wh_shift    ; mov rb10, ra3.8c              # Shift into bits 16 upwards of the vdw_setup0 register
++  add ra_dma0, r0, rb_dma0_base ; mov r1, ra_wt_off_l0          # ; r1=weight
++  shl r1, r1, i_wt_den_p5       ; mul24 r0, r2, ra_wt_mul_l0
++  sub rb_wt_off, r1, r0         ; mov r0, ra_kmul_add
++  add ra_wt_mul_l0, ra_wt_mul_l0, r0 ; mov r5rep, -4            # ; loop counter (V FIFO fill = 4)
++  mov rb11, ra3.8d              ; mov ra_link, unif             # ; Link
++
++# r5           = -4                     (loop counter)
++# ra_wt_mul_l0 = weight L0 + 128        (now unsigned)
++# rb_wt_off    = (offset * 2 + 1) << (wt_den + 5)
++# rb31         = FIR value offset
++
++# FIFO: rb4, ra5, rb6, ra7
++# Coeffs in ra3.8a, ra3.8b, rb10, rb11
++
++# We want (r0r1)
++# U0U3 : V0V3 : U1U4 : V1V4 : U2U5 : V2V5 : ...
++# We fetch (after shift)
++#  C0  :  C3  :  C1  :  C4  :  C2  :  C5  : ...
++
++:1
++# retrieve texture results and pick out bytes
++# then submit two more texture requests
++
++.if v_tmu == 0
++  sub.setf -, r5, rb_i_tmu      ; mov rb4, ra5                  ; ldtmu0
++  shr r2, r4, vrx_xshift        ; mov.ifz  r3, vra_y_next
++  shr r1, r2, v_v_shift         ; mov.ifnz r3, vra_y
++  add.setf -, rb_ef, rb_ef      ; mov.ifz  vra_base, vrx_base_next
++.else
++  sub.setf -, r5, rb_i_tmu      ; mov rb4, ra5                  ; ldtmu1
++  shr r2, r4, vrx_xshift        ; mov.ifz  vra_base, vrx_base_next
++  shr r1, r2, v_v_shift         ; mov.ifnz r3, vra_y
++  add.setf -, rb_ef, rb_ef      ; mov.ifz  r3, vra_y_next       # [r1 << delay]
++.endif
++
++  add vra_y, r3, ra_k1          ; mov      r0, r1 << 15
++  max r3, r3, ra_k0             ; mov.ifnc r1, r2 << 1
++  min r3, r3, rb_max_y          ; mov.ifnc r0, r2
++
++  and r1, r1, ra_pmax           ; mul24 r3, r3, rb_pitch
++.if v_tmu == 0
++  add vr_txs, vra_base, r3      ; v8min r0, r0, rb_pmask        # ; mask bytes
++.else
++  add vr_txs, vra_base, r3      ; v8min r0, r0, ra_pmax         # ; mask bytes
++.endif
++
++# apply horizontal filter
++# The filter coeffs for the two halves of this are the same (unlike in the
++# Y case) so it doesn't matter which ra0 we get them from
++# Also as the two halves are locked together we don't need to separate the 1st
++# r0 mul or the last r1 mul as they are valid for all QPUs
++
++  add r5rep, r5, 1              ; mul24      r3, ra0.8a,       r0
++  sub r2, rb_fir_off_h, r3      ; mul24      r3, ra0.8d,       r1
++  sub r2, r2, r3                ; mul24      r3, ra0.8b << 2,  r0 << 2  @ "mul_used", 0
++  nop                           ; mul24.ifn  r3, ra0.8b << 12, r1 << 12 @ "mul_used", 0
++  add r2, r2, r3                ; mul24      r3, ra0.8c << 4,  r0 << 4  @ "mul_used", 0
++  add.setf -, r5, r5            ; mul24.ifn  r3, ra0.8c << 14, r1 << 14 @ "mul_used", 0
++
++# V filter = - r4 * a + r5 * b + r6 * c - r7 * d (post FIFO shift)
++# We would like to save the r5->r4 shift but we need a delay slot
++# for both r7 & r6 which we can't find anything to put in if we have
++# already multiplied r4 & r5!
++  brr.anyn -, r:1b
++  add r2, r2, r3                ; mul24 r0, ra7, rb10           # r6 post
++  mov ra5, rb6                  ; mul24 r1, rb6, ra3.8b         # r5 post
++  asr ra7, r2, v_bit_depth - 8  ; mov rb6, ra7
++# >>> .anyn 1b
++
++  add r1, r1, r0                ; mul24 r0, rb4, ra3.8a         # [ra7 delay]
++  sub r1, r1, r0                ; mul24 r0, ra7, rb11
++  sub r1, r1, r0
++
++  asr r1, r1, 6                 ; mov r3, ra_blk_height         # ; NxtLoop
++  sub.setf -, r5, rb_lcount     ; mul24 r0, r1, ra_wt_mul_l0
++  add r0, r0, rb_wt_off         ; mul24 r1, r1, ra_kmul_add
++  sub r1, r0, r1                ; v8subs r0, ra_height, r3      # ; NxtLoop
++  brr.anyn -, r:1b
++  asr r1, r1, i_wt_den_p6
++  min r1, r1, ra_pmax           ; mov -, vw_wait
++  max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch        # ; NxtLoop
++# >>> .anyn 1b
++
++# r0 = remaining height (min 0)
++# r2 = r3 * rb_pitch
++# r3 = block_height
++
++# If looping again then we consumed 16 height last loop
++# rb_dma1 (stride) remains constant
++# rb_i_tmu remains const (based on total height)
++# recalc ra_dma0, rb_lcount based on new segment height
++
++  mov.setf ra_height, r0        ; mov vw_setup, ra_dma0         # VDW setup 0
++
++# DMA out
++  bra.anyz -, ra_link
++  min r0, r0, r3                ; mov vw_setup, rb_dma1         # Stride
++  sub r1, r0, r3                ; mov vw_addr, ra_dest          # start the VDW
++  shl r1, r1, i_shift23
++# >>> .anyz ra_link
++
++# Here r1 = cur_blk_height - 16 so it will be 0 or -ve
++# We add to dma0 to reduce the number of output lines in the final block
++  brr -, r:1b
++  add rb_lcount, rb_lcount, r0
++  add ra_dma0, ra_dma0, r1
++  add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init     # ; Reset our VPM write pointer
++# >>> 1b
++.endm
++
++::mc_filter_c_p
++  m_filter_c_p 0, 8
++
++::mc_filter_c_p_l1
++  m_filter_c_p 1, 8
++
++################################################################################
++#
++# mc_filter_c_b
++#
++# typedef struct qpu_mc_pred_c_b_s {
++#     int16_t y;
++#     int16_t x;
++#     uint32_t base;
++#     uint16_t h;
++#     uint16_t w;
++#     uint32_t coeffs_x1;
++#     uint32_t coeffs_y1;
++#     int16_t weight_u1;
++#     int16_t weight_v1;
++#     int16_t y2;
++#     int16_t x2;
++#     uint32_t base2;
++#     uint32_t coeffs_x2;
++#     uint32_t coeffs_y2;
++#     uint32_t wo_u2;
++#     uint32_t wo_v2;
++#     uint32_t dst_addr_c;
++#     uint32_t next_fn;
++# } qpu_mc_pred_c_b_t;
++
++.macro m_filter_c_b, v_bit_depth
++
++.if v_bit_depth <= 8
++.set v_x_shift,         1
++.set v_v_shift,         8
++# Shifts to get width & height in the right place in ra_dma0
++.set v_dma_h_shift,     7
++.set v_dma_wh_shift,    i_shift16
++.else
++.set v_x_shift,         2
++.set v_v_shift,         i_shift16
++# Shifts to get width & height in the right place in ra_dma0
++.set v_dma_h_shift,     8
++.set v_dma_wh_shift,    15
++.endif
++.set v_x_mul,           (1 << v_x_shift)
++
++# denom shift values
++.set i_wt_den_p5,                  (DENOM + 13 - v_bit_depth)
++.set i_wt_den_p6,                  (DENOM + 14 - v_bit_depth)
++
++# per-channel shifts were calculated on the *previous* invocation
++
++# get base addresses and per-channel shifts for *next* invocation
++  mov vw_setup, rb_vpm_init     ; mov ra2, unif                 # ; x_y
++
++  add.setf -, rb_ef, rb_ef      ; mov r3, unif                  # [ra2 delay] ; r3=base
++
++  shl r0, ra2.16b, v_x_shift    ; v8subs r5rep, r1, r1          # x ; r5=0
++  add r0, r0, rb_elem_x         ; mov ra_y_next, ra2.16a
++  sub r1, r5, rb_pitch          ; mov ra_width_height, unif     # r1=pitch2 mask ; width_height
++  max r0, r0, r5                ; mov ra_xshift, ra_xshift_next
++  min r0, r0, rb_max_x          ; mov ra0, unif                 # ; L0 H filter coeffs
++
++.if v_bit_depth <= 8
++  shl ra_xshift_next, r0, 3
++.endif
++
++  and r0, r0, -4                ; mov ra2, unif                 # ; L0 V filter coeffs
++  and r1, r0, r1                ; mul24 r2, ra_width, v_x_mul   # r2=w*2 (we are working in pel pairs)
++  xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++  add r0, r0, r1                ; mov r1, ra_height             # Add stripe offsets ; r1=height
++  add ra_base_next, r3, r0      ; mov rb_xshift2, rb_xshift2_next # ; xshift2 used because B
++
++# set up VPM write
++
++  sub rb_dma1, rb_dma1_base, r2 ; mov ra_wt_off_mul_l0, unif    # Compute vdw_setup1(dst_pitch-width) ; U weight
++  add rb_i_tmu, r1, (3-4) - PREREAD ; v8min r1, r1, ra_blk_height
++  add rb_lcount, r1, (3-4)      ; mov.ifc ra_wt_mul_l0, ra_wt_off_l0 # ; V weight
++
++  shl r0, r1, v_dma_h_shift     ; mov ra3, unif                 # ; x2_y2
++  add r0, r0, r2                ; mov r3, unif                  # [ra3 delay] ; base
++  shl r0, r0, v_dma_wh_shift    ; mov ra_y2_next, ra3.16a       # Shift into bits 16 upwards of the vdw_setup0 register
++  add ra_dma0, r0, rb_dma0_base ; mov r0, ra3.16b               # r0=x
++
++# L1 - uniform layout could possibly be optimized
++
++  shl r0, r0, v_x_shift         ; mov ra1, unif                 # r0=x<<shift ; L1 H filter coeffs
++  add r0, r0, rb_elem_x         ; mov ra3, unif                 # ; L1 V filter coeffs
++  sub r1, r5, rb_pitch          ; mov ra_wt_off_mul_l1, unif    # [ra3 delay] r1=pitch2 mask ; U offset/weight
++  max r0, r0, r5                ; mov ra9, rb_max_y
++  min r0, r0, rb_max_x          ; mov r2, ra_kmul_add
++
++.if v_bit_depth <= 8
++  shl rb_xshift2_next, r0, 3
++.endif
++
++  and r0, r0, -4                ; mov.ifc ra_wt_off_mul_l1, unif # ; V offset/weight
++  and r1, r0, r1                ; mov r5rep, -4
++  xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++  add r0, r0, r1                ; mov ra_dest, unif             #  Add stripe offsets ; dst_addr
++  add rb_base2_next, r3, r0     ; mov r0, ra_fir_off_val
++
++  add ra_wt_mul_l0, ra_wt_mul_l0, r2 ; mul24 r1, r0, ra_wt_mul_l0
++  add ra_wt_mul_l1, ra_wt_mul_l1, r2 ; mul24 r0, r0, ra_wt_mul_l1
++  add r0, r0, r1                ; mov r1, ra_wt_off_l1          # ; L0 off unset
++  shl r1, r1, i_wt_den_p6       ; mov rb11, ra3.8d
++  sub rb_wt_off, r1, r0         ; mov ra_link, unif             # ; link
++
++  mov ra10, rb_xshift2          ; mov rb7,  ra2.8d
++
++# r5        loop counter (-4)
++# ra0       H coeffs L0
++# ra1       H coeffs L1
++# ra2       V coeffs L0
++# ra3       V coeffs L1
++# ra9       rb_max_y alias
++# ra10      rb_xshift2 alias
++
++:1
++# retrieve texture results and pick out bytes
++# then submit two more texture requests
++  sub.setf -, r5, rb_i_tmu      ; nop                           ; ldtmu0
++  shr r2, r4, ra_xshift         ; mov.ifz rb_base2, rb_base2_next
++  shr r1, r2, v_v_shift         ; mov.ifz ra_y_y2, ra_y_y2_next
++  add.setf -, rb_ef, rb_ef      ; mov.ifz ra_base, ra_base_next # [ra_y delay]
++  add ra_y, 1, ra_y             ; mov r3, ra_y
++
++  max r3, r3, ra_k0             ; mov      r0, r1 << 15
++  min r3, r3, ra9               ; mov.ifnc r1, r2 << 1
++
++  mov.ifnc r0, r2               ; mul24 r3, r3, rb_pitch
++  add t0s, ra_base, r3          ; v8min r0, r0, rb_pmask        # ; masks bytes
++
++# L0 H-filter (-ra4*, +rb5, +rb6, -ra7)
++
++  and r1, r1, rb_pmask          ; mul24      r2, ra0.8a,       r0
++  sub r2, rb_fir_off_h, r2      ; mul24      r3, ra0.8d,       r1
++  sub r2, r2, r3                ; mul24      r3, ra0.8b << 2,  r0 << 2  @ "mul_used", 0
++  nop                           ; mul24.ifn  r3, ra0.8b << 12, r1 << 12 @ "mul_used", 0
++  add r2, r2, r3                ; mul24      r3, ra0.8c << 4,  r0 << 4  @ "mul_used", 0
++  nop                           ; mul24.ifn  r3, ra0.8c << 14, r1 << 14 @ "mul_used", 0
++
++  add r0, r2, r3                ; mul24 ra4, rb5, ra2.8a        ; ldtmu1
++
++  shr r2, r4, ra10              ; mov rb5, rb6
++  shr r1, r2, v_v_shift         ; mov r3, ra_y2
++  shr ra7, r0, v_bit_depth - 8  ; mov rb6, ra7                  # [r1 << delay]
++
++  add ra_y2, r3, ra_k1          ; mov      r0, r1 << 15
++  max r3, r3, ra_k0             ; mov.ifnc r1, r2 << 1
++  min r3, r3, rb_max_y          ; v8min r1, r1, ra_pmax
++
++  mov.ifnc r0, r2               ; mul24 r3, r3, rb_pitch
++  add t1s, rb_base2, r3         ; v8min r0, r0, ra_pmax         # ; masks bytes
++
++# L1 H-filter (-r0*, +rb9, +rb10, -ra11)
++
++  add r5rep, r5, 1              ; mul24      r2, ra1.8a,       r0
++  sub r2, rb_fir_off_h, r2      ; mul24      r3, ra1.8d,       r1
++  sub r2, r2, r3                ; mul24      r3, ra1.8b << 2,  r0 << 2  @ "mul_used", 0
++  nop                           ; mul24.ifn  r3, ra1.8b << 12, r1 << 12 @ "mul_used", 0
++  add r2, r3, r2                ; mul24      r3, ra1.8c << 4,  r0 << 4  @ "mul_used", 0
++  add.setf -, r5, r5            ; mul24.ifn  r3, ra1.8c << 14, r1 << 14 @ "mul_used", 0
++
++  brr.anyn -, r:1b
++  add r2, r2, r3                ; mul24 r0, rb9,  ra3.8a
++  mov rb9, rb10                 ; mul24 r1, rb10, ra3.8b
++  shr ra11, r2, v_bit_depth - 8 ; mov rb10, ra11
++# >>> .anyn 1b
++
++  sub r2, r1, r0                ; mul24 r1, rb5,  ra2.8b        # L1 ; L0
++  sub.setf -, r5, rb_lcount     ; mov r0, ra4
++  sub r1, r1, r0                ; mul24 r0, rb6,  ra2.8c
++  add r1, r1, r0                ; mul24 r0, ra7,  rb7
++
++  sub r1, r1, r0                ; mul24 r0, rb10, ra3.8c        # L1
++  add r2, r2, r0                ; mul24 r0, ra11, rb11          # L1
++  sub r2, r2, r0
++
++  shr r1, r1, 6
++  shr r2, r2, 6                 ; mul24 r0, r1, ra_wt_mul_l0
++  add r2, r2, r1                ; mul24 r1, r2, ra_wt_mul_l1
++  add r1, r1, r0                ; mul24 r2, r2, ra_kmul_add
++  sub r1, r1, r2                ; mov r3, ra_blk_height         # ; NxtLoop
++  add r1, r1, rb_wt_off         ; v8subs r0, ra_height, r3      # ; NxtLoop
++
++  brr.anyn -, r:1b
++  asr r1, r1, ra_wt_den_p7
++  min r1, r1, ra_pmax           ; mov -, vw_wait
++  max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch        # ; NxtLoop
++# >>> .anyn 1b
++
++# r0 = remaining height (min 0)
++# r2 = r3 * rb_pitch
++# r3 = block_height
++
++# If looping again then we consumed 16 height last loop
++# rb_dma1 (stride) remains constant
++# rb_i_tmu remains const (based on total height)
++# recalc ra_dma0, rb_lcount based on new segment height
++
++  mov.setf ra_height, r0        ; mov vw_setup, ra_dma0         # ; VDW setup 0
++
++# DMA out
++  bra.anyz -, ra_link
++  min r0, r0, r3                ; mov vw_setup, rb_dma1         # ; Stride
++  sub r1, r0, r3                ; mov vw_addr, ra_dest          # ; start the VDW
++  shl r1, r1, i_shift23
++# >>> .anyz ra_link
++
++# Here r1 = cur_blk_height - 16 so it will be 0 or -ve
++# We add to dma0 to reduce the number of output lines in the final block
++  brr -, r:1b
++  add rb_lcount, rb_lcount, r0
++  add ra_dma0, ra_dma0, r1
++  add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init     # ; Reset our VPM write pointer
++# >>> 1b
++.endm
++
++::mc_filter_c_b
++  m_filter_c_b 8
++
++################################################################################
++# Exit code used by both Luma & Chroma so place between them to avoid I-cache
++# conflicts
++
++.macro m_exit_drain
++.if PREREAD == 2
++# Special case 2 as loop is wasteful: unrolled, 2 ldtmu0/ldtmu1 pairs, vw_wait folded into the last one
++  nop                   ; nop           ; ldtmu0
++  nop                   ; nop           ; ldtmu1
++  nop                   ; nop           ; ldtmu0
++  mov -, vw_wait        ; nop           ; ldtmu1
++.else
++  mov.setf r3, PREREAD - 1             # r3 = remaining passes; flags are tested by the brr below
++:1
++  brr.anynz -, r:1b                    # loop while r3 was non-zero; the 3 instrs below always execute
++  nop                   ; nop           ; ldtmu0
++  nop                   ; nop           ; ldtmu1
++  sub.setf r3, r3, 1                   # count down and set flags for the next pass
++ # >>> .anynz 1b (each pass retrieves one result from each TMU)
++  mov  -, vw_wait                      # presumably stalls until the VPM DMA write is complete - see VideoCore IV docs
++.endif
++.endm
++
++# This sync layout groups QPUs 0-3, 4-7, 8-11 (i.e. 1 group per TMU pair)
++# All qpus start at the beginning and after that (group - 1) must have finished
++# before (group) can start
++#
++# Requires setup code for QPU 0 to srel sem 12 (m_setup_q0) to start the chain
++# Exit code will sacq sem 12 so everything is @ 0 on exit (this is important -
++# lockup otherwise)
++#
++# There is some, currently ill defined, potential lockup if we have the VDM active
++# whilst doing sem stuff so we wait first. ?? QPU stall from sem stalls VDM pipe too ??
++#
++# The code stalled when I had many waiters on a single sem so we have a
++# "ripple" of srels to restart.  Unsure why, may have been bug, but this works
++# and we currently have both the memory & sems to support it.
++.macro m_sync_q, n_qpu, n_quads
++# Do not generate code for qpu >= quads * 4 -  fns should never be called
++.if n_qpu < n_quads * 4
++  mov ra_link, unif     # Can only branch to an a reg (not r0)
++  mov -, vw_wait        # [ra_link delay] ; wait before touching sems (see lockup note above this macro)
++
++.set n_sem_sync, n_qpu - (n_qpu % 4)   # sem of the first QPU in this group of 4 - used as the group's arrival sem
++.set n_sem_in, n_qpu                   # sem this QPU waits on
++.set n_sem_out, n_qpu + 1              # sem the next QPU in the group waits on
++
++.if n_qpu % 4 == 0
++# First QPU of a group
++.set n_sem_quad_in,  12 + n_qpu / 4    # sem that allows this group to start
++.set n_sem_quad_out, 12 + (((n_qpu / 4) + 1) % n_quads)   # sem that allows the next group to start (wraps to 12)
++
++  sacq -, n_sem_sync                   # 3 acquires: one for each of the other 3 QPUs
++  sacq -, n_sem_sync                   # in the group (each of them srels n_sem_sync
++  sacq -, n_sem_sync                   # in the .else branch below)
++  bra -, ra_link                       # return; the 3 instrs below are the delay slots
++  sacq -, n_sem_quad_in                # wait until the previous group has finished
++  srel -, n_sem_out                    # start the ripple: wake QPU n_qpu + 1
++  srel -, n_sem_quad_out               # allow the next group to start
++
++.else
++  bra -, ra_link                       # return; the 3 instrs below are the delay slots
++  srel -, n_sem_sync                   # tell the first QPU of the group we have arrived
++  sacq -, n_sem_in                     # wait to be woken by the previous QPU in the group
++.if n_sem_out % 4 != 0
++  srel -, n_sem_out                    # pass the wake-up on to the next QPU
++.else
++  nop                                  # last QPU of the group: nobody left to wake, just fill the slot
++.endif
++.endif
++.endif
++.endm
++
++.set v_quads8, N_QPU_8 / 4
++
++::mc_sync_q0
++  m_sync_q 0, v_quads8
++::mc_sync_q1
++  m_sync_q 1, v_quads8
++::mc_sync_q2
++  m_sync_q 2, v_quads8
++::mc_sync_q3
++  m_sync_q 3, v_quads8
++::mc_sync_q4
++  m_sync_q 4, v_quads8
++::mc_sync_q5
++  m_sync_q 5, v_quads8
++::mc_sync_q6
++  m_sync_q 6, v_quads8
++::mc_sync_q7
++  m_sync_q 7, v_quads8
++::mc_sync_q8
++  m_sync_q 8, v_quads8
++::mc_sync_q9
++  m_sync_q 9, v_quads8
++::mc_sync_q10
++  m_sync_q 10, v_quads8
++::mc_sync_q11
++  m_sync_q 11, v_quads8
++
++# mc_exit()
++# Chroma & Luma the same now
++
++.macro m_exit_qn
++  m_exit_drain
++  nop                   ; nop           ; thrend
++  nop                                   # 1st instr after thrend
++  nop                                   # 2nd instr after thrend
++# >>> thrend <<< (exit for QPUs other than 0: drain, then end the thread - no sem or interrupt)
++.endm
++
++::mc_exit_c_qn
++::mc_exit_y_qn
++  m_exit_qn
++
++
++
++# mc_exit_c_q0() / mc_exit_y_q0() - QPU 0 exit: acquires sem 12 and raises the interrupt
++
++.macro m_exit_q0
++  m_exit_drain
++  sacq -, 12                            # take sem 12 back so every sem is 0 on exit (lockup otherwise)
++  nop                   ; nop           ; thrend
++  mov interrupt, 1                      # 1st instr after thrend: write the interrupt reg (presumably signals the host)
++  nop                                   # 2nd instr after thrend
++# >>> thrend <<<
++.endm
++
++::mc_exit_c_q0
++::mc_exit_y_q0
++  m_exit_q0
++
++# LUMA CODE
++
++# The idea is to form B predictions by doing 8 pixels from ref0 in parallel with 8 pixels from ref1.
++# For P frames we make the second x,y coordinates offset by +8
++
++
++################################################################################
++# mc_setup_y
++#
++# typedef struct qpu_mc_pred_y_s_s {
++#    qpu_mc_src_t next_src1;
++#    qpu_mc_src_t next_src2;
++#    uint16_t pic_h;
++#    uint16_t pic_w;
++#    uint32_t stride2;
++#    uint32_t stride1;
++#    uint32_t wdenom;
++#    uint32_t next_fn;
++# } qpu_mc_pred_y_s_t;
++
++.macro m_setup_y, v_bit_depth
++
++# Cannot use mul24 on x as x might be -ve, so must use shift
++.if v_bit_depth <= 8
++.set v_x_shift,         0
++.set v_pmask,           0xff
++.set v_blk_height,      Y_BLK_HEIGHT_8
++.else
++.set v_x_shift,         1
++.set v_pmask,           0xffff
++.set v_blk_height,      Y_BLK_HEIGHT_16
++.endif
++
++
++  # Need to save these because we need to know the frame dimensions before computing texture coordinates
++  mov tmurs, 1                  ; mov ra0, unif                 # No TMU swap ; x_y
++  mov ra9, unif                                                 # ref_y_base
++  mov ra1, unif                                                 # x2_y2
++
++
++# load constants
++  mov r0, [0,2,0,2,0,2,0,2,1,3,1,3,1,3,1,3]
++  shl rb_ef, r0, i_shift30      ; mov ra11, unif                # ; ref_y2_base
++
++  mov ra_kff800100, 0xff800100
++  mov rb_pmask, v_pmask
++  mov ra_blk_height_pmax, ((1 << v_bit_depth) - 1) | (v_blk_height << 16)
++  mov rb_fir_off_h, (FIR_OFFSET << (v_bit_depth - 8))
++  mov ra_fir_off_val_wt_den_p7, (FIR_OFFSET << 16) | (DENOM + 15 - v_bit_depth)
++  mov rb_y_coeffs_2, 0x050b0a00
++  mov rb_y_coeffs_3, 0x11283a40
++  mov rb_y_coeffs_5, 0x0a0b0500
++
++# Compute part of VPM to use
++
++# Read image dimensions
++  mov ra3, unif                                                 # width_height
++  mov ra_ef, rb_ef              ; mov rb_xpitch, unif           # [ra3 delay] ; stride2
++.if v_x_shift == 0
++  sub rb_max_x, ra3.16b, 1
++.else
++  sub r0, ra3.16b, 1
++  shl rb_max_x, r0, v_x_shift
++.endif
++  sub rb_max_y, ra3.16a, 1
++  mov r3, elem_num              ; mov rb_pitch, unif            # stride1
++
++# get destination pitch
++  mov r1, vdw_setup_1(0)                                        # [rb_pitch delay]
++  or  rb_dma1_base, r1, rb_pitch
++
++# Compute base address for first and second access
++  add r0, ra0.16b, r3                                           # Load x + elem_num
++.if v_x_shift != 0
++  shl r0, r0, v_x_shift
++.endif
++  max r0, r0, 0
++  min r0, r0, rb_max_x
++  shl ra_xshift_next, r0, 3                                     # Compute shifts
++
++# X is byte offset - we can only load words - mask
++
++  and r0, r0, -4                ; v8subs r2, r2, r2
++  sub r2, r2, rb_pitch
++  and r1, r0, r2
++  xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++  add r0, r0, r1                                                # Add stripe offsets
++  add ra_base, ra9, r0
++
++  # r3 still contains elem_num
++  add r0, ra1.16b, r3                                           # Load x
++.if v_x_shift != 0
++  shl r0, r0, v_x_shift
++.endif
++  max r0, r0, 0
++  min r0, r0, rb_max_x
++  shl rb_xshift2_next, r0, 3                                    # Compute shifts
++
++  # r2 still contains mask
++  and r0, r0, -4
++  and r1, r0, r2
++  xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++  add r0, r0, r1                                                # Add stripe offsets
++  add rb_base2, ra11, r0
++
++# Do preloads
++  nop                           ; mov r0, ra0.16a               # ; r0 = y
++  mov r3, PREREAD               ; mov r2, ra1.16a               # ; r2 = y2
++
++:1
++  sub.setf r3, r3, 1
++  max r1, r0, 0
++  min r1, r1, rb_max_y
++  add r0, r0, ra_k1             ; mul24 r1, r1, rb_pitch
++  add t0s, ra_base, r1          ; mov ra_y, r0
++
++  max r1, r2, 0
++  brr.anynz -, r:1b
++  min r1, r1, rb_max_y
++  add r2, r2, ra_k1             ; mul24 r1, r1, rb_pitch
++  add t1s, rb_base2, r1         ; mov ra_y2, r2
++# >>> .anynz 1b
++
++  m_calc_dma_regs v_bit_depth, v_blk_height, rb_vpm_init, rb_dma0_base
++
++  mov ra_link, unif                                             # Next fn
++
++# touch vertical context to keep simulator happy
++  mov ra8,  0                   ; mov rb8,  0                   # [ra_link delay]
++  bra -, ra_link
++  mov ra9,  0                   ; mov rb9,  0
++  mov ra10, 0                   ; mov rb10, 0
++  mov ra11, 0                   ; mov rb11, 0
++# >>> ra_link
++.endm
++
++::mc_setup_y_q0
++  m_setup_q0
++::mc_setup_y_qn
++  m_setup_y 8
++
++################################################################################
++#
++# Start of per-block setup code
++# P and B blocks share the same setup code to save on Icache space
++
++# get base addresses and per-channel shifts for *next* invocation
++# per-channel shifts were calculated on the *previous* invocation
++
++# 1st 3 instructions of per_block-setup in branch delay
++#
++# typedef struct qpu_mc_pred_y_p_s {
++#    qpu_mc_src_t next_src1;
++#    qpu_mc_src_t next_src2;
++#    uint16_t h;
++#    uint16_t w;
++#    uint32_t mymx21;
++#    uint32_t wo1;
++#    uint32_t wo2;
++#    uint32_t dst_addr;
++#    uint32_t next_fn;
++# } qpu_mc_pred_y_p_t;
++#
++
++.macro m_luma_setup, v_bit_depth
++# Hack - QASM may well have label pasting but I have no idea how...
++.if v_bit_depth == 8
++  brr ra_link, r:per_block_setup_8                              # Call shared setup; it returns via bra ra_link
++.elif v_bit_depth == 10
++  brr ra_link, r:per_block_setup_10                             # Call shared setup; it returns via bra ra_link
++.endif
++  mov ra0, unif                 ; mov r3, elem_num              # y_x ; elem_num has implicit unpack??
++  add.setf -, rb_ef, rb_ef      ; v8subs r5rep, r2, r2          # [ra0 delay] ; r5 = 0
++  add r0, ra0.16b, r3           ; mov rb_xshift2, rb_xshift2_next # r0 = x + elem_num ; last of the 3 branch delay slots
++.endm
++
++.macro m_per_block_setup, v_bit_depth
++
++.if v_bit_depth <= 8
++.set v_x_shift,         0
++.set v_x_mul,           1
++# Shifts to get width & height in the right place in ra_dma0
++.set v_dma_h_shift,     7
++.set v_dma_wh_shift,    i_shift16
++.else
++.set v_x_shift,         1
++.set v_x_mul,           2
++# Shifts to get width & height in the right place in ra_dma0
++.set v_dma_h_shift,     8
++.set v_dma_wh_shift,    15
++.endif
++
++.if v_x_shift != 0
++  shl r0, r0, v_x_shift
++.endif
++  max r0, r0, r5                ; mov ra_xshift, ra_xshift_next
++  min r0, r0, rb_max_x
++
++  shl ra_xshift_next, r0, 3                                     # Compute shifts
++  and r0, r0, -4
++  sub r2, r5, rb_pitch          ; mov ra_base_next, unif        # ; src1.base
++  and r1, r0, r2                ; mov ra_y_next, ra0.16a
++  xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++  add r0, r0, r1                ; mov ra1, unif                 # Add stripe offsets ; src2.x_y
++  add ra_base_next, ra_base_next, r0                            # [ra1 delay]
++
++  add r0, ra1.16b, r3                                           # Load x2
++.if v_x_shift != 0
++  shl r0, r0, v_x_shift
++.endif
++  max r0, r0, r5                ; mov ra_y2_next, ra1.16a
++  min r0, r0, rb_max_x          ; mov rb_base2_next, unif       # ; src2.base
++  shl rb_xshift2_next, r0, 3                                    # Compute shifts
++  and r0, r0, -4                ; mov ra_width_height, unif     # ; width_height
++  and r1, r0, r2                ; mov vw_setup, rb_vpm_init     # ; set up VPM write
++  xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++  add r0, r0, r1                ; mul24 r1, ra_width, v_x_mul   # Add stripe offsets ; r1 = x in bytes
++  add rb_base2_next, rb_base2_next, r0
++
++# get width,height of block (unif load above), r1 = width * pel_size
++  sub rb_dma1, rb_dma1_base, r1 ; mov r0, ra_height             # Compute vdw_setup1(dst_pitch-width)
++  add rb_i_tmu, r0, (7-8) - PREREAD ; v8min r0, r0, ra_blk_height
++  add rb_lcount, r0, (7-8)
++  shl r0, r0, v_dma_h_shift     ; mov r3, ra_kmul_add           # ; r3 return val
++  add r0, r0, r1                                                # Combine width and height of destination area
++  shl r0, r0, v_dma_wh_shift    ; mov r2, ra_fir_off_val        # Shift into bits 16 upwards of the vdw_setup0 register ; r2 return val
++  add ra_dma0, r0, rb_dma0_base ; mov r0, unif                  # ; Packed filter offsets
++
++# get filter coefficients and discard unused B frame values
++  shl.ifnn r0, r0, i_shift16    ; mov ra_wt_off_mul_l0, unif    #  Pick half to use ; L0 offset/weight
++  shl ra8, r0, 3                ; mov rb5, ra_k255
++
++# Coeffs are all abs values here as that means mul24 works (no sign extend from .8)
++
++# 2nd half coeffs same as first if we can swap 8<->24 in the rotate val
++# but I can't see a way of doing that that is cheap enough to be worth it
++
++# Picked out in a slightly random order to space out uniform loads
++
++  # 1
++  mov r1, 0x01040400            # [ra8 delay]
++  ror ra2.8b, r1, ra8.8d
++  ror ra0.8b, r1, ra8.8c
++  # 2
++  ror ra2.8c, rb_y_coeffs_2, ra8.8d
++  ror ra0.8c, rb_y_coeffs_2, ra8.8c
++  # 0
++  mov r1,0x00010100             # -ve  [ra8 delay]
++  ror r0, r1, ra8.8d            ; mov ra_wt_off_mul_l1, unif    # ; L1 Wt/Offset
++  ror ra0.8a, r1, ra8.8c        ; v8min rb4, r0, rb5
++  # 7
++  shl r1, r1, 8                 ; mov.ifn ra_wt_off_mul_l0, ra_wt_off_mul_l1 # r1 = 0x01010000
++  ror r0, r1, ra8.8d            ; mov ra_dest, unif             # ; Destination address
++  ror ra1.8d, r1, ra8.8c        ; v8min rb11, r0, rb5
++  # 3
++  ror ra2.8d, rb_y_coeffs_3, ra8.8d
++  ror ra0.8d, rb_y_coeffs_3, ra8.8c
++  # 5
++  ror ra3.8b, rb_y_coeffs_5, ra8.8d
++  ror ra1.8b, rb_y_coeffs_5, ra8.8c
++  # 6
++  mov r1,0x04040100
++  ror ra3.8c, r1, ra8.8d
++  ror ra1.8c, r1, ra8.8c        ; mov r5rep, -8                 # ; r5 return val
++
++  bra -, ra_link
++  # 4
++  mov r1,0x3a281100
++  ror r0, r1, ra8.8d            ; mov ra_link, unif             # ; link - load after we've used its previous val
++  ror ra1.8a, r1, ra8.8c        ; v8min rb8, r0, rb5
++# >>> branch ra_link
++
++# r5 = -8
++# r2 = fir_off_val
++# r3 = 128
++.endm
++
++:per_block_setup_8
++  m_per_block_setup 8
++
++
++
++################################################################################
++#
++# mc_filter_y_pxx
++#
++# Setup (& therefore uniform struct) shared with _bxx
++# Struct in m_luma_setup
++#
++# We can have 2 separate P reqs here as long as they mate to generate a
++# rectangular output block (i.e. h0 = h1, w0 = 8)
++#
++# At this point we have already issued PREREAD pairs of texture requests for the current block
++
++.macro m_filter_y_pxx, v_bit_depth
++
++# denom shift values
++.set i_wt_den_p5,                  (DENOM + 13 - v_bit_depth)
++.set i_wt_den_p6,                  (DENOM + 14 - v_bit_depth)
++
++  m_luma_setup v_bit_depth
++
++  shl r1, ra_wt_off_l0, i_wt_den_p5
++  add ra_wt_mul_l0, ra_wt_mul_l0, r3 ; mul24 r0, r2, ra_wt_mul_l0 # r2 = 0x4000 so mul24 safe even with -ve wt_mul
++  sub rb_wt_off, r1, r0         ; mov ra_ef.8a, rb4
++
++# retrieve texture results and pick out bytes
++# then submit two more texture requests
++
++# This loop is identical to the B loop from here --->
++:1
++  add.setf -, ra_ef, ra_ef      ; mul24 ra4, rb5, ra_ef
++
++  max r2, ra_y, 0               ; mov r1, 0
++  min r2, r2, rb_max_y          ; mov r3, ra_k1
++  add ra_y, ra_y, r3            ; mul24 r2, r2, rb_pitch        ; ldtmu0
++  add t0s, ra_base, r2          ; mov rb5,  rb6
++  shr r0, r4, ra_xshift         ; mov rb6,  rb7
++
++  max r2, ra_y2, r1             ; v8min r0, r0, rb_pmask        ; ldtmu1 # ; masks out all but wanted bytes
++  shr r1, r4, rb_xshift2        ; mov rb7, ra8
++  min r2, r2, rb_max_y          ; v8min r1, r1, ra_pmax
++  add ra_y2, ra_y2, r3          ; mul24 r2, r2, rb_pitch
++  add t1s, rb_base2, r2         ; mov ra8,  ra9
++
++# apply horizontal filter
++  add r5rep, r5, r3     ; mul24      r2, ra0.8a << 8,  r1 << 8  @ "mul_used", 0
++  mov r3, rb_fir_off_h  ; mul24.ifnn r2, ra0.8a,       r0
++  sub r2, r3, r2        ; mul24      r3, ra0.8b << 1,  r0 << 1  @ "mul_used", 0
++  nop                   ; mul24.ifn  r3, ra0.8b << 9,  r1 << 9  @ "mul_used", 0
++  add r2, r2, r3        ; mul24      r3, ra0.8c << 2,  r0 << 2  @ "mul_used", 0
++  nop                   ; mul24.ifn  r3, ra0.8c << 10, r1 << 10 @ "mul_used", 0
++  sub r2, r2, r3        ; mul24      r3, ra0.8d << 3,  r0 << 3  @ "mul_used", 0
++  nop                   ; mul24.ifn  r3, ra0.8d << 11, r1 << 11 @ "mul_used", 0
++  add r2, r2, r3        ; mul24      r3, ra1.8a << 4,  r0 << 4  @ "mul_used", 0
++  nop                   ; mul24.ifn  r3, ra1.8a << 12, r1 << 12 @ "mul_used", 0
++  add r2, r2, r3        ; mul24      r3, ra1.8b << 5,  r0 << 5  @ "mul_used", 0
++  nop                   ; mul24.ifn  r3, ra1.8b << 13, r1 << 13 @ "mul_used", 0
++  sub r2, r2, r3        ; mul24      r3, ra1.8c << 6,  r0 << 6  @ "mul_used", 0
++  nop                   ; mul24.ifn  r3, ra1.8c << 14, r1 << 14 @ "mul_used", 0
++  add r2, r2, r3        ; mul24      r3, ra1.8d << 7,  r0 << 7  @ "mul_used", 0
++  add.setf -, r5, r5    ; mul24.ifn  r3, ra1.8d << 15, r1 << 15 @ "mul_used", 0
++
++  brr.anyn -, r:1b
++  sub r2, r2, r3                ; mul24 r1, rb5,  ra2.8b
++  mov ra9,  rb10                ; mul24 r0, rb10, ra3.8b
++  asr ra11, r2, v_bit_depth - 8 ; mov rb10, ra11
++  # >>> .anyn 1b (r5 + r5)
++
++  # apply vertical filter and write to VPM
++  # - r4* + r5 - r6 + r7 + r8 - r9 + r10 - r11
++
++  sub r1, r1, r0                ; mul24 r0, rb6,  ra2.8c
++  sub r1, r1, r0                ; mul24 r0, rb7,  ra2.8d
++  add r1, r1, r0                ; mul24 r0, ra8,  rb8
++  add r1, r1, r0                ; mul24 r0, rb10, ra3.8c
++  add r1, r1, r0                ; mul24 r0, ra11, rb11
++# <--- to here
++  sub.setf -, r5, rb_i_tmu      ; mov r3, ra_blk_height                 # ; NxtLoop: r3 = block height
++  sub r1, r1, ra4               ; mov.ifz rb_base2, rb_base2_next
++  sub r1, r1, r0                ; mov.ifz ra_base, ra_base_next
++
++  asr r1, r1, 6                 ; mov.ifz ra_y_y2, ra_y_y2_next
++  sub.setf -, r5, rb_lcount     ; mul24 r0, r1, ra_wt_mul_l0
++  add r0, r0, rb_wt_off         ; mul24 r1, r1, ra_kmul_add
++  sub r1, r0, r1                ; v8subs r0, ra_height, r3              # ; NxtLoop: r0 = remaining height (0 saturate)
++
++  brr.anyn -, r:1b
++  asr r1, r1, i_wt_den_p6
++  min r1, r1, ra_pmax           ; mov -, vw_wait
++  max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch                # ; NxtLoop
++# >>> branch.anyn 1b (r5 - rb_lcount)
++
++# r0 = remaining height (min 0)
++# r2 = r3 * rb_pitch
++# r3 = block_height
++
++# If looping again then we consumed block_height last loop
++# rb_dma1 (stride) remains constant
++# rb_i_tmu remains const (based on total height)
++# recalc ra_dma0, rb_lcount based on new segment height
++
++  mov.setf ra_height, r0        ; mov vw_setup, ra_dma0 # VDW setup 0
++
++# DMA out
++  bra.anyz -, ra_link
++  min r0, r0, r3                ; mov vw_setup, rb_dma1 # Stride
++  sub r1, r0, r3                ; mov vw_addr, ra_dest  # start the VDW
++  shl r1, r1, i_shift23
++# >>> .anyz ra_link
++
++# Here r1 = cur_blk_height - blk_height so it will be 0 or -ve
++# We add to dma0 to reduce the number of output lines in the final block
++  brr -, r:1b
++  add rb_lcount, rb_lcount, r0
++  add ra_dma0, ra_dma0, r1
++  add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init     # ; Reset our VPM write pointer
++# >>> 1b
++.endm
++
++::mc_filter_y_pxx
++  m_filter_y_pxx 8
++
++
++################################################################################
++
++# mc_filter_b(y_x, base, y2_x2, base2, width_height, my2_mx2_my_mx, offsetweight0, this_dst, next_kernel)
++#
++# Setup (& therefore uniform struct) shared with _pxx
++# Struct in m_luma_setup
++#
++# l0 calc in els 0-7, L1 in 8-15
++# Only els 0-7 write data that is stored back to ram (els 8-15 may write tosh)
++#
++# At this point we have already issued PREREAD pairs of texture requests for the current block
++
++.macro m_filter_y_bxx, v_bit_depth
++
++# denom shift values
++.set i_wt_den_p5,                  (DENOM + 13 - v_bit_depth)
++.set i_wt_den_p6,                  (DENOM + 14 - v_bit_depth)
++
++  m_luma_setup v_bit_depth
++
++  shl r1, ra_wt_off_l0, i_wt_den_p6
++  add ra_wt_mul_l0, ra_wt_mul_l0, r3 ; mul24 r0, r2, ra_wt_mul_l0
++  sub r1, r1, r0                ; mul24 r0, r2, ra_wt_mul_l1
++  sub rb_wt_off, r1, r0         ; mov ra_ef.8a, rb4
++
++# This loop is identical to the P loop from here --->
++:1
++  add.setf -, ra_ef, ra_ef      ; mul24 ra4, rb5, ra_ef
++
++  max r2, ra_y, 0               ; mov r1, 0
++  min r2, r2, rb_max_y          ; mov r3, ra_k1
++  add ra_y, ra_y, r3            ; mul24 r2, r2, rb_pitch        ; ldtmu0
++  add t0s, ra_base, r2          ; mov rb5,  rb6
++  shr r0, r4, ra_xshift         ; mov rb6,  rb7
++
++  max r2, ra_y2, r1             ; v8min r0, r0, rb_pmask        ; ldtmu1 # ; masks out all but wanted bytes
++  shr r1, r4, rb_xshift2        ; mov rb7, ra8
++  min r2, r2, rb_max_y          ; v8min r1, r1, ra_pmax
++  add ra_y2, ra_y2, r3          ; mul24 r2, r2, rb_pitch
++  add t1s, rb_base2, r2         ; mov ra8,  ra9
++
++# apply horizontal filter
++  add r5rep, r5, r3     ; mul24      r2, ra0.8a << 8,  r1 << 8  @ "mul_used", 0
++  mov r3, rb_fir_off_h  ; mul24.ifnn r2, ra0.8a,       r0
++  sub r2, r3, r2        ; mul24      r3, ra0.8b << 1,  r0 << 1  @ "mul_used", 0
++  nop                   ; mul24.ifn  r3, ra0.8b << 9,  r1 << 9  @ "mul_used", 0
++  add r2, r2, r3        ; mul24      r3, ra0.8c << 2,  r0 << 2  @ "mul_used", 0
++  nop                   ; mul24.ifn  r3, ra0.8c << 10, r1 << 10 @ "mul_used", 0
++  sub r2, r2, r3        ; mul24      r3, ra0.8d << 3,  r0 << 3  @ "mul_used", 0
++  nop                   ; mul24.ifn  r3, ra0.8d << 11, r1 << 11 @ "mul_used", 0
++  add r2, r2, r3        ; mul24      r3, ra1.8a << 4,  r0 << 4  @ "mul_used", 0
++  nop                   ; mul24.ifn  r3, ra1.8a << 12, r1 << 12 @ "mul_used", 0
++  add r2, r2, r3        ; mul24      r3, ra1.8b << 5,  r0 << 5  @ "mul_used", 0
++  nop                   ; mul24.ifn  r3, ra1.8b << 13, r1 << 13 @ "mul_used", 0
++  sub r2, r2, r3        ; mul24      r3, ra1.8c << 6,  r0 << 6  @ "mul_used", 0
++  nop                   ; mul24.ifn  r3, ra1.8c << 14, r1 << 14 @ "mul_used", 0
++  add r2, r2, r3        ; mul24      r3, ra1.8d << 7,  r0 << 7  @ "mul_used", 0
++  add.setf -, r5, r5    ; mul24.ifn  r3, ra1.8d << 15, r1 << 15 @ "mul_used", 0
++
++  brr.anyn -, r:1b
++  sub r2, r2, r3                ; mul24 r1, rb5,  ra2.8b
++  mov ra9,  rb10                ; mul24 r0, rb10, ra3.8b
++  asr ra11, r2, v_bit_depth - 8 ; mov rb10, ra11
++  # >>> .anyn 1b (r5 + r5)
++
++  # apply vertical filter and write to VPM
++  # - r4* + r5 - r6 + r7 + r8 - r9 + r10 - r11
++
++  sub r1, r1, r0                ; mul24 r0, rb6,  ra2.8c
++  sub r1, r1, r0                ; mul24 r0, rb7,  ra2.8d
++  add r1, r1, r0                ; mul24 r0, ra8,  rb8
++  add r1, r1, r0                ; mul24 r0, rb10, ra3.8c
++  add r1, r1, r0                ; mul24 r0, ra11, rb11
++# <--- to here
++  sub r1, r1, ra4
++  sub r1, r1, r0                ; mov r2, rb_wt_off
++
++  asr r1, r1, 6
++  sub.setf -, r5, rb_i_tmu      ; mul24 r0, r1, ra_wt_mul_l0
++  mov.ifz rb_base2, rb_base2_next ; mul24 r1, r1, ra_kmul_add
++  sub r1, r0, r1                ; mov.ifz ra_y_y2, ra_y_y2_next
++  sub.setf -, r5, rb_lcount     ; mov.ifz ra_base, ra_base_next
++  add r1, r1, r2                ; mov r0, r1 << 8
++  add r1, r1, r0                ; mov r3, ra_blk_height         # ; NxtLoop: r3 = block height
++
++  brr.anyn -, r:1b
++  asr r1, r1, ra_wt_den_p7      ; mul24 r2, r3, rb_pitch        # ; NxtLoop
++  min r1, r1, ra_pmax           ; mov -, vw_wait
++  max vpm, r1, 0                ; v8subs r0, ra_height, r3      # ; NxtLoop: r0 = remaining height (0 saturate)
++# >>> branch.anyn 1b (r5 - rb_lcount)
++
++# r0 = remaining height (min 0)
++# r2 = r3 * rb_pitch
++# r3 = block_height
++
++# If looping again then we consumed block_height last loop
++# rb_dma1 (stride) remains constant
++# rb_i_tmu remains const (based on total height)
++# recalc ra_dma0, rb_lcount based on new segment height
++
++  mov.setf ra_height, r0        ; mov vw_setup, ra_dma0         # VDW setup 0
++
++# DMA out
++  bra.anyz -, ra_link
++  min r0, r0, r3                ; mov vw_setup, rb_dma1         # Stride
++  sub r1, r0, r3                ; mov vw_addr, ra_dest          # start the VDW
++  shl r1, r1, i_shift23
++# >>> .anyz ra_link (ra_height - remaining height)
++
++# Here r1 = cur_blk_height - blk_height so it will be 0 or -ve
++# We add to dma0 to reduce the number of output lines in the final block
++  brr -, r:1b
++  add rb_lcount, rb_lcount, r0
++  add ra_dma0, ra_dma0, r1
++  add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init     # ; Reset our VPM write pointer
++# >>> 1b
++.endm
++
++::mc_filter_y_bxx
++  m_filter_y_bxx 8
++
++################################################################################
++#
++# typedef struct qpu_mc_pred_y_p00_s {
++#    qpu_mc_src_t next_src1;
++#    uint16_t h;
++#    uint16_t w;
++#    uint32_t wo1;
++#    uint32_t dst_addr;
++#    uint32_t next_fn;
++# } qpu_mc_pred_y_p00_t;
++
++.macro m_filter_y_p00, v_bit_depth
++
++.if v_bit_depth <= 8
++.set v_x_shift,         0
++.set v_x_mul,           1
++# Shifts to get width & height in the right place in ra_dma0
++.set v_dma_h_shift,     7
++.set v_dma_wh_shift,    i_shift16
++.else
++.set v_x_shift,         1
++.set v_x_mul,           2
++# Shifts to get width & height in the right place in ra_dma0
++.set v_dma_h_shift,     8
++.set v_dma_wh_shift,    15
++.endif
++
++  mov ra0, unif                 ; mov r0, elem_num              # y_x
++  mov ra_xshift, ra_xshift_next ; v8subs r5rep, r5, r5          # [ra0 delay] ; r5 = 0
++  add r0, ra0.16b, r0           ; mov ra_base_next, unif        # ; src1.base
++.if v_x_shift != 0
++  shl r0, r0, v_x_shift
++.endif
++
++  max r0, r0, r5                ; mov ra_y_next, ra0.16a        # ; width_height
++  min r0, r0, rb_max_x          ; mov ra_width_height, unif
++
++  shl ra_xshift_next, r0, 3                                     # Compute shifts
++  and r0, r0, -4
++  sub r2, r5, rb_pitch          ; mov ra_wt_off_mul_l0, unif    # ; weight_offset
++  and r1, r0, r2
++  xor r0, r0, r1                ; mul24 r1, r1, rb_xpitch
++  add r0, r0, r1                ; mov ra_dest, unif             # Add stripe offsets ; dest addr
++  add ra_base_next, ra_base_next, r0 ; mov vw_setup, rb_vpm_init  # [ra_width delay] ; set up VPM write
++
++# get width,height of block (unif load above)
++# Compute vdw_setup1(dst_pitch-width)
++  shl r1, ra_width, v_x_shift
++  sub rb_dma1, rb_dma1_base, r1 ; mov r0, ra_height
++  sub rb_i_tmu, r0, PREREAD     ; v8min r0, r0, ra_blk_height
++  shl r0, r0, v_dma_h_shift     ; mov rb_lcount, r0
++  add r0, r0, r1                                                # Combine width and height of destination area
++  shl rb_wt_off, ra_wt_off_l0, DENOM + 7
++  shl r0, r0, v_dma_wh_shift    ; mov ra_link, unif             # Shift into bits 16 upwards of the vdw_setup0 register ; link
++  add ra_dma0, r0, rb_dma0_base
++
++:1
++  sub.setf -, r5, rb_i_tmu      ; v8adds r5rep, r5, ra_k1
++  nop                           ; mov.ifz ra_y, ra_y_next       ; ldtmu0
++  shr r0, r4, ra_xshift         ; mov r3, rb_pitch
++
++  max r2, ra_y, 0  # y
++  min r2, r2, rb_max_y          ; mov.ifz ra_base, ra_base_next
++  add ra_y, ra_y, 1             ; mul24 r2, r2, r3
++  add t0s, ra_base, r2          ; v8min r0, r0, rb_pmask
++
++  sub.setf -, r5, rb_lcount     ; mul24 r1, r0, ra_wt_mul_l0
++  shl r1, r1, 8                 ; mov r3, ra_blk_height
++  add r1, r1, rb_wt_off         ; v8subs r0, ra_height, r3
++
++  brr.anyn -, r:1b
++  asr r1, r1, DENOM + 8
++  min r1, r1, ra_pmax           ; mov -, vw_wait
++  max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++# >>> branch.anyn 1b
++
++# r0 = remaining height (min 0)
++# r2 = r3 * rb_pitch
++# r3 = block_height
++
++# If looping again then we consumed block_height last loop
++# rb_dma1 (stride) remains constant
++# rb_i_tmu remains const (based on total height)
++# recalc ra_dma0, rb_lcount based on new segment height
++
++  mov.setf ra_height, r0 ; mov vw_setup, ra_dma0 # VDW setup 0
++
++# DMA out
++  bra.anyz -, ra_link
++  min r0, r0, r3        ; mov vw_setup, rb_dma1 # Stride
++  sub r1, r0, r3        ; mov vw_addr, ra_dest  # start the VDW
++  shl r1, r1, i_shift23
++# >>> .anyz ra_link
++
++# Here r1 = cur_blk_height - blk_height so it will be 0 or -ve
++# We add to dma0 to reduce the number of output lines in the final block
++  brr -, r:1b
++  add rb_lcount, rb_lcount, r0
++  add ra_dma0, ra_dma0, r1
++  add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init     # ; Reset our VPM write pointer
++# >>> 1b
++.endm
++
++::mc_filter_y_p00
++  m_filter_y_p00 8
++
++################################################################################
++
++.macro m_filter_y_b00, v_bit_depth
++# luma setup does a fair bit more than we need calculating filter coeffs
++# that we will never use but it saves I-cache to use it (also simple!)
++  m_luma_setup v_bit_depth
++
++# Fix up vals that were expecting a filter (somewhat icky)
++  mov r2, 1
++  add rb_i_tmu, rb_i_tmu, r2    ; mov r1, ra_wt_off_mul_l0      # Need in rX rather than raX for <<8 to do what we want
++  shl rb_wt_off, ra_wt_off_l0, DENOM + 8 ; v8subs r5quad, r5, r5 # [r1 << delay] ; r5quad OK for zero
++  nop                           ; mov.ifnz ra_wt_off_mul_l0, r1 << 8
++
++:1
++  sub.setf -, r5, rb_i_tmu      ; nop                           ; ldtmu1
++  shr r1, r4, rb_xshift2        ; mov.ifz ra_y_y2, ra_y_y2_next ; ldtmu0
++  shr r0, r4, ra_xshift         ; mov r3, rb_pitch
++
++  max r2, ra_y, 0  # y
++  min r2, r2, rb_max_y          ; mov.ifz ra_base, ra_base_next
++  add ra_y, ra_y, 1             ; mul24 r2, r2, r3
++  add t0s, ra_base, r2          ; mov.ifz rb_base2, rb_base2_next
++
++  max r2, ra_y2, 0
++  min r2, r2, rb_max_y
++  add ra_y2, ra_y2, 1           ; mul24 r2, r2, r3
++  add t1s, rb_base2, r2         ; v8min r0, r0, ra_pmax         # v8subs masks out all but bottom byte
++  and r1, r1, rb_pmask          ; mul24 r0, r0, ra_wt_mul_l0
++
++  sub.setf -, r5, rb_lcount     ; mul24 r1, r1, ra_wt_mul_l1
++  add r1, r0, r1                ; v8adds r5rep, r5, ra_k1
++
++  shl r1, r1, 8                 ; mov r3, ra_blk_height
++  add r1, r1, rb_wt_off         ; v8subs r0, ra_height, r3
++
++  brr.anyn -, r:1b
++  asr r1, r1, (DENOM + 9) - 32                                  # -32 to get valid shift immediate
++  min r1, r1, ra_pmax           ; mov -, vw_wait
++  max vpm, r1, ra_k0            ; mul24 r2, r3, rb_pitch
++# >>> branch.anyn 1b
++
++# r0 = remaining height (min 0)
++# r2 = r3 * rb_pitch
++# r3 = block_height
++
++# If looping again then we consumed block_height last loop
++# rb_dma1 (stride) remains constant
++# rb_i_tmu remains const (based on total height)
++# recalc ra_dma0, rb_lcount based on new segment height
++
++  mov.setf ra_height, r0        ; mov vw_setup, ra_dma0         # ; VDW setup 0
++
++# DMA out
++  bra.anyz -, ra_link
++  min r0, r0, r3                ; mov vw_setup, rb_dma1         # ; Stride
++  sub r1, r0, r3                ; mov vw_addr, ra_dest          # ; start the VDW
++  shl r1, r1, i_shift23
++# >>> .anyz ra_link
++
++# Here r1 = cur_blk_height - blk_height so it will be 0 or -ve
++# We add to dma0 to reduce the number of output lines in the final block
++  brr -, r:1b
++  add rb_lcount, rb_lcount, r0
++  add ra_dma0, ra_dma0, r1
++  add ra_dest, ra_dest, r2      ; mov vw_setup, rb_vpm_init     # ; Reset our VPM write pointer
++# >>> 1b
++.endm
++
++::mc_filter_y_b00
++  m_filter_y_b00 8
++
++################################################################################
++################################################################################
++# 10 BIT
++
++::mc_setup_c10_q0
++  m_setup_q0
++::mc_setup_c10_qn
++  m_setup_c 10
++
++::mc_filter_c10_p
++  m_filter_c_p 0, 10
++
++::mc_filter_c10_p_l1
++  m_filter_c_p 1, 10
++
++
++::mc_filter_c10_b
++  m_filter_c_b 10
++
++# Even if these fns are the same as for other bit depths we want our own copy
++# to keep the code we are using in a single lump to avoid (direct map) cache
++# thrashing
++.set v_quads10, N_QPU_16 / 4
++
++::mc_sync10_q0
++  m_sync_q 0, v_quads10
++::mc_sync10_q1
++  m_sync_q 1, v_quads10
++::mc_sync10_q2
++  m_sync_q 2, v_quads10
++::mc_sync10_q3
++  m_sync_q 3, v_quads10
++::mc_sync10_q4
++  m_sync_q 4, v_quads10
++::mc_sync10_q5
++  m_sync_q 5, v_quads10
++::mc_sync10_q6
++  m_sync_q 6, v_quads10
++::mc_sync10_q7
++  m_sync_q 7, v_quads10
++::mc_sync10_q8
++  m_sync_q 8, v_quads10
++::mc_sync10_q9
++  m_sync_q 9, v_quads10
++::mc_sync10_q10
++  m_sync_q 10, v_quads10
++::mc_sync10_q11
++  m_sync_q 11, v_quads10
++
++::mc_exit_y10_q0
++::mc_exit_c10_q0
++  m_exit_q0
++
++::mc_exit_y10_qn
++::mc_exit_c10_qn
++  m_exit_qn
++
++::mc_setup_y10_q0
++  m_setup_q0
++::mc_setup_y10_qn
++  m_setup_y 10
++
++:per_block_setup_10
++  m_per_block_setup 10
++
++::mc_filter_y10_pxx
++  m_filter_y_pxx 10
++
++::mc_filter_y10_p00
++  m_filter_y_p00 10
++
++::mc_filter_y10_bxx
++  m_filter_y_bxx 10
++
++::mc_filter_y10_b00
++  m_filter_y_b00 10
++
++
++
++::mc_end
++# Do not add code here because mc_end must appear after all other code.
+--- /dev/null
++++ b/libavcodec/rpi_hevc_shader_cmd.h
+@@ -0,0 +1,165 @@
++/*
++Copyright (c) 2017 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++*/
++
++#ifndef RPI_SHADER_CMD_H
++#define RPI_SHADER_CMD_H
++
++#pragma pack(push, 4)
++
++#if RPI_QPU_EMU_C && RPI_QPU_EMU_Y
++// If mixed then we are just confused and get a lot of warnings....
++typedef const uint8_t * qpu_mc_src_addr_t;
++typedef uint8_t * qpu_mc_dst_addr_t;
++#else
++typedef uint32_t qpu_mc_src_addr_t;
++typedef uint32_t qpu_mc_dst_addr_t;
++#endif
++
++typedef struct qpu_mc_src_s
++{
++    int16_t y;
++    int16_t x;
++    qpu_mc_src_addr_t base;
++} qpu_mc_src_t;
++
++
++typedef struct qpu_mc_pred_c_p_s {
++    qpu_mc_src_t next_src;
++    uint16_t h;
++    uint16_t w;
++    uint32_t coeffs_x;
++    uint32_t coeffs_y;
++    uint32_t wo_u;
++    uint32_t wo_v;
++    qpu_mc_dst_addr_t dst_addr_c;
++    uint32_t next_fn;
++} qpu_mc_pred_c_p_t;
++
++typedef struct qpu_mc_pred_c_b_s {
++    qpu_mc_src_t next_src1;
++    uint16_t h;
++    uint16_t w;
++    uint32_t coeffs_x1;
++    uint32_t coeffs_y1;
++    int16_t weight_u1;
++    int16_t weight_v1;
++    qpu_mc_src_t next_src2;
++    uint32_t coeffs_x2;
++    uint32_t coeffs_y2;
++    uint32_t wo_u2;
++    uint32_t wo_v2;
++    qpu_mc_dst_addr_t dst_addr_c;
++    uint32_t next_fn;
++} qpu_mc_pred_c_b_t;
++
++typedef struct qpu_mc_pred_c_s_s {
++    qpu_mc_src_t next_src1;
++    uint32_t pic_cw;            // C Width (== Y width / 2)
++    uint32_t pic_ch;            // C Height (== Y Height / 2)
++    uint32_t stride2;
++    uint32_t stride1;
++    qpu_mc_src_t next_src2;
++    uint32_t next_fn;
++} qpu_mc_pred_c_s_t;
++
++typedef struct qpu_mc_pred_c_s {
++    union {
++        qpu_mc_pred_c_p_t p;
++        qpu_mc_pred_c_b_t b;
++        qpu_mc_pred_c_s_t s;
++    };
++} qpu_mc_pred_c_t;
++
++
++typedef struct qpu_mc_pred_y_p_s {
++    qpu_mc_src_t next_src1;
++    qpu_mc_src_t next_src2;
++    uint16_t h;
++    uint16_t w;
++    uint32_t mymx21;
++    uint32_t wo1;
++    uint32_t wo2;
++    qpu_mc_dst_addr_t dst_addr;
++    uint32_t next_fn;
++} qpu_mc_pred_y_p_t;
++
++typedef struct qpu_mc_pred_y_p00_s {
++    qpu_mc_src_t next_src1;
++    uint16_t h;
++    uint16_t w;
++    uint32_t wo1;
++    qpu_mc_dst_addr_t dst_addr;
++    uint32_t next_fn;
++} qpu_mc_pred_y_p00_t;
++
++typedef struct qpu_mc_pred_y_s_s {
++    qpu_mc_src_t next_src1;
++    qpu_mc_src_t next_src2;
++    uint16_t pic_h;
++    uint16_t pic_w;
++    uint32_t stride2;
++    uint32_t stride1;
++    uint32_t next_fn;
++} qpu_mc_pred_y_s_t;
++
++typedef struct qpu_mc_pred_sync_s {
++    uint32_t next_fn;
++} qpu_mc_pred_sync_t;
++
++// Only a useful structure in that it allows us to return something other than a void *
++typedef struct qpu_mc_pred_y_s {
++    union {
++        qpu_mc_pred_y_p_t p;
++        qpu_mc_pred_y_p00_t p00;
++        qpu_mc_pred_y_s_t s;
++    };
++} qpu_mc_pred_y_t;
++
++typedef union qpu_mc_pred_cmd_u {
++    qpu_mc_pred_y_t y;
++    qpu_mc_pred_c_t c;
++    qpu_mc_pred_sync_t sync;
++} qpu_mc_pred_cmd_t;
++
++static inline void qpu_mc_link_set(qpu_mc_pred_cmd_t * const cmd, const uint32_t fn)
++{
++    // The link (next_fn) is the last 32-bit word of the command preceding cmd, so store fn just below cmd
++    ((uint32_t *)cmd)[-1] = fn;
++}
++
++#define QPU_MC_PRED_N_Y8        12
++#define QPU_MC_PRED_N_C8        12
++
++#define QPU_MC_PRED_N_Y10       12
++#define QPU_MC_PRED_N_C10       12
++
++#define QPU_MC_DENOM            7
++
++#pragma pack(pop)
++
++#endif
++
+--- /dev/null
++++ b/libavcodec/rpi_hevc_shader_template.c
+@@ -0,0 +1,88 @@
++/*
++Copyright (c) 2017 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++*/
++
++#include "hevc.h"
++#include "rpi_hevcdec.h"
++#include "libavutil/rpi_sand_fns.h"
++#include "rpi_hevc_shader_cmd.h"
++#include "rpi_hevc_shader_template.h"
++
++typedef struct shader_track_s  // Per-queue state kept by ff_hevc_rpi_shader_c between passes over the queues
++{
++    const union qpu_mc_pred_cmd_u *qpu_mc_curr;  // Command to resume from on the next pass; NULL until the queue is first visited
++    const struct qpu_mc_src_s *last_l0;  // Source descriptor saved from the previous command (next_src1 / next_src)
++    const struct qpu_mc_src_s *last_l1;  // Source descriptor saved from the previous command (next_src2 / next_src)
++    uint32_t width;  // pic_width * PW
++    uint32_t height;  // pic_h (luma) or pic_ch (chroma) copied from the setup command
++    uint32_t stride2;  // stride2 copied from the setup command
++    uint32_t stride1;  // stride1 copied from the setup command; also passed alongside dst_addr to the dsp calls
++} shader_track_t;
++
++static int wtoidx(const unsigned int w)  // Map a block width to its table index; widths not listed in the table map to 0
++{
++    static const uint8_t pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
++    return pel_weight[w];  // NOTE(review): unchecked index - w must be <= 64
++}
++
++static int fctom(uint32_t x)  // Recover the fractional move (asserted to be 0..7) from a packed filter coefficient word
++{
++    int rv;
++    // As it happens we can take the 2nd filter term & divide it by 8
++    // (dropping fractions) to get the fractional move
++    rv = 8 - ((x >> 11) & 0xf);
++    av_assert2(rv >= 0 && rv <= 7);
++    return rv;
++}
++
++static inline int32_t ext(int32_t x, unsigned int shl, unsigned int shr)  // Shift x left by shl, then right by shr (signed)
++{
++    return (int32_t)((uint32_t)x << shl) >> shr;  // Left shift done in unsigned: << on a negative or overflowing int is undefined
++}
++
++static inline int woff_p(HEVCRpiContext *const s, int32_t x)  // Used for the offset argument of the uni-weighted dsp calls
++{
++    return ext(x, 0, 17 + s->ps.sps->bit_depth - 8);  // x >> (17 + (bit_depth - 8)); presumably undoes the "offset*2+1" packing - TODO confirm
++}
++
++static inline int woff_b(HEVCRpiContext *const s, int32_t x)  // Used for the second offset argument of the bi-weighted dsp calls
++{
++    return ext(x - 0x10000, 0, 16 + s->ps.sps->bit_depth - 8);  // (x - 0x10000) >> (16 + (bit_depth - 8))
++}
++
++static inline int wweight(int32_t x)  // Used for the weight arguments of the weighted dsp calls
++{
++    return ext(x, 16, 16);  // Sign-extend the low 16 bits of x
++}
++
++
++#define PW 1
++#include "rpi_hevc_shader_template_fn.h"
++
++#undef PW
++#define PW 2
++#include "rpi_hevc_shader_template_fn.h"
++
+--- /dev/null
++++ b/libavcodec/rpi_hevc_shader_template.h
+@@ -0,0 +1,49 @@
++/*
++Copyright (c) 2017 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++*/
++
++#ifndef LIBAVCODEC_RPI_SHADER_TEMPLATE_H
++#define LIBAVCODEC_RPI_SHADER_TEMPLATE_H
++#include <stdint.h>  // uint8_t in the rpi_sand_dump* prototypes; keeps this header self-contained
++struct HEVCRpiContext;
++struct HEVCRpiInterPredEnv;
++// Run the queued luma (ipe_y) and chroma (ipe_c) inter-prediction commands in C; 8 / 16 = uint8_t / uint16_t pixels
++void ff_hevc_rpi_shader_c8(struct HEVCRpiContext *const s,
++                  const struct HEVCRpiInterPredEnv *const ipe_y,
++                  const struct HEVCRpiInterPredEnv *const ipe_c);
++
++void ff_hevc_rpi_shader_c16(struct HEVCRpiContext *const s,
++                  const struct HEVCRpiInterPredEnv *const ipe_y,
++                  const struct HEVCRpiInterPredEnv *const ipe_c);
++// Print a w x h region at (x, y) of the buffer at base as hex to stdout; 8 / 16 = uint8_t / uint16_t pixels
++void rpi_sand_dump8(const char * const name,
++                    const uint8_t * const base, const int stride1, const int stride2, int x, int y, int w, int h, const int is_c);
++
++void rpi_sand_dump16(const char * const name,
++                     const uint8_t * const base, const int stride1, const int stride2, int x, int y, int w, int h, const int is_c);
++
++#endif
++
+--- /dev/null
++++ b/libavcodec/rpi_hevc_shader_template_fn.h
+@@ -0,0 +1,502 @@
++/*
++Copyright (c) 2017 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++*/
++
++#define STRCAT(x,y) x##y
++
++#if PW == 1
++#define pixel uint8_t
++#define FUNC(f) STRCAT(f, 8)
++#elif PW == 2
++#define pixel uint16_t
++#define FUNC(f) STRCAT(f, 16)
++#else
++#error Unexpected PW
++#endif
++
++#define PATCH_STRIDE (16 * PW)
++
++static void FUNC(dup_lr)(uint8_t * dst, const uint8_t * src, unsigned int w, unsigned int h, unsigned int stride)
++{
++    // For each of h rows, replicate the first pixel of the src row across w bytes of the dst row
++    for (unsigned int row = 0; row != h; ++row, dst += stride, src += stride) {
++        const pixel fill = *(const pixel *)src;
++        pixel * out = (pixel *)dst;
++        for (unsigned int done = 0; done < w; done += PW)
++            *out++ = fill;
++    }
++}
++
++static void FUNC(dup_tb)(uint8_t * dst, const uint8_t * src, unsigned int w, unsigned int h, unsigned int stride)
++{
++    // Copy the single w-byte row at src into h consecutive dst rows
++    for (unsigned int rows_left = h; rows_left != 0; --rows_left, dst += stride)
++        memcpy(dst, src, w);
++}
++
++static void FUNC(get_patch_y)(const shader_track_t * const st,
++                         uint8_t * dst, const unsigned int dst_stride,
++                         const qpu_mc_src_t *src,
++                         unsigned int _w, unsigned int _h)
++{   // Fetch a _w x _h pixel patch at (src->x, src->y) into dst, replicating edge pixels where the patch overhangs the picture
++    int x = src->x * PW;  // Horizontal quantities are kept in bytes (pixels * PW)
++    int y = src->y;
++    int w = _w * PW;
++    int h = _h;
++    int dl = 0;  // Bytes of padding needed on the left
++    int dr = 0;  // Bytes of padding needed on the right
++    int dt = 0;  // Rows of padding needed above
++    int db = 0;  // Rows of padding needed below
++
++    if (x < 0) {
++        if (-x >= w)
++            x = PW - w;  // Wholly off the left edge: keep exactly one pixel column inside
++        dl = -x;
++        w += x;
++        x = 0;
++    }
++    if (x + w > st->width) {
++        if (x >= st->width)
++            x = st->width - PW;  // Wholly off the right edge: fetch only the last pixel column
++        dr = (x + w) - st->width;
++        w = st->width - x;
++    }
++
++    // Y: the same clamping applied vertically, in rows
++    if (y < 0) {
++        if (-y >= h)
++            y = 1 - h;
++        dt = -y;
++        h += y;
++        y = 0;
++    }
++    if (y + h > st->height) {
++        if (y >= st->height)
++            y = st->height - 1;
++        db = (y + h) - st->height;
++        h = st->height - y;
++    }
++
++    dst += dl + dt * dst_stride;  // Leave room for the left / top padding
++    FUNC(av_rpi_sand_to_planar_y)(dst, dst_stride, (const uint8_t *)src->base, st->stride1, st->stride2, x, y, w, h);
++
++    // Edge dup: left / right first, then whole (already widened) rows up / down
++    if (dl != 0)
++        FUNC(dup_lr)(dst - dl, dst, dl, h, dst_stride);
++    if (dr != 0)
++        FUNC(dup_lr)(dst + w, dst + w - PW, dr, h, dst_stride);
++    w += dl + dr;
++    dst -= dl;
++
++    if (dt != 0)
++        FUNC(dup_tb)(dst - dt * dst_stride, dst, w, dt, dst_stride);
++    if (db != 0)
++        FUNC(dup_tb)(dst + h * dst_stride, dst + (h - 1) * dst_stride, w, db, dst_stride);
++}
++
++
++
++static void FUNC(get_patch_c)(const shader_track_t * const st,
++                         uint8_t * dst_u, uint8_t * dst_v, const unsigned int dst_stride,
++                         const qpu_mc_src_t *src,
++                         unsigned int _w, unsigned int _h)
++{   // Fetch a _w x _h patch at (src->x, src->y) into separate dst_u / dst_v planes, replicating edge pixels where it overhangs
++    int x = src->x * PW;  // Horizontal quantities are kept in bytes (pixels * PW)
++    int y = src->y;
++    int w = _w * PW;
++    int h = _h;
++    int dl = 0;  // Bytes of padding needed on the left
++    int dr = 0;  // Bytes of padding needed on the right
++    int dt = 0;  // Rows of padding needed above
++    int db = 0;  // Rows of padding needed below
++    const int width = st->width;
++    const int height = st->height;
++
++    if (x < 0) {
++        if (-x >= w)
++            x = PW - w;  // Wholly off the left edge: keep exactly one pixel column inside
++        dl = -x;
++        w += x;
++        x = 0;
++    }
++    if (x + w > width) {
++        if (x >= width)
++            x = width - PW;  // Wholly off the right edge: fetch only the last pixel column
++        dr = (x + w) - width;
++        w = width - x;
++    }
++
++    // Y: the same clamping applied vertically, in rows
++    if (y < 0) {
++        if (-y >= h)
++            y = 1 - h;
++        dt = -y;
++        h += y;
++        y = 0;
++    }
++    if (y + h > height) {
++        if (y >= height)
++            y = height - 1;
++        db = (y + h) - height;
++        h = height - y;
++    }
++
++    dst_u += dl + dt * dst_stride;  // Leave room for the left / top padding
++    dst_v += dl + dt * dst_stride;
++    FUNC(av_rpi_sand_to_planar_c)(dst_u, dst_stride, dst_v, dst_stride, (const uint8_t *)src->base, st->stride1, st->stride2, x, y, w, h);
++
++    // Edge dup: left / right first, then whole (already widened) rows up / down
++    if (dl != 0)
++    {
++        FUNC(dup_lr)(dst_u - dl, dst_u, dl, h, dst_stride);
++        FUNC(dup_lr)(dst_v - dl, dst_v, dl, h, dst_stride);
++    }
++    if (dr != 0)
++    {
++        FUNC(dup_lr)(dst_u + w, dst_u + w - PW, dr, h, dst_stride);
++        FUNC(dup_lr)(dst_v + w, dst_v + w - PW, dr, h, dst_stride);
++    }
++    w += dl + dr;
++    dst_u -= dl;
++    dst_v -= dl;
++
++    if (dt != 0)
++    {
++        FUNC(dup_tb)(dst_u - dt * dst_stride, dst_u, w, dt, dst_stride);
++        FUNC(dup_tb)(dst_v - dt * dst_stride, dst_v, w, dt, dst_stride);
++    }
++    if (db != 0)
++    {
++        FUNC(dup_tb)(dst_u + h * dst_stride, dst_u + (h - 1) * dst_stride, w, db, dst_stride);
++        FUNC(dup_tb)(dst_v + h * dst_stride, dst_v + (h - 1) * dst_stride, w, db, dst_stride);
++    }
++}
++
++// x, y, w, h in pixels
++// stride1, stride2 in bytes; prints the w x h region at (x, y) of base as hex to stdout, one text line per row
++void FUNC(rpi_sand_dump)(const char * const name,
++                         const uint8_t * const base, const int stride1, const int stride2, int x, int y, int w, int h, const int is_c)
++{
++    const int mask = stride2 == 0 ? ~0 : stride1 - 1;  // stride2 == 0: the whole byte offset stays within the row
++
++    printf("%s (%d,%d) %dx%d\n", name, x, y, w, h);
++
++    if (is_c) {
++        x *= 2;  // Two samples are printed per chroma pixel
++        w *= 2;
++    }
++
++    for (int i = y; i != y + h; ++i) {
++        for (int j = x; j != x + w; ++j) {
++            const uint8_t * p = base + ((j*PW) & mask) + i * stride1 + ((j*PW) & ~mask) * stride2;
++            char sep = is_c && (j & 1) == 0 ? ':' : ' ';  // ':' joins the two samples of a chroma pair
++#if PW == 1
++            if (j < 0 || i < 0)
++                printf("..%c", sep);  // Negative coordinates are shown as dots rather than read
++            else
++                printf("%02x%c", *(const pixel*)p, sep);
++#else
++            if (j < 0 || i < 0)
++                printf("...%c", sep);  // Negative coordinates are shown as dots rather than read
++            else
++                printf("%03x%c", *(const pixel*)p, sep);
++#endif
++        }
++        printf("\n");
++    }
++}
++
++
++void FUNC(ff_hevc_rpi_shader_c)(HEVCRpiContext *const s,
++                  const HEVCRpiInterPredEnv *const ipe_y,
++                  const HEVCRpiInterPredEnv *const ipe_c)
++{   // Execute the queued inter-prediction commands of ipe_y (luma) then ipe_c (chroma) in C using the hevcdsp functions
++    for (int c_idx = 0; c_idx < 2; ++c_idx)
++    {
++        const HEVCRpiInterPredEnv *const ipe = c_idx == 0 ? ipe_y : ipe_c;
++        shader_track_t tracka[QPU_N_MAX] = {{NULL}};  // One tracker per queue; qpu_mc_curr == NULL means "not started"
++        unsigned int exit_n = 0;  // Number of queues that have reached their exit link
++
++        if (ipe == NULL || !ipe->used) {
++            continue;
++        }
++
++        do {
++            for (unsigned int i = 0; i != ipe->n; ++i) {
++                const HEVCRpiInterPredQ * const q = ipe->q + i;
++                shader_track_t * const st = tracka + i;
++                const qpu_mc_pred_cmd_t * cmd = st->qpu_mc_curr == NULL ? q->qpu_mc_base : st->qpu_mc_curr;
++
++                for (;;) {  // Run this queue until its next sync or exit link
++                    const uint32_t link = (cmd == q->qpu_mc_base) ? q->code_setup : ((const uint32_t *)cmd)[-1];  // Link word sits just before cmd
++
++                    if (link == q->code_setup) {
++                        if (c_idx == 0) {
++                            // Luma
++                            const qpu_mc_pred_y_s_t *const c = &cmd->y.s;
++
++                            st->height = c->pic_h;
++                            st->width = c->pic_w * PW;
++                            st->stride1 = c->stride1;
++                            st->stride2 = c->stride2;
++                            st->last_l0 = &c->next_src1;
++                            st->last_l1 = &c->next_src2;
++                            cmd = (const qpu_mc_pred_cmd_t *)(c + 1);
++                        }
++                        else {
++                            // Chroma
++                            const qpu_mc_pred_c_s_t *const c = &cmd->c.s;
++
++                            st->height = c->pic_ch;
++                            st->width = c->pic_cw * PW;
++                            st->stride1 = c->stride1;
++                            st->stride2 = c->stride2;
++                            st->last_l0 = &c->next_src1;
++                            st->last_l1 = &c->next_src2;
++                            cmd = (const qpu_mc_pred_cmd_t *)(c + 1);
++                        }
++                    }
++                    else if (link == s->qpu.y_pxx) {
++                        const qpu_mc_pred_y_p_t *const c = &cmd->y.p;
++                        const int w1 = FFMIN(c->w, 8);  // Block is processed as up to two halves of at most 8 pixels
++                        const int w2 = c->w - w1;
++
++                        uint8_t patch_y1[PATCH_STRIDE * 72]; // (Max width + 8) * (max height + 8)
++                        uint8_t patch_y2[PATCH_STRIDE * 72]; // (Max width + 8) * (max height + 8)
++
++                        FUNC(get_patch_y)(st,
++                                    patch_y1, PATCH_STRIDE,
++                                    st->last_l0,
++                                    16, c->h + 7);
++                        if (w2 > 0) {
++                            FUNC(get_patch_y)(st,
++                                        patch_y2, PATCH_STRIDE,
++                                        st->last_l1,
++                                        16, c->h + 7);
++                        }
++
++                        // wo[offset] = offset*2+1
++                        s->hevcdsp.put_hevc_qpel_uni_w[wtoidx(w1)][(c->mymx21 & 0xff00) != 0][(c->mymx21 & 0xff) != 0](
++                            (uint8_t *)c->dst_addr, st->stride1, patch_y1 + 3 * (PATCH_STRIDE + PW), PATCH_STRIDE,
++                            c->h, QPU_MC_DENOM, wweight(c->wo1), woff_p(s, c->wo1), (c->mymx21 & 0xff), ((c->mymx21 >> 8) & 0xff), w1);
++                        if (w2 > 0) {
++                            s->hevcdsp.put_hevc_qpel_uni_w[wtoidx(w2)][(c->mymx21 & 0xff000000) != 0][(c->mymx21 & 0xff0000) != 0](
++                                (uint8_t *)c->dst_addr + 8 * PW, st->stride1, patch_y2 + 3 * (PATCH_STRIDE + PW), PATCH_STRIDE,
++                                c->h, QPU_MC_DENOM, wweight(c->wo2), woff_p(s, c->wo2), ((c->mymx21 >> 16) & 0xff), ((c->mymx21 >> 24) & 0xff), w2);
++                        }
++                        st->last_l0 = &c->next_src1;
++                        st->last_l1 = &c->next_src2;
++                        cmd = (const qpu_mc_pred_cmd_t *)(c + 1);
++                    }
++                    else if (link == s->qpu.y_bxx) {
++                        const qpu_mc_pred_y_p_t *const c = &cmd->y.p;
++
++                        uint8_t patch_y1[PATCH_STRIDE * 72]; // (Max width + 8) * (max height + 8)
++                        uint8_t patch_y2[PATCH_STRIDE * 72]; // (Max width + 8) * (max height + 8)
++                        int16_t patch_y3[MAX_PB_SIZE * MAX_PB_SIZE];  // Intermediate first-source prediction fed to the bi_w call
++
++                        FUNC(get_patch_y)(st,
++                                    patch_y1, PATCH_STRIDE,
++                                    st->last_l0,
++                                    16, c->h + 7);
++                        FUNC(get_patch_y)(st,
++                                    patch_y2, PATCH_STRIDE,
++                                    st->last_l1,
++                                    16, c->h + 7);
++
++                        s->hevcdsp.put_hevc_qpel[wtoidx(c->w)][(c->mymx21 & 0xff00) != 0][(c->mymx21 & 0xff) != 0](
++                           patch_y3, patch_y1+ 3 * (PATCH_STRIDE + PW), PATCH_STRIDE,
++                           c->h, (c->mymx21 & 0xff), ((c->mymx21 >> 8) & 0xff), c->w);
++
++                        s->hevcdsp.put_hevc_qpel_bi_w[wtoidx(c->w)][(c->mymx21 & 0xff000000) != 0][(c->mymx21 & 0xff0000) != 0](
++                            (uint8_t *)c->dst_addr, st->stride1, patch_y2 + 3 * (PATCH_STRIDE + PW), PATCH_STRIDE, patch_y3,
++                            c->h, QPU_MC_DENOM, wweight(c->wo1), wweight(c->wo2),
++                            0, woff_b(s, c->wo2), ((c->mymx21 >> 16) & 0xff), ((c->mymx21 >> 24) & 0xff), c->w);
++                        st->last_l0 = &c->next_src1;
++                        st->last_l1 = &c->next_src2;
++                        cmd = (const qpu_mc_pred_cmd_t *)(c + 1);
++                    }
++                    else if (link == s->qpu.y_p00) {
++                        const qpu_mc_pred_y_p00_t *const c = &cmd->y.p00;
++
++                        uint8_t patch_y1[PATCH_STRIDE * 72]; // (Max width + 8) * (max height + 8)
++
++                        FUNC(get_patch_y)(st,
++                                    patch_y1, PATCH_STRIDE,
++                                    st->last_l0,
++                                    16, c->h + 7);
++
++                        // wo[offset] = offset*2+1
++                        s->hevcdsp.put_hevc_qpel_uni_w[wtoidx(c->w)][0][0](
++                            (uint8_t *)c->dst_addr, st->stride1, patch_y1, PATCH_STRIDE,
++                            c->h, QPU_MC_DENOM, wweight(c->wo1), woff_p(s, c->wo1), 0, 0, c->w);
++
++                        st->last_l0 = &c->next_src1;
++                        cmd = (const qpu_mc_pred_cmd_t *)(c + 1);
++                    }
++                    else if (link == s->qpu.y_b00) {
++                        const qpu_mc_pred_y_p_t *const c = &cmd->y.p;
++
++                        uint8_t patch_y1[PATCH_STRIDE * 72]; // (Max width + 8) * (max height + 8)
++                        uint8_t patch_y2[PATCH_STRIDE * 72]; // (Max width + 8) * (max height + 8)
++                        int16_t patch_y3[MAX_PB_SIZE * MAX_PB_SIZE];  // Intermediate first-source prediction fed to the bi_w call
++
++                        av_assert0(c->w <= 16 && c->h <= 64);
++
++                        FUNC(get_patch_y)(st,
++                                    patch_y1, PATCH_STRIDE,
++                                    st->last_l0,
++                                    16, c->h);
++                        FUNC(get_patch_y)(st,
++                                    patch_y2, PATCH_STRIDE,
++                                    st->last_l1,
++                                    16, c->h);
++
++                        s->hevcdsp.put_hevc_qpel[wtoidx(c->w)][0][0](
++                           patch_y3, patch_y1, PATCH_STRIDE,
++                           c->h, 0, 0, c->w);
++
++                        s->hevcdsp.put_hevc_qpel_bi_w[wtoidx(c->w)][0][0](
++                            (uint8_t *)c->dst_addr, st->stride1, patch_y2, PATCH_STRIDE, patch_y3,
++                            c->h, QPU_MC_DENOM, wweight(c->wo1), wweight(c->wo2),
++                            0, woff_b(s, c->wo2), 0, 0, c->w);
++                        st->last_l0 = &c->next_src1;
++                        st->last_l1 = &c->next_src2;
++                        cmd = (const qpu_mc_pred_cmd_t *)(c + 1);
++                    }
++                    else if (link == s->qpu.c_pxx) {
++                        const qpu_mc_pred_c_p_t *const c = &cmd->c.p;
++                        const int mx = fctom(c->coeffs_x);
++                        const int my = fctom(c->coeffs_y);
++
++                        uint8_t patch_u1[PATCH_STRIDE * 72]; // (Max width + 8) * (max height + 8)
++                        uint8_t patch_v1[PATCH_STRIDE * 72]; // (Max width + 8) * (max height + 8)
++                        uint8_t patch_u3[8 * 16 * PW];  // Planar result, row stride 8 * PW bytes, before repacking to the destination
++                        uint8_t patch_v3[8 * 16 * PW];
++
++                        FUNC(get_patch_c)(st, patch_u1, patch_v1, PATCH_STRIDE, st->last_l0, 8+3, c->h + 3);
++
++                        s->hevcdsp.put_hevc_epel_uni_w[wtoidx(c->w)][my != 0][mx != 0](
++                            patch_u3, 8 * PW, patch_u1 + PATCH_STRIDE + PW, PATCH_STRIDE,
++                            c->h, QPU_MC_DENOM, wweight(c->wo_u), woff_p(s, c->wo_u), mx, my, c->w);
++                        s->hevcdsp.put_hevc_epel_uni_w[wtoidx(c->w)][my != 0][mx != 0](
++                            patch_v3, 8 * PW, patch_v1 + PATCH_STRIDE + PW, PATCH_STRIDE,
++                            c->h, QPU_MC_DENOM, wweight(c->wo_v), woff_p(s, c->wo_v), mx, my, c->w);
++
++                        FUNC(av_rpi_planar_to_sand_c)((uint8_t *)c->dst_addr_c, st->stride1, st->stride2, patch_u3, 8 * PW, patch_v3, 8 * PW, 0, 0, c->w * PW, c->h);
++
++                        st->last_l0 = &c->next_src;
++                        cmd = (const qpu_mc_pred_cmd_t *)(c + 1);
++                    }
++                    else if (link == s->qpu.c_pxx_l1) {  // Same as c_pxx but reads from / updates last_l1
++                        const qpu_mc_pred_c_p_t *const c = &cmd->c.p;
++                        const int mx = fctom(c->coeffs_x);
++                        const int my = fctom(c->coeffs_y);
++
++                        uint8_t patch_u1[PATCH_STRIDE * 72]; // (Max width + 8) * (max height + 8)
++                        uint8_t patch_v1[PATCH_STRIDE * 72]; // (Max width + 8) * (max height + 8)
++                        uint8_t patch_u3[8 * 16 * PW];
++                        uint8_t patch_v3[8 * 16 * PW];
++
++                        FUNC(get_patch_c)(st, patch_u1, patch_v1, PATCH_STRIDE, st->last_l1, 8+3, c->h + 3);
++
++                        s->hevcdsp.put_hevc_epel_uni_w[wtoidx(c->w)][my != 0][mx != 0](
++                            patch_u3, 8 * PW, patch_u1 + PATCH_STRIDE + PW, PATCH_STRIDE,
++                            c->h, QPU_MC_DENOM, wweight(c->wo_u), woff_p(s, c->wo_u), mx, my, c->w);
++                        s->hevcdsp.put_hevc_epel_uni_w[wtoidx(c->w)][my != 0][mx != 0](
++                            patch_v3, 8 * PW, patch_v1 + PATCH_STRIDE + PW, PATCH_STRIDE,
++                            c->h, QPU_MC_DENOM, wweight(c->wo_v), woff_p(s, c->wo_v), mx, my, c->w);
++
++                        FUNC(av_rpi_planar_to_sand_c)((uint8_t *)c->dst_addr_c, st->stride1, st->stride2, patch_u3, 8 * PW, patch_v3, 8 * PW, 0, 0, c->w * PW, c->h);
++
++                        st->last_l1 = &c->next_src;
++                        cmd = (const qpu_mc_pred_cmd_t *)(c + 1);
++                    }
++                    else if (link == s->qpu.c_bxx) {
++                        const qpu_mc_pred_c_b_t *const c = &cmd->c.b;
++                        const int mx1 = fctom(c->coeffs_x1);
++                        const int my1 = fctom(c->coeffs_y1);
++                        const int mx2 = fctom(c->coeffs_x2);
++                        const int my2 = fctom(c->coeffs_y2);
++
++                        uint8_t patch_u1[PATCH_STRIDE * 72];
++                        uint8_t patch_v1[PATCH_STRIDE * 72];
++                        uint8_t patch_u2[PATCH_STRIDE * 72];
++                        uint8_t patch_v2[PATCH_STRIDE * 72];
++                        uint8_t patch_u3[8 * 16 * PW];
++                        uint8_t patch_v3[8 * 16 * PW];
++                        int16_t patch_u4[MAX_PB_SIZE * MAX_PB_SIZE];  // Signed intermediate, same role and type as the luma patch_y3
++                        int16_t patch_v4[MAX_PB_SIZE * MAX_PB_SIZE];
++
++                        FUNC(get_patch_c)(st, patch_u1, patch_v1, PATCH_STRIDE, st->last_l0, 8+3, c->h + 3);
++                        FUNC(get_patch_c)(st, patch_u2, patch_v2, PATCH_STRIDE, st->last_l1, 8+3, c->h + 3);
++
++                        s->hevcdsp.put_hevc_epel[wtoidx(c->w)][my1 != 0][mx1 != 0](
++                           patch_u4, patch_u1 + PATCH_STRIDE + PW, PATCH_STRIDE,
++                           c->h, mx1, my1, c->w);
++                        s->hevcdsp.put_hevc_epel[wtoidx(c->w)][my1 != 0][mx1 != 0](
++                           patch_v4, patch_v1 + PATCH_STRIDE + PW, PATCH_STRIDE,
++                           c->h, mx1, my1, c->w);
++
++                        s->hevcdsp.put_hevc_epel_bi_w[wtoidx(c->w)][my2 != 0][mx2 != 0](
++                            patch_u3, 8 * PW, patch_u2 + PATCH_STRIDE + PW, PATCH_STRIDE, patch_u4,
++                            c->h, QPU_MC_DENOM, c->weight_u1, wweight(c->wo_u2),
++                            0, woff_b(s, c->wo_u2), mx2, my2, c->w);
++                        s->hevcdsp.put_hevc_epel_bi_w[wtoidx(c->w)][my2 != 0][mx2 != 0](
++                            patch_v3, 8 * PW, patch_v2 + PATCH_STRIDE + PW, PATCH_STRIDE, patch_v4,
++                            c->h, QPU_MC_DENOM, c->weight_v1, wweight(c->wo_v2),
++                            0, woff_b(s, c->wo_v2), mx2, my2, c->w);
++
++                        FUNC(av_rpi_planar_to_sand_c)((uint8_t *)c->dst_addr_c, st->stride1, st->stride2, patch_u3, 8 * PW, patch_v3, 8 * PW, 0, 0, c->w * PW, c->h);
++
++                        st->last_l0 = &c->next_src1;
++                        st->last_l1 = &c->next_src2;
++                        cmd = (const qpu_mc_pred_cmd_t *)(c + 1);
++                    }
++                    else if (link == q->code_sync) {
++                        cmd = (const qpu_mc_pred_cmd_t *)((const uint32_t *)cmd + 1);  // Skip the link word, then yield to the next queue
++                        break;
++                    }
++                    else if (link == q->code_exit) {
++                        // We expect exit to occur without other sync
++                        av_assert0(i == exit_n);
++                        ++exit_n;
++                        break;
++                    }
++                    else {
++                        av_assert0(0);  // Unknown link value
++                    }
++                }
++
++                st->qpu_mc_curr = cmd;  // Resume point for the next pass over the queues
++            }
++        } while (exit_n == 0);
++    }
++}
++
++#undef FUNC
++#undef pixel
++
+--- /dev/null
++++ b/libavcodec/rpi_hevc_transform.s
+@@ -0,0 +1,444 @@
++# ******************************************************************************
++# Argon Design Ltd.
++# (c) Copyright 2015 Argon Design Ltd. All rights reserved.
++#
++# Module : HEVC
++# Author : Peter de Rivaz
++# ******************************************************************************
++
++# USE_STACK = 1 means temporary data stored on the stack (requires build with larger stack)
++# USE_STACK = 0 means temporary data stored in fixed per-VPU data buffers (requires modifications to vasm to handle instruction encoding for PC relative instructions)
++.set USE_STACK, 0
++
++# Lines that fail to assemble start with #:
++# The script insert_magic_opcodes.sh inserts the machine code directly for these.
++# HEVC VPU Transform
++#
++# Transform matrix can be thought of as
++#   output row vector = input row vector * transMatrix2
++#
++# The even rows of the matrix are symmetric
++# The odd rows of the matrix are antisymmetric
++#
++# So only need to compute the first half of the results, then can compute the remainder with a butterfly
++#
++# EXAMPLE
++#   (a b c d) (1 2  2  1)
++#             (3 4 -4 -3)
++#             (5 6  6  5)
++#             (7 8 -8 -7)
++#
++#  x=(a c)(1 2) = 1a+5c 2a+6c
++#         (5 6)
++#
++#  y=(b d)(3 4) = 3b+7d 4b+8d
++#         (7 8)
++#
++#  u=x+y = 1a+5c+3b+7d 2a+4b+6c+8d
++#  v=x-y = 1a+5c-3b-7d 2a+6c-4b-8d
++#
++#  Final results are (u , v[::-1])
++#
++#
++#  For 32x1 input, load even rows into HX(0++,0), odd rows into HX(16++,0)
++#  Apply the even matrix first and stop before rounding
++#  Then apply the odd matrix in a full manner:
++#
++#   First step is to compute partial products with the first input (16 cycles)
++#   1a 3b 5c 7d   16x1 input coefficients produce 16x16 output
++#   2a 4b 6c 8d
++#   2a -4b 6c -8d
++#   1a -3b 5c -7d
++#
++#   Second step is to sum partial products into final position (8 cycles)
++#   1a+3b+5c+7d
++#   2a+4b+6c+8d
++#   2a-4b+6c-8d
++#   1a-3b+5c-7d
++#
++#   Then can apply butterfly to combine even results and odd results + rounding to produce 16 rows of output at a time (need to save in transposed format)
++#
++#   For 16x16 no butterfly is required and can store final results in original location  (Could do 2 16x16s in parallel to make use of the trick - saves on the adds)
++#
++#   For 8x8 we could compute two in parallel.
++#
++#
++
++# Columns are transformed first
++#
++# Store top left half of transMatrix2 in HX(32,0)
++# Store bottom left half of transMatrix2 in HX(32,32)
++#
++# For 16x16
++# HX(0:15,0) contains input data before transform
++# HY(0:15,0) contains 32bit output data after transform
++# HX(32,0) contains even rows of left half of transMatrix2
++# HX(32,32) contains odd rows of left half of transMatrix2
++# HY(48,0) contains partial products ready for summing
++#
++
++
++# hevc_trans_16x16(short *transMatrix2, short *coeffs, int num) # TODO add size so we can branch to correct implementation (or perhaps have coeffs32 and num32 as secondary inputs!)
++# transMatrix2: address of the constant matrix (must be at 32 byte aligned address in Videocore memory)
++# coeffs: address of the transform coefficients (must be at 32 byte aligned address in Videocore memory)
++# num: number of 16x16 transforms to be done
++# coeffs32
++# num32: number of 32x32 transforms
++# command 0 for transform, 1 for memclear16(int16_t *dst,num16)
++#
++
++.equ TRANS_SHIFT, 20 - BIT_DEPTH
++.equ TRANS_RND2, 1 << (TRANS_SHIFT - 1)
++.equ TRANS_ASL2, 16 - TRANS_SHIFT
++
++
++hevc_trans_16x16:
++  push r6-r15, lr # TODO cut down number of used registers
++  mov r14,r3 # coeffs32
++  mov r15,r4 # num32
++  mov r3, 16*2 # Stride of transMatrix2 in bytes
++  vldh HX(32++,0),(r0 += r3) REP 16 # This is the 16x16 matrix, a transform is equivalent to multiplying input row vector * matrix
++
++  add r0, 16*16*2 # For 32x32 transforms we also need this matrix
++  vldh HX(32++,32),(r0 += r3) REP 16 # This is the odd 16x16 matrix
++
++  # Now use r0 to describe which matrix we are working on.
++  # Allows us to prefetch the next block of coefficients for efficiency.
++  mov r0,0 # This describes the location where we read our coefficients from
++  mov r3,16*2 # Stride of coefficients in bytes (TODO remove)
++  mov r7,16*16*2 # Total block size
++  mov r8,64*16 # Value used to swap from current to next VRF location
++  mov r4,64 # Constant used for rounding first pass
++  mov r5,TRANS_RND2 # Constant used for rounding second pass
++
++  sub sp,sp,64+16*16*2 # Move on stack pointer in case interrupt occurs and uses stack
++
++  add r11,sp,64 # Space for 32 bytes before, and rounding
++  lsr r11,5
++  lsl r11,5 # Make sure r11 is rounded to multiple of 2**5==32
++
++  lsr r10, r2, 16 # Number of compressed blocks stored in top short
++  extu r2,16
++  # At start of block r0,r1 point to the current block (that has already been loaded)
++  # r0 VRF location of current block
++  # r1 address of current block
++  # r2 number of 16*16 transforms to do
++  # r3 Stride of coefficients (==32)
++  # r4 TRANS_RND1 (64)
++  # r5 TRANS_RND2
++  # r6 temporary used inside col_trans16
++  # r7 16*16*2 total bytes in block
++  # r8 64*16 VRF switch locations
++  # r9 temporary in unpack_coeff for index
++  # r10 number of 16x16 transforms using compression
++  # r11 unpacked data buffer (16*16 shorts) (preceded by 16 shorts of packed data buffer)
++  # r12 temporary counter in unpack_coeff
++  # r13
++  # r14 Save information for 32 bit transform (coeffs location)
++  # r15 Save information for 32 bit transform (number of transforms)
++  cmp r2,0
++  beq done16x16s
++block_loop:
++  # With compressed coefficients, we don't use prefetch as we don't want to issue unnecessary memory requests
++  cmp r10,0
++  mov r6, r1
++  beq not_compressed
++  sub r10, 1
++  bl unpack16x16
++not_compressed:
++  #mov r6,r1 # DEBUG without compress
++  vldh HX(0++,0)+r0,(r6 += r3) REP 16
++  #eor r0,r8
++  #add r1,r7
++  # Prefetch the next block
++  #bl unpack16x16
++  #vldh HX(0++,0)+r0,(r6 += r3) REP 16
++  #vmov HX(0++,0)+r0,0 REP 16  # DEBUG
++  #eor r0,r8
++  #sub r1,r7
++
++  # Transform the current block
++  bl col_trans_16
++  vadd HY(0++,0)+r0,HY(0++,0)+r0,r4 REP 16   # Now add on rounding, shift down by 7, and saturate
++  #vsasls HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16 # 9+7=16 so this ends up with the output saturated and in the top half of the word.
++  vasl HY(0++,0)+r0,HY(0++,0)+r0,9 REP 16    # This should be saturating, but the instruction above does not assemble?
++  vmov VX(0,0++)+r0, HX(0++,32)+r0 REP 16    # For simplicity transpose this back to the original position
++
++  bl col_trans_16
++  vadd HY(0++,0)+r0,HY(0++,0)+r0,r5 REP 16   # Now add on rounding, shift down by 7, and saturate
++  #vsasls HY(0++,0)+r0,HY(0++,0)+r0,4 REP 16 # 4+12=16 so this ends up with the output saturated and in the top half of the word.
++  vasl HY(0++,0)+r0,HY(0++,0)+r0,TRANS_ASL2 REP 16    # This should be saturating, but the instruction above does not assemble?  (Probably because it ends with ls which is interpreted as a condition flag)
++
++  # Save results - note there has been a transposition during the processing so we save columns
++  vsth VX(0,32++)+r0, (r1 += r3) REP 16
++
++  # Move onto next block
++  eor r0,r8
++  add r1,r7
++
++  addcmpbgt r2,-1,0,block_loop
++done16x16s:
++
++  add sp,sp,64+16*16*2 # Release the scratch space that was reserved on the stack before block_loop
++  # Now go and do any 32x32 transforms
++  b hevc_trans_32x32
++
++  pop r6-r15, pc
++# Unpack one compressed 16x16 block. Returns in r6 the address to load the unpacked data from (the buffer at r11).
++# We load data 16 shorts at a time from memory (uncached), and store to stack space to allow us to process it.
++unpack16x16:
++# Clear out destination
++  vmov HX(0,0)+r0,0
++  mov r6, r11
++  vsth HX(0,0)+r0,(r6 += r3) REP 16 # Store the zeroed row 16 times, stepping by r3 each time, to clear the buffer at r11
++  mov r5, r1 # Moving pointer to input coefficients
++unpack_outer_loop:
++  # Loop until we find the end
++  vldh HX(0,0)+r0,(r5)  # TODO would prefetch help here while unpacking previous?
++  sub r6,r11,32 # Packed entries are staged in the 32 bytes immediately below the unpacked buffer
++  #add r6,pc,packed_data-$ # Packed data
++  vsth HX(0,0)+r0,(r6)  # Store into packed data
++  mov r12,0 # r12 counts the 32-bit entries consumed from this 32-byte chunk
++unpack_loop:
++  ld r4,(r6)
++  add r6,r6,4
++  lsr r9,r4,16 # r9 is destination value
++  cmp r4,0 # {value,index} - an all-zero entry ends the list (tested by the beq below)
++  extu r4,8 # Keep only the low 8 bits as the index
++  beq done_unpack
++  sth r9,(r11, r4) # Place the value in the unpacked buffer at the position selected by the index
++  addcmpblt r12,1,8,unpack_loop # 8 entries of 4 bytes per 32-byte chunk
++#  # Read next 16
++  add r5,32
++  b unpack_outer_loop
++done_unpack:
++#  # Set new load location
++  mov r6, r11
++  #add r6,pc,unpacked_data-$
++#  # Restore constants
++  mov r4,64 # r4 and r5 were used as scratch above
++  mov r5,TRANS_RND2
++#  pop r6-r15, pc
++  b lr
++
++# r1,r2,r3 r7,r8 should be preserved
++# HX(0++,0)+r0 is the block to be transformed
++# The 16x16 matrix of transform coefficients is read as VX(32,0++) (no register offset); r6 is clobbered as the loop limit
++# Use HY(48,0) for intermediate results
++# r0 can be used, but should be returned to its original value at the end
++col_trans_16:
++  add r6,r0,16 # Final value for this loop
++col_trans_16_loop:
++  # First compute partial products for a single column
++  vmul32s HY(48++,0), VX(0,0)+r0, VX(32,0++) REP 16
++  # Then sum up the results and place back
++  vadd VY(0,0)+r0, VY(48,0++), VY(48,8++) REP 8 CLRA SACC
++  addcmpblt r0,1,r6,col_trans_16_loop # 16 iterations: r0 steps by 1 from its entry value up to r6
++  sub r0,16  # put r0 back to its original value
++  b lr
++
++col_trans_odd_16:
++  add r6,r0,16 # Final value for this loop
++col_trans_odd_16_loop:
++  # First compute partial products for a single column (as col_trans_16, but against the odd matrix read as VX(32,32++))
++  vmul32s HY(48++,0), VX(0,0)+r0, VX(32,32++) REP 16
++  # Then sum up the results and place back
++  vadd VY(0,0)+r0, VY(48,0++), VY(48,8++) REP 8 CLRA SACC
++  addcmpblt r0,1,r6,col_trans_odd_16_loop # 16 iterations: r0 steps by 1 from its entry value up to r6
++  sub r0,16  # put r0 back to its original value
++  b lr
++
++# r1/r10 input pointer
++# r0,r4,r5,r6 free
++# r8/r9 output storage
++#
++# Store packed coefficients at r9-32
++# Store unpacked at r9+32*32 shorts, i.e. r9+32*32*2 bytes (because transform works on even/odd rows on input, but writes all rows)
++unpack32x32:
++# Clear out destination
++  vmov HX(0,0),0
++  add r0, r9, 32*32*2 # Unpacked buffer
++  mov r4, 32 # Byte step between stores: 64 stores * 32 bytes clears all 32*32 shorts
++  vsth HX(0,0),(r0 += r4) REP 64
++unpack_outer_loop32:
++  # Loop until we find the end
++  vldh HX(0,0),(r1)  # TODO would prefetch help here while unpacking previous?
++  sub r6,r9,32
++  #add r6,pc,packed_data-$ # Packed data
++  vsth HX(0,0),(r6)  # Store into packed data
++  mov r8,0 # r8 counts the 32-bit entries consumed from this 32-byte chunk
++unpack_loop32:
++  ld r4,(r6)
++  add r6,r6,4
++  lsr r5,r4,16 # r5 is destination value
++  cmp r4,0 # {value,index} - an all-zero entry ends the list (tested by the beq below)
++  extu r4,10 # Keep only the low 10 bits as the index
++  beq done_unpack # NOTE(review): this is the 16x16 exit label, which also reloads r6/r4/r5 (listed as free here); nothing in view branches to done_unpack32 - confirm intended
++  sth r5,(r0, r4) # Place the value in the unpacked buffer at the position selected by the index
++  addcmpblt r8,1,8,unpack_loop32 # 8 entries of 4 bytes per 32-byte chunk
++#  # Read next 16
++  add r1,32
++  b unpack_outer_loop32
++done_unpack32:
++  b lr
++# hevc_trans_32x32(short *transMatrix2, short *coeffs, int num)
++# transMatrix2: address of the constant matrix (must be at 32 byte aligned address in Videocore memory) Even followed by odd
++# coeffs: address of the transform coefficients (must be at 32 byte aligned address in Videocore memory)
++# num: number of 32x32 transforms to be done in low 16, number of packed in high 16
++# NOTE: reached by a branch from the end of the 16x16 code; coeffs and num are picked up from r14 and r15 below
++# Note that the 32x32 transforms are stored in reverse order, this means that the unpacked ones appear first!
++hevc_trans_32x32:
++  mov r1,r14 # coeffs
++  mov r2,r15 # num
++  lsr r15,r15,16 # Number that are packed
++  extu r2,16 # Total number
++
++  # Fetch odd transform matrix
++  #mov r3, 16*2 # Stride of transMatrix2 in bytes (and of coefficients)
++  #vldh HX(32++,0),(r0 += r3) REP 16 # This is the even 16x16 matrix
++  #add r0, 16*16*2
++  #vldh HX(32++,32),(r0 += r3) REP 16 # This is the odd 16x16 matrix
++
++  mov r3, 32*2*2 # Stride used to fetch alternate rows of our input coefficient buffer
++  mov r7, 16*16*2 # Total block size
++
++.if USE_STACK
++  # Stack base allocation
++  sub sp,sp,32*32*4+64 # Allocate some space on the stack for us to store 32*32 shorts as temporary results (needs to be aligned) and another 32*32 for unpacking
++  # set r8 to 32byte aligned stack pointer with 32 bytes of space before it
++  add r8,sp,63
++  lsr r8,5
++  lsl r8,5
++.else
++#:version r8
++  .half 0x00e8 #AUTOINSERTED
++  btst r8,16
++#:add r8,pc,intermediate_results-$
++  .half 0xbfe8
++  .half intermediate_results-($-2)
++  beq on_vpu1
++  add r8,r8,32*32*2*2+16*2 # Move to secondary storage
++on_vpu1:
++.endif
++  mov r9,r8  # Backup of the temporary storage
++  mov r10,r1 # Backup of the coefficient buffer
++
++  cmp r2,0
++  beq done32x32s
++block_loop32:
++
++  # Transform the first 16 columns
++  mov r1,r10  # Input Coefficient buffer
++  mov r8,r9   # Output temporary storage
++  # Unpacked are first, so need to only do unpacking when r2(=num left) <= r15 (=num packed)
++  cmp r2,r15
++  bgt not_compressed_32 # More blocks left than packed ones: this block is stored unpacked
++  bl unpack32x32
++  add r1,r9,32*32*2   # Uncompressed into temporary storage
++  mov r8,r9           # Transform into here
++not_compressed_32:
++  # COLUMN TRANSFORM
++  mov r4, 64 # Constant used for rounding first pass
++  mov r5, 9 # left shift used for rounding first pass
++
++  bl trans32
++  # Transform the second 16 columns
++  add r8,32*16*2 # Output moves on by 16 rows of 32 shorts
++  add r1,32 # Input moves on by 16 shorts (32 bytes)
++  bl trans32
++
++  # ROW TRANSFORM
++  mov r4, TRANS_RND2 # Constant used for rounding second pass
++  mov r5, TRANS_ASL2 # left shift used for rounding second pass
++
++  mov r1,r9  # Input temporary storage
++  mov r8,r10   # Output Coefficient buffer
++  bl trans32
++  # Transform the second 16 columns
++  add r8,32*16*2 # Output moves on by 16 rows of 32 shorts
++  add r1,32 # Input moves on by 16 shorts (32 bytes)
++  bl trans32
++
++  add r10, 32*32*2 # move onto next block of coefficients
++  addcmpbgt r2,-1,0,block_loop32 # Decrement the number of blocks left and loop while it is > 0
++done32x32s:
++
++.if USE_STACK
++  add sp,sp,32*32*4+64# Restore stack
++.endif
++
++  pop r6-r15, pc
++
++trans32:
++  push lr
++  # lr is saved above because of the nested bl calls. We can no longer afford the VRF space to do prefetching when doing 32x32
++  # Fetch the even rows
++  vldh HX(0++,0),(r1 += r3) REP 16
++  # Fetch the odd rows
++  vldh HX(16++,0),64(r1 += r3) REP 16 # First odd row is 32 shorts ahead of r1
++
++  # Transform the even rows using even matrix
++  mov r0, 0 # Even rows
++  bl col_trans_16
++
++  # Now transform the odd rows using odd matrix
++  mov r0, 64*16 # Odd rows
++  bl col_trans_odd_16
++
++  # Now apply butterfly to compute the first 16 results
++  vadd HY(48++,0),HY(0++,0),HY(16++,0) REP 16
++  vadd HY(48++,0),HY(48++,0),r4 REP 16   # add on rounding,
++  vasl HY(48++,0),HY(48++,0),r5 REP 16    # then shift left by r5 (9 or TRANS_ASL2, set by the caller) so the wanted 16 bits sit in the top half
++  # 16bit results now in HX(48,32)
++  mov r0,r8
++  mov r6,32*2 # Output row stride in bytes (32 shorts)
++  vsth VX(48,32++),(r0+=r6) REP 16
++
++  # Now apply butterfly to compute the second 16 results (in reverse order)
++  vsub HY(63,0),HY(0 ,0),HY(16,0)
++  vsub HY(62,0),HY(1 ,0),HY(17,0)
++  vsub HY(61,0),HY(2 ,0),HY(18,0)
++  vsub HY(60,0),HY(3 ,0),HY(19,0)
++  vsub HY(59,0),HY(4 ,0),HY(20,0)
++  vsub HY(58,0),HY(5 ,0),HY(21,0)
++  vsub HY(57,0),HY(6 ,0),HY(22,0)
++  vsub HY(56,0),HY(7 ,0),HY(23,0)
++  vsub HY(55,0),HY(8 ,0),HY(24,0)
++  vsub HY(54,0),HY(9 ,0),HY(25,0)
++  vsub HY(53,0),HY(10,0),HY(26,0)
++  vsub HY(52,0),HY(11,0),HY(27,0)
++  vsub HY(51,0),HY(12,0),HY(28,0)
++  vsub HY(50,0),HY(13,0),HY(29,0)
++  vsub HY(49,0),HY(14,0),HY(30,0)
++  vsub HY(48,0),HY(15,0),HY(31,0)
++  vadd HY(48++,0),HY(48++,0),r4 REP 16   # add on rounding,
++  vasl HY(48++,0),HY(48++,0),r5 REP 16    # then shift left by r5, as for the first 16 results
++  add r0,r8,32 # Second 16 results go 16 shorts (32 bytes) further along each output row
++  vsth VX(48,32++),(r0+=r6) REP 16
++  pop pc
++
++.if USE_STACK == 0
++  .balign 32
++
++# .space directives generate 0's in the bin so avoid unnecessary padding by
++# just setting to appropriate value
++.equ intermediate_results, $+16*2
++
++# Layout goes:
++#
++#packed_buffer:
++#  .space 16*2
++#intermediate_results:
++#  .space 32*32*2
++#unpacked_buffer:
++#  .space 32*32*2
++#
++#packed_buffer2:
++#  .space 16*2
++#intermediate_results2:
++#  .space 32*32*2
++#unpacked_buffer2:
++#  .space 32*32*2
++.endif
++
++
+--- /dev/null
++++ b/libavcodec/rpi_hevc_transform10.h
+@@ -0,0 +1,94 @@
++static const unsigned char rpi_hevc_transform10 [] = {
++0xa9,  0x03,  0x3e,  0x40,  0x4f,  0x40,  0x03,  0xb0,   // 0000
++0x20,  0x00,  0x0c,  0xf8,  0x38,  0x88,  0x80,  0x03,   // 0008
++0xc0,  0xf8,  0x00,  0x00,  0x40,  0xb0,  0x00,  0x02,   // 0010
++0x0c,  0xf8,  0x38,  0xa8,  0x80,  0x03,  0xc0,  0xf8,   // 0018
++0x00,  0x00,  0x00,  0x60,  0x03,  0xb0,  0x20,  0x00,   // 0020
++0x07,  0xb0,  0x00,  0x02,  0x08,  0xb0,  0x00,  0x04,   // 0028
++0x04,  0xb0,  0x40,  0x00,  0x05,  0xb0,  0x00,  0x02,   // 0030
++0x59,  0xb0,  0xc0,  0xfd,  0x0b,  0x12,  0x5b,  0x7a,   // 0038
++0x5b,  0x7c,  0x4a,  0xc3,  0x50,  0x17,  0x02,  0x6f,   // 0040
++0x02,  0x6a,  0x32,  0x18,  0x0a,  0x6a,  0x16,  0x40,   // 0048
++0x04,  0x18,  0x1a,  0x66,  0x80,  0x90,  0x32,  0x00,   // 0050
++0x0c,  0xf8,  0x38,  0x80,  0x80,  0x03,  0xc0,  0x08,   // 0058
++0x18,  0x00,  0x80,  0x90,  0x51,  0x00,  0x04,  0xff,   // 0060
++0x30,  0xc0,  0x80,  0x03,  0x20,  0x08,  0x10,  0x00,   // 0068
++0x4c,  0xfe,  0x30,  0xc0,  0x09,  0x04,  0x20,  0x08,   // 0070
++0x00,  0x00,  0x04,  0xfc,  0x38,  0x90,  0x80,  0x02,   // 0078
++0xc0,  0x0b,  0x02,  0x00,  0x80,  0x90,  0x40,  0x00,   // 0080
++0x04,  0xff,  0x30,  0xc0,  0x80,  0x03,  0x20,  0x08,   // 0088
++0x14,  0x00,  0x4c,  0xfe,  0x30,  0xc0,  0x06,  0x04,   // 0090
++0x20,  0x08,  0x00,  0x00,  0x8c,  0xf8,  0x2c,  0xe0,   // 0098
++0x80,  0x03,  0x20,  0x30,  0x04,  0x00,  0x80,  0x45,   // 00a0
++0x71,  0x42,  0xf2,  0x8c,  0xd1,  0xc0,  0x59,  0xb0,   // 00a8
++0x40,  0x02,  0x00,  0x9e,  0x6d,  0x00,  0x29,  0x03,   // 00b0
++0x00,  0xf4,  0x38,  0x80,  0x00,  0x0c,  0xb6,  0x40,   // 00b8
++0x8c,  0xf8,  0x20,  0xe0,  0x80,  0x03,  0x00,  0x30,   // 00c0
++0x18,  0x00,  0x15,  0x40,  0x08,  0xf0,  0x38,  0x80,   // 00c8
++0x85,  0x0b,  0x66,  0xb5,  0xe0,  0xff,  0x88,  0xf0,   // 00d0
++0x24,  0xe0,  0x86,  0x03,  0x0c,  0x60,  0x64,  0x08,   // 00d8
++0x46,  0x62,  0x49,  0xc3,  0x50,  0x27,  0x04,  0x6a,   // 00e0
++0x84,  0x6e,  0x07,  0x18,  0x69,  0xa0,  0x04,  0x5f,   // 00e8
++0x1c,  0x8b,  0xf7,  0xc8,  0x45,  0x76,  0x6b,  0x1f,   // 00f0
++0xb6,  0x40,  0x04,  0xb0,  0x40,  0x00,  0x05,  0xb0,   // 00f8
++0x00,  0x02,  0x5a,  0x00,  0x06,  0xb4,  0x10,  0x00,   // 0100
++0xa4,  0xff,  0x24,  0xcc,  0x60,  0x02,  0x00,  0xf8,   // 0108
++0x3e,  0x00,  0x03,  0xff,  0x37,  0xd0,  0x78,  0x03,   // 0110
++0xe0,  0x03,  0xbe,  0x0b,  0x10,  0x8b,  0xf6,  0x5b,   // 0118
++0x00,  0x67,  0x5a,  0x00,  0x06,  0xb4,  0x10,  0x00,   // 0120
++0xa4,  0xff,  0x24,  0xcc,  0xe0,  0x02,  0x00,  0xf8,   // 0128
++0x3e,  0x00,  0x03,  0xff,  0x37,  0xd0,  0x78,  0x03,   // 0130
++0xe0,  0x03,  0xbe,  0x0b,  0x10,  0x8b,  0xf6,  0x5b,   // 0138
++0x00,  0x67,  0x5a,  0x00,  0x00,  0xf4,  0x38,  0x80,   // 0140
++0x00,  0x04,  0x20,  0xb5,  0x00,  0x08,  0x04,  0xb0,   // 0148
++0x20,  0x00,  0x8e,  0xf8,  0x20,  0xe0,  0x80,  0x03,   // 0150
++0xc0,  0x43,  0x00,  0x00,  0x08,  0xf0,  0x38,  0x80,   // 0158
++0x81,  0x03,  0x26,  0xb5,  0xe0,  0xff,  0x88,  0xf0,   // 0160
++0x20,  0xe0,  0x86,  0x03,  0x08,  0x60,  0x64,  0x08,   // 0168
++0x46,  0x62,  0x45,  0xc3,  0x50,  0x27,  0x04,  0x6a,   // 0170
++0xa4,  0x6e,  0x7f,  0x90,  0xbf,  0xff,  0x65,  0xa0,   // 0178
++0x04,  0x07,  0x18,  0x8b,  0xf6,  0xc8,  0x41,  0x76,   // 0180
++0x6a,  0x1f,  0x5a,  0x00,  0xe1,  0x40,  0xf2,  0x40,   // 0188
++0x0f,  0x7b,  0x02,  0x6f,  0x03,  0xb0,  0x80,  0x00,   // 0190
++0x07,  0xb0,  0x00,  0x02,  0xe8,  0x00,  0x08,  0x6d,   // 0198
++0xe8,  0xbf,  0x60,  0x01,  0x03,  0x18,  0x48,  0xb0,   // 01a0
++0x20,  0x10,  0x89,  0x40,  0x1a,  0x40,  0x02,  0x6a,   // 01a8
++0x24,  0x18,  0xa1,  0x40,  0x98,  0x40,  0xf2,  0x4a,   // 01b0
++0x06,  0x1e,  0xff,  0x9f,  0xc5,  0xff,  0x21,  0xb5,   // 01b8
++0x00,  0x08,  0x98,  0x40,  0x04,  0xb0,  0x40,  0x00,   // 01c0
++0x95,  0x60,  0x80,  0x90,  0x18,  0x00,  0x48,  0xb0,   // 01c8
++0x00,  0x04,  0x41,  0x76,  0x80,  0x90,  0x13,  0x00,   // 01d0
++0x04,  0xb0,  0x00,  0x02,  0x65,  0x60,  0x91,  0x40,   // 01d8
++0xa8,  0x40,  0x80,  0x90,  0x0c,  0x00,  0x48,  0xb0,   // 01e0
++0x00,  0x04,  0x41,  0x76,  0x80,  0x90,  0x07,  0x00,   // 01e8
++0x4a,  0xb0,  0x00,  0x08,  0xf2,  0x8c,  0xdf,  0xc0,   // 01f0
++0x29,  0x03,  0xef,  0x03,  0x0c,  0xf8,  0x38,  0x80,   // 01f8
++0x80,  0x03,  0xc0,  0xf8,  0x04,  0x00,  0x0c,  0xf8,   // 0200
++0x38,  0x84,  0xc0,  0x03,  0xc0,  0xf8,  0x04,  0x00,   // 0208
++0x00,  0x60,  0xff,  0x9f,  0x79,  0xff,  0x00,  0xb0,   // 0210
++0x00,  0x04,  0xff,  0x9f,  0x85,  0xff,  0x04,  0xff,   // 0218
++0x30,  0xcc,  0x10,  0x03,  0xe0,  0xfb,  0x3e,  0x00,   // 0220
++0x04,  0xff,  0x33,  0xcc,  0x80,  0x03,  0xe0,  0xfb,   // 0228
++0x10,  0x00,  0x4c,  0xfe,  0x33,  0xcc,  0x80,  0x03,   // 0230
++0xe0,  0xfb,  0x14,  0x00,  0x80,  0x40,  0x06,  0xb0,   // 0238
++0x40,  0x00,  0x8c,  0xf8,  0x2f,  0xe0,  0x80,  0x03,   // 0240
++0xe0,  0x63,  0x00,  0x00,  0x20,  0xf7,  0xf0,  0xcf,   // 0248
++0x10,  0x03,  0x20,  0xf7,  0xb0,  0xcf,  0x11,  0x13,   // 0250
++0x20,  0xf7,  0x70,  0xcf,  0x12,  0x23,  0x20,  0xf7,   // 0258
++0x30,  0xcf,  0x13,  0x33,  0x20,  0xf7,  0xf0,  0xce,   // 0260
++0x14,  0x43,  0x20,  0xf7,  0xb0,  0xce,  0x15,  0x53,   // 0268
++0x20,  0xf7,  0x70,  0xce,  0x16,  0x63,  0x20,  0xf7,   // 0270
++0x30,  0xce,  0x17,  0x73,  0x20,  0xf7,  0xf0,  0xcd,   // 0278
++0x18,  0x83,  0x20,  0xf7,  0xb0,  0xcd,  0x19,  0x93,   // 0280
++0x20,  0xf7,  0x70,  0xcd,  0x1a,  0xa3,  0x20,  0xf7,   // 0288
++0x30,  0xcd,  0x1b,  0xb3,  0x20,  0xf7,  0xf0,  0xcc,   // 0290
++0x1c,  0xc3,  0x20,  0xf7,  0xb0,  0xcc,  0x1d,  0xd3,   // 0298
++0x20,  0xf7,  0x70,  0xcc,  0x1e,  0xe3,  0x20,  0xf7,   // 02a0
++0x30,  0xcc,  0x1f,  0xf3,  0x04,  0xff,  0x33,  0xcc,   // 02a8
++0x80,  0x03,  0xe0,  0xfb,  0x10,  0x00,  0x4c,  0xfe,   // 02b0
++0x33,  0xcc,  0x80,  0x03,  0xe0,  0xfb,  0x14,  0x00,   // 02b8
++0x00,  0xb5,  0x20,  0x00,  0x8c,  0xf8,  0x2f,  0xe0,   // 02c0
++0x80,  0x03,  0xe0,  0x63,  0x00,  0x00,  0x6f,  0x03,   // 02c8
++0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,   // 02d0
++0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,   // 02d8
++};
+--- /dev/null
++++ b/libavcodec/rpi_hevc_transform8.h
+@@ -0,0 +1,94 @@
++static const unsigned char rpi_hevc_transform8 [] = {
++0xa9,  0x03,  0x3e,  0x40,  0x4f,  0x40,  0x03,  0xb0,   // 0000
++0x20,  0x00,  0x0c,  0xf8,  0x38,  0x88,  0x80,  0x03,   // 0008
++0xc0,  0xf8,  0x00,  0x00,  0x40,  0xb0,  0x00,  0x02,   // 0010
++0x0c,  0xf8,  0x38,  0xa8,  0x80,  0x03,  0xc0,  0xf8,   // 0018
++0x00,  0x00,  0x00,  0x60,  0x03,  0xb0,  0x20,  0x00,   // 0020
++0x07,  0xb0,  0x00,  0x02,  0x08,  0xb0,  0x00,  0x04,   // 0028
++0x04,  0xb0,  0x40,  0x00,  0x05,  0xb0,  0x00,  0x08,   // 0030
++0x59,  0xb0,  0xc0,  0xfd,  0x0b,  0x12,  0x5b,  0x7a,   // 0038
++0x5b,  0x7c,  0x4a,  0xc3,  0x50,  0x17,  0x02,  0x6f,   // 0040
++0x02,  0x6a,  0x32,  0x18,  0x0a,  0x6a,  0x16,  0x40,   // 0048
++0x04,  0x18,  0x1a,  0x66,  0x80,  0x90,  0x32,  0x00,   // 0050
++0x0c,  0xf8,  0x38,  0x80,  0x80,  0x03,  0xc0,  0x08,   // 0058
++0x18,  0x00,  0x80,  0x90,  0x51,  0x00,  0x04,  0xff,   // 0060
++0x30,  0xc0,  0x80,  0x03,  0x20,  0x08,  0x10,  0x00,   // 0068
++0x4c,  0xfe,  0x30,  0xc0,  0x09,  0x04,  0x20,  0x08,   // 0070
++0x00,  0x00,  0x04,  0xfc,  0x38,  0x90,  0x80,  0x02,   // 0078
++0xc0,  0x0b,  0x02,  0x00,  0x80,  0x90,  0x40,  0x00,   // 0080
++0x04,  0xff,  0x30,  0xc0,  0x80,  0x03,  0x20,  0x08,   // 0088
++0x14,  0x00,  0x4c,  0xfe,  0x30,  0xc0,  0x04,  0x04,   // 0090
++0x20,  0x08,  0x00,  0x00,  0x8c,  0xf8,  0x2c,  0xe0,   // 0098
++0x80,  0x03,  0x20,  0x30,  0x04,  0x00,  0x80,  0x45,   // 00a0
++0x71,  0x42,  0xf2,  0x8c,  0xd1,  0xc0,  0x59,  0xb0,   // 00a8
++0x40,  0x02,  0x00,  0x9e,  0x6d,  0x00,  0x29,  0x03,   // 00b0
++0x00,  0xf4,  0x38,  0x80,  0x00,  0x0c,  0xb6,  0x40,   // 00b8
++0x8c,  0xf8,  0x20,  0xe0,  0x80,  0x03,  0x00,  0x30,   // 00c0
++0x18,  0x00,  0x15,  0x40,  0x08,  0xf0,  0x38,  0x80,   // 00c8
++0x85,  0x0b,  0x66,  0xb5,  0xe0,  0xff,  0x88,  0xf0,   // 00d0
++0x24,  0xe0,  0x86,  0x03,  0x0c,  0x60,  0x64,  0x08,   // 00d8
++0x46,  0x62,  0x49,  0xc3,  0x50,  0x27,  0x04,  0x6a,   // 00e0
++0x84,  0x6e,  0x07,  0x18,  0x69,  0xa0,  0x04,  0x5f,   // 00e8
++0x1c,  0x8b,  0xf7,  0xc8,  0x45,  0x76,  0x6b,  0x1f,   // 00f0
++0xb6,  0x40,  0x04,  0xb0,  0x40,  0x00,  0x05,  0xb0,   // 00f8
++0x00,  0x08,  0x5a,  0x00,  0x06,  0xb4,  0x10,  0x00,   // 0100
++0xa4,  0xff,  0x24,  0xcc,  0x60,  0x02,  0x00,  0xf8,   // 0108
++0x3e,  0x00,  0x03,  0xff,  0x37,  0xd0,  0x78,  0x03,   // 0110
++0xe0,  0x03,  0xbe,  0x0b,  0x10,  0x8b,  0xf6,  0x5b,   // 0118
++0x00,  0x67,  0x5a,  0x00,  0x06,  0xb4,  0x10,  0x00,   // 0120
++0xa4,  0xff,  0x24,  0xcc,  0xe0,  0x02,  0x00,  0xf8,   // 0128
++0x3e,  0x00,  0x03,  0xff,  0x37,  0xd0,  0x78,  0x03,   // 0130
++0xe0,  0x03,  0xbe,  0x0b,  0x10,  0x8b,  0xf6,  0x5b,   // 0138
++0x00,  0x67,  0x5a,  0x00,  0x00,  0xf4,  0x38,  0x80,   // 0140
++0x00,  0x04,  0x20,  0xb5,  0x00,  0x08,  0x04,  0xb0,   // 0148
++0x20,  0x00,  0x8e,  0xf8,  0x20,  0xe0,  0x80,  0x03,   // 0150
++0xc0,  0x43,  0x00,  0x00,  0x08,  0xf0,  0x38,  0x80,   // 0158
++0x81,  0x03,  0x26,  0xb5,  0xe0,  0xff,  0x88,  0xf0,   // 0160
++0x20,  0xe0,  0x86,  0x03,  0x08,  0x60,  0x64,  0x08,   // 0168
++0x46,  0x62,  0x45,  0xc3,  0x50,  0x27,  0x04,  0x6a,   // 0170
++0xa4,  0x6e,  0x7f,  0x90,  0xbf,  0xff,  0x65,  0xa0,   // 0178
++0x04,  0x07,  0x18,  0x8b,  0xf6,  0xc8,  0x41,  0x76,   // 0180
++0x6a,  0x1f,  0x5a,  0x00,  0xe1,  0x40,  0xf2,  0x40,   // 0188
++0x0f,  0x7b,  0x02,  0x6f,  0x03,  0xb0,  0x80,  0x00,   // 0190
++0x07,  0xb0,  0x00,  0x02,  0xe8,  0x00,  0x08,  0x6d,   // 0198
++0xe8,  0xbf,  0x60,  0x01,  0x03,  0x18,  0x48,  0xb0,   // 01a0
++0x20,  0x10,  0x89,  0x40,  0x1a,  0x40,  0x02,  0x6a,   // 01a8
++0x24,  0x18,  0xa1,  0x40,  0x98,  0x40,  0xf2,  0x4a,   // 01b0
++0x06,  0x1e,  0xff,  0x9f,  0xc5,  0xff,  0x21,  0xb5,   // 01b8
++0x00,  0x08,  0x98,  0x40,  0x04,  0xb0,  0x40,  0x00,   // 01c0
++0x95,  0x60,  0x80,  0x90,  0x18,  0x00,  0x48,  0xb0,   // 01c8
++0x00,  0x04,  0x41,  0x76,  0x80,  0x90,  0x13,  0x00,   // 01d0
++0x04,  0xb0,  0x00,  0x08,  0x45,  0x60,  0x91,  0x40,   // 01d8
++0xa8,  0x40,  0x80,  0x90,  0x0c,  0x00,  0x48,  0xb0,   // 01e0
++0x00,  0x04,  0x41,  0x76,  0x80,  0x90,  0x07,  0x00,   // 01e8
++0x4a,  0xb0,  0x00,  0x08,  0xf2,  0x8c,  0xdf,  0xc0,   // 01f0
++0x29,  0x03,  0xef,  0x03,  0x0c,  0xf8,  0x38,  0x80,   // 01f8
++0x80,  0x03,  0xc0,  0xf8,  0x04,  0x00,  0x0c,  0xf8,   // 0200
++0x38,  0x84,  0xc0,  0x03,  0xc0,  0xf8,  0x04,  0x00,   // 0208
++0x00,  0x60,  0xff,  0x9f,  0x79,  0xff,  0x00,  0xb0,   // 0210
++0x00,  0x04,  0xff,  0x9f,  0x85,  0xff,  0x04,  0xff,   // 0218
++0x30,  0xcc,  0x10,  0x03,  0xe0,  0xfb,  0x3e,  0x00,   // 0220
++0x04,  0xff,  0x33,  0xcc,  0x80,  0x03,  0xe0,  0xfb,   // 0228
++0x10,  0x00,  0x4c,  0xfe,  0x33,  0xcc,  0x80,  0x03,   // 0230
++0xe0,  0xfb,  0x14,  0x00,  0x80,  0x40,  0x06,  0xb0,   // 0238
++0x40,  0x00,  0x8c,  0xf8,  0x2f,  0xe0,  0x80,  0x03,   // 0240
++0xe0,  0x63,  0x00,  0x00,  0x20,  0xf7,  0xf0,  0xcf,   // 0248
++0x10,  0x03,  0x20,  0xf7,  0xb0,  0xcf,  0x11,  0x13,   // 0250
++0x20,  0xf7,  0x70,  0xcf,  0x12,  0x23,  0x20,  0xf7,   // 0258
++0x30,  0xcf,  0x13,  0x33,  0x20,  0xf7,  0xf0,  0xce,   // 0260
++0x14,  0x43,  0x20,  0xf7,  0xb0,  0xce,  0x15,  0x53,   // 0268
++0x20,  0xf7,  0x70,  0xce,  0x16,  0x63,  0x20,  0xf7,   // 0270
++0x30,  0xce,  0x17,  0x73,  0x20,  0xf7,  0xf0,  0xcd,   // 0278
++0x18,  0x83,  0x20,  0xf7,  0xb0,  0xcd,  0x19,  0x93,   // 0280
++0x20,  0xf7,  0x70,  0xcd,  0x1a,  0xa3,  0x20,  0xf7,   // 0288
++0x30,  0xcd,  0x1b,  0xb3,  0x20,  0xf7,  0xf0,  0xcc,   // 0290
++0x1c,  0xc3,  0x20,  0xf7,  0xb0,  0xcc,  0x1d,  0xd3,   // 0298
++0x20,  0xf7,  0x70,  0xcc,  0x1e,  0xe3,  0x20,  0xf7,   // 02a0
++0x30,  0xcc,  0x1f,  0xf3,  0x04,  0xff,  0x33,  0xcc,   // 02a8
++0x80,  0x03,  0xe0,  0xfb,  0x10,  0x00,  0x4c,  0xfe,   // 02b0
++0x33,  0xcc,  0x80,  0x03,  0xe0,  0xfb,  0x14,  0x00,   // 02b8
++0x00,  0xb5,  0x20,  0x00,  0x8c,  0xf8,  0x2f,  0xe0,   // 02c0
++0x80,  0x03,  0xe0,  0x63,  0x00,  0x00,  0x6f,  0x03,   // 02c8
++0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,   // 02d0
++0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,  0x00,   // 02d8
++};
+--- /dev/null
++++ b/libavcodec/rpi_hevcdec.c
+@@ -0,0 +1,6134 @@
++/*
++ * HEVC video Decoder
++ *
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ * Copyright (C) 2012 - 2013 Mickael Raulet
++ * Copyright (C) 2012 - 2013 Gildas Cocherel
++ * Copyright (C) 2012 - 2013 Wassim Hamidouche
++ * Copyright (C) 2018 John Cox, Ben Avison, Peter de Rivaz for Raspberry Pi (Trading)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "libavutil/attributes.h"
++#include "libavutil/common.h"
++#include "libavutil/display.h"
++#include "libavutil/internal.h"
++#include "libavutil/mastering_display_metadata.h"
++#include "libavutil/md5.h"
++#include "libavutil/opt.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/stereo3d.h"
++
++#include "decode.h"
++#include "bswapdsp.h"
++#include "bytestream.h"
++#include "golomb.h"
++#include "hevc.h"
++#include "rpi_hevc_data.h"
++#include "rpi_hevc_parse.h"
++#include "rpi_hevcdec.h"
++#include "rpi_hevc_cabac_fns.h"
++#include "profiles.h"
++#include "hwconfig.h"
++
++#include "rpi_zc_frames.h"
++#include "rpi_qpu.h"
++#include "rpi_hevc_shader.h"
++#include "rpi_hevc_shader_cmd.h"
++#include "rpi_hevc_shader_template.h"
++#include "rpi_zc.h"
++#include "libavutil/rpi_sand_fns.h"
++
++#include "pthread.h"
++#include <stdatomic.h>
++
++#define DEBUG_DECODE_N 0   // 0 = do all, n = frames idr onwards
++
++#define PACK2(hi,lo) (((hi) << 16) | ((lo) & 0xffff))
++
++#ifndef av_mod_uintp2
++static av_always_inline av_const unsigned av_mod_uintp2_c(unsigned a, unsigned p)
++{   // Fallback used only when av_mod_uintp2 is not already defined: returns the low p bits of a (a mod 2^p); p must be < 32
++    return a & ((1U << p) - 1);  // 1U, not 1: shifting a signed 1 into bit 31 is undefined behaviour
++}
++#   define av_mod_uintp2   av_mod_uintp2_c
++#endif
++
++const uint8_t ff_hevc_rpi_pel_weight[65] = { [2] = 0, [4] = 1, [6] = 2, [8] = 3, [12] = 4, [16] = 5, [24] = 6, [32] = 7, [48] = 8, [64] = 9 };
++static void rpi_begin(const HEVCRpiContext * const s, HEVCRpiJob * const jb, const unsigned int ctu_ts_first);
++
++#define MC_DUMMY_X (-32)
++#define MC_DUMMY_Y (-32)
++
++// UV & Y both have min 4x4 pred (no 2x2 chroma)
++// Allow for even spread +1 for setup, +1 for rounding
++// As we have load sharing this can (in theory) be exceeded so we have to
++// check after each CTU, but it is a good base size
++
++// Worst case (all 4x4) commands per CTU
++#define QPU_Y_CMD_PER_CTU_MAX (16 * 16)
++#define QPU_C_CMD_PER_CTU_MAX (8 * 8)
++
++#define QPU_MAX_CTU_PER_LINE ((HEVC_RPI_MAX_WIDTH + 63) / 64)
++
++#define QPU_GRPS (QPU_N_MAX / QPU_N_GRP)
++#define QPU_CTU_PER_GRP ((QPU_MAX_CTU_PER_LINE + QPU_GRPS - 1) / QPU_GRPS)
++
++#define QPU_Y_CMD_SLACK_PER_Q (QPU_Y_CMD_PER_CTU_MAX / 2)
++#define QPU_C_CMD_SLACK_PER_Q (QPU_C_CMD_PER_CTU_MAX / 2)
++
++// Total cmds to allocate - allow for slack & setup
++#define QPU_Y_COMMANDS (QPU_CTU_PER_GRP * QPU_GRPS * QPU_Y_CMD_PER_CTU_MAX + (1 + QPU_Y_CMD_SLACK_PER_Q) * QPU_N_MAX)
++#define QPU_C_COMMANDS (QPU_CTU_PER_GRP * QPU_GRPS * QPU_C_CMD_PER_CTU_MAX + (1 + QPU_C_CMD_SLACK_PER_Q) * QPU_N_MAX)
++
++#define QPU_Y_SYNCS (QPU_N_MAX * (16 + 2))
++#define QPU_C_SYNCS (QPU_N_MAX * (8 + 2))
++
++// The QPU code for UV blocks only works up to a block width of 8
++#define RPI_CHROMA_BLOCK_WIDTH 8
++
++#define ENCODE_COEFFS(c0, c1, c2, c3) (((c0) & 0xff) | ((c1) & 0xff) << 8 | ((c2) & 0xff) << 16 | ((c3) & 0xff) << 24)
++
++
++// Actual filter goes -ve, +ve, +ve, -ve using these values
++static const uint32_t rpi_filter_coefs[8] = {
++        ENCODE_COEFFS(  0,  64,   0,  0),
++        ENCODE_COEFFS(  2,  58,  10,  2),
++        ENCODE_COEFFS(  4,  54,  16,  2),
++        ENCODE_COEFFS(  6,  46,  28,  4),
++        ENCODE_COEFFS(  4,  36,  36,  4),
++        ENCODE_COEFFS(  4,  28,  46,  6),
++        ENCODE_COEFFS(  2,  16,  54,  4),
++        ENCODE_COEFFS(  2,  10,  58,  2)
++};
++
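++// Function arrays by QPU (12 entries each): setup/exit tables use the _q0 variant for entry 0 and _qn for the rest; sync tables have one entry per QPU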
++
++static const int * const inter_pred_setup_c_qpu[12] = {
++    mc_setup_c_q0, mc_setup_c_qn, mc_setup_c_qn, mc_setup_c_qn,
++    mc_setup_c_qn, mc_setup_c_qn, mc_setup_c_qn, mc_setup_c_qn,
++    mc_setup_c_qn, mc_setup_c_qn, mc_setup_c_qn, mc_setup_c_qn
++};
++
++static const int * const inter_pred_setup_c10_qpu[12] = {
++    mc_setup_c10_q0, mc_setup_c10_qn, mc_setup_c10_qn, mc_setup_c10_qn,
++    mc_setup_c10_qn, mc_setup_c10_qn, mc_setup_c10_qn, mc_setup_c10_qn,
++    mc_setup_c10_qn, mc_setup_c10_qn, mc_setup_c10_qn, mc_setup_c10_qn
++};
++
++static const int * const inter_pred_setup_y_qpu[12] = {
++    mc_setup_y_q0, mc_setup_y_qn, mc_setup_y_qn, mc_setup_y_qn,
++    mc_setup_y_qn, mc_setup_y_qn, mc_setup_y_qn, mc_setup_y_qn,
++    mc_setup_y_qn, mc_setup_y_qn, mc_setup_y_qn, mc_setup_y_qn
++};
++
++static const int * const inter_pred_setup_y10_qpu[12] = {
++    mc_setup_y10_q0, mc_setup_y10_qn, mc_setup_y10_qn, mc_setup_y10_qn,
++    mc_setup_y10_qn, mc_setup_y10_qn, mc_setup_y10_qn, mc_setup_y10_qn,
++    mc_setup_y10_qn, mc_setup_y10_qn, mc_setup_y10_qn, mc_setup_y10_qn
++};
++
++static const int * const inter_pred_sync_qpu[12] = {
++    mc_sync_q0, mc_sync_q1, mc_sync_q2, mc_sync_q3,
++    mc_sync_q4, mc_sync_q5, mc_sync_q6, mc_sync_q7,
++    mc_sync_q8, mc_sync_q9, mc_sync_q10, mc_sync_q11
++};
++
++static const int * const inter_pred_sync10_qpu[12] = {
++    mc_sync10_q0, mc_sync10_q1, mc_sync10_q2, mc_sync10_q3,
++    mc_sync10_q4, mc_sync10_q5, mc_sync10_q6, mc_sync10_q7,
++    mc_sync10_q8, mc_sync10_q9, mc_sync10_q10, mc_sync10_q11
++};
++
++static const int * const inter_pred_exit_c_qpu[12] = {
++    mc_exit_c_q0, mc_exit_c_qn, mc_exit_c_qn, mc_exit_c_qn,
++    mc_exit_c_qn, mc_exit_c_qn, mc_exit_c_qn, mc_exit_c_qn,
++    mc_exit_c_qn, mc_exit_c_qn, mc_exit_c_qn, mc_exit_c_qn
++};
++
++static const int * const inter_pred_exit_c10_qpu[12] = {
++    mc_exit_c10_q0, mc_exit_c10_qn, mc_exit_c10_qn, mc_exit_c10_qn,
++    mc_exit_c10_qn, mc_exit_c10_qn, mc_exit_c10_qn, mc_exit_c10_qn,
++    mc_exit_c10_qn, mc_exit_c10_qn, mc_exit_c10_qn, mc_exit_c10_qn
++};
++
++static const int * const inter_pred_exit_y_qpu[12] = {
++    mc_exit_y_q0, mc_exit_y_qn, mc_exit_y_qn, mc_exit_y_qn,
++    mc_exit_y_qn, mc_exit_y_qn, mc_exit_y_qn, mc_exit_y_qn,
++    mc_exit_y_qn, mc_exit_y_qn, mc_exit_y_qn, mc_exit_y_qn
++};
++
++static const int * const inter_pred_exit_y10_qpu[12] = {
++    mc_exit_y10_q0, mc_exit_y10_qn, mc_exit_y10_qn, mc_exit_y10_qn,
++    mc_exit_y10_qn, mc_exit_y10_qn, mc_exit_y10_qn, mc_exit_y10_qn,
++    mc_exit_y10_qn, mc_exit_y10_qn, mc_exit_y10_qn, mc_exit_y10_qn
++};
++
++typedef struct ipe_chan_info_s
++{
++    const uint8_t bit_depth;
++    const uint8_t n;
++    const int * const * setup_fns;
++    const int * const * sync_fns;
++    const int * const * exit_fns;
++} ipe_chan_info_t;
++
++typedef struct ipe_init_info_s
++{
++    ipe_chan_info_t luma;
++    ipe_chan_info_t chroma;
++} ipe_init_info_t;
++
++static void set_bytes(uint8_t * b, const unsigned int stride, const int ln, unsigned int a)
++{   // Fill a (1 << ln) x (1 << ln) byte square starting at b, rows stride bytes apart, with the byte a
++    switch (ln)  // NOTE(review): the replication below ORs shifted copies of a, so a is presumably < 256 - confirm against callers
++    {
++        default:  // normally 0
++            *b = a;
++            break;
++        case 1:  // 2 bytes on each of 2 rows
++            a |= a << 8;
++            *(uint16_t *)b = a;  // NOTE(review): the wide stores assume b is suitably aligned - confirm
++            b += stride;
++            *(uint16_t *)b = a;
++            break;
++        case 2:  // 4 bytes on each of 4 rows
++            a |= a << 8;
++            a |= a << 16;
++            *(uint32_t *)b = a;
++            b += stride;
++            *(uint32_t *)b = a;
++            b += stride;
++            *(uint32_t *)b = a;
++            b += stride;
++            *(uint32_t *)b = a;
++            break;
++        case 3:  // 8 bytes on each of 8 rows
++        {
++            unsigned int i;
++            uint64_t d;
++            a |= a << 8;
++            a |= a << 16;
++            d = ((uint64_t)a << 32) | a;  // replicate the 4-byte pattern into 8 bytes
++            for (i = 0; i != 8; ++i, b += stride)
++                *(uint64_t *)b = d;
++            break;
++        }
++        case 4:  // 16 bytes on each of 16 rows
++        {
++            unsigned int i;
++            uint64_t d;
++            a |= a << 8;
++            a |= a << 16;
++            d = ((uint64_t)a << 32) | a;  // replicate the 4-byte pattern into 8 bytes
++            for (i = 0; i != 16; ++i, b += stride)
++            {
++                *(uint64_t *)b = d;
++                *(uint64_t *)(b + 8) = d;  // second half of the 16-byte row
++            }
++            break;
++        }
++    }
++}
++
++// Store the byte a into the first (1 << ln) bytes of both b_u and b_l (a single byte each when ln is 0 or -1).
++// We expect this to be called with ln = (log2_cb_size - 3) so range = -1..3 (4 not required, but handled below)
++static void set_stash2(uint8_t * b_u, uint8_t * b_l, const int ln, unsigned int a)
++{
++    switch (ln)
++    {
++        default:  // 0 or -1
++            *b_u = a;
++            *b_l = a;
++            break;
++        case 1:  // 2 bytes each
++            a |= a << 8;
++            *(uint16_t *)b_u = a;
++            *(uint16_t *)b_l = a;
++            break;
++        case 2:  // 4 bytes each
++            a |= a << 8;
++            a |= a << 16;
++            *(uint32_t *)b_u = a;
++            *(uint32_t *)b_l = a;
++            break;
++        case 3:  // 8 bytes each, written as two 32-bit stores
++            a |= a << 8;
++            a |= a << 16;
++            *(uint32_t *)b_u = a;
++            *(uint32_t *)(b_u + 4) = a;
++            *(uint32_t *)b_l = a;
++            *(uint32_t *)(b_l + 4) = a;
++            break;
++        case 4:  // 16 bytes each, written as four 32-bit stores
++            a |= a << 8;
++            a |= a << 16;
++            *(uint32_t *)b_u = a;
++            *(uint32_t *)(b_u + 4) = a;
++            *(uint32_t *)(b_u + 8) = a;
++            *(uint32_t *)(b_u + 12) = a;
++            *(uint32_t *)b_l = a;
++            *(uint32_t *)(b_l + 4) = a;
++            *(uint32_t *)(b_l + 8) = a;
++            *(uint32_t *)(b_l + 12) = a;
++            break;
++    }
++}
++// Zero (1 << ln) bytes at b for ln = 1..3; any other ln zeroes a single byte
++static void zap_cabac_stash(uint8_t * b, const int ln)
++{
++    switch (ln)
++    {
++        default:  // 0
++            *b = 0;
++            break;
++        case 1:
++            *(uint16_t *)b = 0;
++            break;
++        case 2:
++            *(uint32_t *)b = 0;
++            break;
++        case 3:
++            *(uint32_t *)b = 0;  // 8 bytes cleared as two 32-bit stores
++            *(uint32_t *)(b + 4) = 0;
++            break;
++    }
++}
++
++
++
++// Set a small square block of bits ((1 << ln) x (1 << ln), ln = 0..3) in a bitmap whose rows are stride bytes apart
++// Bits must be aligned on their size boundary (which will be true of all split CBs)
++static void set_bits(uint8_t * f, const unsigned int x, const unsigned int stride, const unsigned int ln)
++{
++    unsigned int n;
++    const unsigned int sh = (x & 7);  // bit position of x within its byte
++
++    f += (x >> 3);  // byte that holds bit x
++
++    av_assert2(ln <= 3);
++    av_assert2((x & ((1 << ln) - 1)) == 0);  // alignment guarantees the block never straddles a byte
++
++    switch (ln)
++    {
++        default:  // 1
++            f[0] |= 1 << sh;
++            break;
++        case 1:  // 3 * 2
++            n = 3 << sh;
++            f[0] |= n;
++            f[stride] |= n;
++            break;
++        case 2:  // 0xf * 4
++            n = 0xf << sh;
++            f[0] |= n;
++            f[stride] |= n;
++            f[stride * 2] |= n;
++            f[stride * 3] |= n;
++            break;
++        case 3:  // 0xff * 8
++            for (n = 0; n != 8; ++n, f += stride)
++                *f = 0xff;  // plain store is enough: x is 8-aligned here so the whole byte is covered
++            break;
++    }
++}
++
++static const ipe_init_info_t ipe_init_infos[9] = {  // Alloc for bit depths of 8-16; presumably indexed by (bit_depth - 8)
++   {  // 8
++      .luma =   {8, QPU_MC_PRED_N_Y8, inter_pred_setup_y_qpu, inter_pred_sync_qpu, inter_pred_exit_y_qpu},
++      .chroma = {8, QPU_MC_PRED_N_C8, inter_pred_setup_c_qpu, inter_pred_sync_qpu, inter_pred_exit_c_qpu}
++   },
++   {  // 9 - placeholder entry, all fields zero
++      .luma =   {0},
++      .chroma = {0}
++   },
++   {  // 10
++      .luma =   {10, QPU_MC_PRED_N_Y10, inter_pred_setup_y10_qpu, inter_pred_sync10_qpu, inter_pred_exit_y10_qpu},
++      .chroma = {10, QPU_MC_PRED_N_C10, inter_pred_setup_c10_qpu, inter_pred_sync10_qpu, inter_pred_exit_c10_qpu}
++   }
++   // The remaining 6 entries have no initialiser and are therefore zero-filled
++};
++// Set up ipe's ici->n queues: split the ipe->gptr buffer evenly between them and resolve each queue's setup/sync/exit code
++static void set_ipe_from_ici(HEVCRpiInterPredEnv * const ipe, const ipe_chan_info_t * const ici)
++{
++    const unsigned int n = ici->n;  // NOTE(review): n == 0 (a zeroed table entry) would divide by zero below - confirm callers
++    const unsigned int q1_size = (ipe->gptr.numbytes / n) & ~3;  // Round down to word
++
++    ipe->n = n;
++    ipe->max_fill = q1_size - ipe->min_gap;
++    for(unsigned int i = 0; i < n; i++) {
++        HEVCRpiInterPredQ * const q = ipe->q + i;
++        q->qpu_mc_curr = q->qpu_mc_base =
++            (qpu_mc_pred_cmd_t *)(ipe->gptr.arm + i * q1_size);  // queue i starts i * q1_size bytes into the buffer
++        q->code_setup = qpu_fn(ici->setup_fns[i]);
++        q->code_sync = qpu_fn(ici->sync_fns[i]);
++        q->code_exit = qpu_fn(ici->exit_fns[i]);
++    }
++}
++// Initialise s->qpu for the given bit depth via rpi_hevc_qpu_init_fn(); asserts that bit_depth is within 8..16
++static void rpi_hevc_qpu_set_fns(HEVCRpiContext * const s, const unsigned int bit_depth)
++{
++    av_assert0(bit_depth >= 8 && bit_depth <= 16);
++
++    rpi_hevc_qpu_init_fn(&s->qpu, bit_depth);
++}
++
++// Unsigned Trivial MOD: one conditional subtract, so the result equals x % n only when x < 2 * n
++static inline unsigned int utmod(const unsigned int x, const unsigned int n)
++{
++    return x >= n ? x - n : x;
++}
++
++// returns pq->job_n++ (i.e. the value before the increment); the stored index wraps at RPI_MAX_JOBS
++static inline unsigned int pass_queue_inc_job_n(HEVCRpiPassQueue * const pq)
++{
++    unsigned int const x2 = pq->job_n;
++    pq->job_n = utmod(x2 + 1, RPI_MAX_JOBS);
++    return x2;
++}
++// Initialise pass queue pq: record context, worker fn, output semaphore and pass number; sem_in starts at 0 (no work)
++static void pass_queue_init(HEVCRpiPassQueue * const pq, HEVCRpiContext * const s, HEVCRpiWorkerFn * const worker, sem_t * const psem_out, const int n)
++{
++    pq->terminate = 0;
++    pq->job_n = 0;
++    pq->context = s;
++    pq->worker = worker;
++    pq->psem_out = psem_out;
++    pq->pass_n = n;
++    pq->started = 0;  // no thread yet - set by pass_queues_start_all
++    sem_init(&pq->sem_in, 0, 0);
++}
++// Destroy the input semaphore created by pass_queue_init
++static void pass_queue_kill(HEVCRpiPassQueue * const pq)
++{
++    sem_destroy(&pq->sem_in);
++}
++// sem_wait() that retries when interrupted by a signal (EINTR); any other failure trips the assert
++static inline void rpi_sem_wait(sem_t * const sem)
++{
++    while (sem_wait(sem) != 0) {
++        av_assert0(errno == EINTR);
++    }
++}
++// Signal pq's input semaphore once, i.e. tell its waiter that one more job is available
++static void pass_queue_submit_job(HEVCRpiPassQueue * const pq)
++{
++    sem_post(&pq->sem_in);
++}
++// Run every pass's worker function on jb, in pass order, on the calling thread
++static inline void pass_queue_do_all(HEVCRpiContext * const s, HEVCRpiJob * const jb)
++{
++    // Do the various passes - common with the worker code
++    for (unsigned int i = 0; i != RPI_PASSES; ++i) {
++        s->passq[i].worker(s, jb);
++    }
++}
++
++
++#if 0  // Debug-only helper, compiled out: prints the offload queue indices and the current sem_out count
++static void dump_jbc(const HEVCRpiJobCtl *const jbc, const char * const func)
++{
++    int x;
++    sem_getvalue((sem_t *)&jbc->sem_out, &x);  // cast drops const: sem_getvalue takes a non-const sem_t *
++    printf("%s: jbc: in=%d, out=%d, sum=%d\n", func, jbc->offload_in, jbc->offload_out, x);
++}
++#endif
++
++// Get a job for lc: jbc's local job, else the global free chain, else queue lc as a waiter and block until job_free hands one over
++static HEVCRpiJob * job_alloc(HEVCRpiJobCtl * const jbc, HEVCRpiLocalContext * const lc)
++{
++    HEVCRpiJob * jb;
++    HEVCRpiJobGlobal * const jbg = jbc->jbg;
++
++    pthread_mutex_lock(&jbg->lock);  // one global lock covers the local slot, the free chain and both wait lists
++    // Check local 1st
++    if ((jb = jbc->jb1) != NULL)
++    {
++        // Only 1 - very easy :-)
++        jbc->jb1 = NULL;
++    }
++    else
++    {
++        // Now look for global free chain
++        if ((jb = jbg->free1) != NULL)
++        {
++            // Found one - unlink it
++            jbg->free1 = jb->next;
++            jb->next = NULL;
++        }
++        else
++        {
++            // Out of places to look - wait for one to become free - add to Qs
++
++            // Global
++            // If "good" lc then add after the last "good" el in the chain
++            // otherwise add to the tail
++            if (jbg->wait_tail == NULL || jbg->wait_tail->last_progress_good || !lc->last_progress_good)
++            {
++                // Add to end as we had to wait last time or wait Q empty
++                if ((lc->jw_prev = jbg->wait_tail) == NULL)
++                    jbg->wait_head = lc;
++                else
++                    lc->jw_prev->jw_next = lc;
++                lc->jw_next = NULL;
++                jbg->wait_tail = lc;
++            }
++            else
++            {
++                // This is a "good" lc that we need to poke into the middle
++                // of the Q
++                // We know that the Q isn't empty and there is at least one
++                // !last_progress_good el in it from the previous test
++
++                HEVCRpiLocalContext * const p = jbg->wait_good; // Insert after
++
++                if (p == NULL)
++                {
++                    // No current good els - add to head
++                    lc->jw_next = jbg->wait_head;
++                    jbg->wait_head = lc;
++                }
++                else
++                {
++                    lc->jw_next = p->jw_next;
++                    p->jw_next = lc;
++                }
++
++                lc->jw_next->jw_prev = lc;  // jw_next is non-NULL here: the tail is a !good el that follows lc
++                lc->jw_prev = p;
++            }
++
++            // If "good" then we are now the last good waiting el
++            if (lc->last_progress_good)
++                jbg->wait_good = lc;
++
++            // Local
++            if ((lc->ljw_prev = jbc->lcw_tail) == NULL)
++                jbc->lcw_head = lc;
++            else
++                lc->ljw_prev->ljw_next = lc;
++            lc->ljw_next = NULL;
++            jbc->lcw_tail = lc;
++        }
++    }
++
++    pthread_mutex_unlock(&jbg->lock);
++
++    if (jb == NULL)  // Need to wait
++    {
++        rpi_sem_wait(&lc->jw_sem);  // posted by job_free after it has stored the job in lc->jw_job
++        jb = lc->jw_job;  // Set by free code
++    }
++
++    return jb;
++}
++
++// Release jb: pass it straight to a waiting lc if one exists, else park it in its local slot or on the global free chain
++static void job_free(HEVCRpiJobCtl * const jbc0, HEVCRpiJob * const jb)
++{
++    HEVCRpiJobGlobal * const jbg = jbc0->jbg;  // This jbc only used to find jbg so we can get the lock
++    HEVCRpiJobCtl * jbc = jb->jbc_local;  // non-NULL when the job belongs to a specific job control
++    HEVCRpiLocalContext * lc = NULL;
++
++    pthread_mutex_lock(&jbg->lock);
++
++    if (jbc != NULL)
++    {
++        av_assert1(jbc->jb1 == NULL);
++
++        // Release to Local if nothing waiting there
++        if ((lc = jbc->lcw_head) == NULL)
++            jbc->jb1 = jb;
++    }
++    else
++    {
++        // Release to global if nothing waiting there
++        if ((lc = jbg->wait_head) == NULL)
++        {
++            jb->next = jbg->free1;
++            jbg->free1 = jb;
++        }
++        else
++        {
++            // ? seems somehow mildly ugly...
++            jbc = lc->context->jbc;  // needed below to unlink lc from that jbc's local wait list
++        }
++    }
++
++    if (lc != NULL)
++    {
++        // Something was waiting
++
++        // Unlink
++        // Global
++        if (lc->jw_next == NULL)
++            jbg->wait_tail = lc->jw_prev;
++        else
++            lc->jw_next->jw_prev = lc->jw_prev;
++
++        if (lc->jw_prev == NULL)
++            jbg->wait_head = lc->jw_next;
++        else
++            lc->jw_prev->jw_next = lc->jw_next;
++
++        // Local
++        if (lc->ljw_next == NULL)
++            jbc->lcw_tail = lc->ljw_prev;
++        else
++            lc->ljw_next->ljw_prev = lc->ljw_prev;
++
++        if (lc->ljw_prev == NULL)
++            jbc->lcw_head = lc->ljw_next;
++        else
++            lc->ljw_prev->ljw_next = lc->ljw_next;
++
++        // Update good if required
++        if (jbg->wait_good == lc)
++            jbg->wait_good = lc->jw_prev;
++
++        // Prod
++        lc->jw_job = jb;  // must be stored before the post: job_alloc reads it once woken
++        sem_post(&lc->jw_sem);
++    }
++
++    pthread_mutex_unlock(&jbg->lock);
++}
++// Destroy the job-wait semaphore created by job_lc_init
++static void job_lc_kill(HEVCRpiLocalContext * const lc)
++{
++    sem_destroy(&lc->jw_sem);
++}
++// Reset lc's job-wait state: unlinked from the global (jw_*) and local (ljw_*) wait lists, no job, semaphore at 0
++static void job_lc_init(HEVCRpiLocalContext * const lc)
++{
++    lc->jw_next = NULL;
++    lc->jw_prev = NULL;
++    lc->ljw_next = NULL;
++    lc->ljw_prev = NULL;
++    lc->jw_job = NULL;
++    sem_init(&lc->jw_sem,  0, 0);
++}
++
++// Returns:
++//  0 if we have waited for MV or expect to wait for recon
++//  1 if we haven't waited for MV & do not need to wait for recon
++static int progress_good(const HEVCRpiContext *const s, const HEVCRpiJob * const jb)
++{
++    if (jb->waited) // reset by rpi_begin
++        return 0;
++    for (unsigned int i = 0; i != FF_ARRAY_ELEMS(jb->progress_req); ++i)
++    {
++        if (jb->progress_req[i] >= 0 && s->DPB[i].tf.progress != NULL &&  // negative req = nothing required of DPB[i]
++                ((volatile int *)(s->DPB[i].tf.progress->data))[0] < jb->progress_req[i])  // unlocked snapshot of progress
++            return 0;
++    }
++    return 1;
++}
++
++// Submit job if it is full (indicated by having ctu_ts_last set >= 0); otherwise leave it attached to lc and return
++static inline void worker_submit_job(HEVCRpiContext *const s, HEVCRpiLocalContext * const lc)
++{
++    HEVCRpiJobCtl *const jbc = s->jbc;
++    HEVCRpiJob * const jb = lc->jb0;
++
++    av_assert1(jb != NULL);
++
++    if (jb->ctu_ts_last < 0) {
++        return;
++    }
++
++    lc->last_progress_good = progress_good(s, jb);  // remembered for queue ordering in job_alloc
++    jb->waited = !lc->last_progress_good;
++    lc->jb0 = NULL;  // lc no longer owns the job
++
++    if (s->offload_recon)
++    {
++        pthread_mutex_lock(&jbc->in_lock);
++        jbc->offloadq[jbc->offload_in] = jb;  // append to the circular offload queue
++        jbc->offload_in = utmod(jbc->offload_in + 1, RPI_MAX_JOBS);
++        pthread_mutex_unlock(&jbc->in_lock);
++
++        pass_queue_submit_job(s->passq + 0);  // Consumes job eventually
++    }
++    else
++    {
++        pass_queue_do_all(s, jb);  // Consumes job before return
++    }
++}
++
++
++// Call worker_pass0_ready to make sure lc has a job (lc->jb0) to fill, waiting
++// for one to become available if necessary, and to prepare it with rpi_begin.
++//
++// Now safe against multiple callers - needed for tiles
++// "normal" and WPP will only call here one at a time
++static inline void worker_pass0_ready(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc)
++{
++    HEVCRpiJobCtl * const jbc = s->jbc;
++
++    // It is legit for us to already have a job allocated - do nothing in this case
++    if (lc->jb0 != NULL)
++        return;
++
++    if (s->offload_recon)
++        rpi_sem_wait(&jbc->sem_out);  // This sem will stop this frame grabbing too much
++
++    lc->jb0 = job_alloc(jbc, lc);  // may block until another job is freed
++
++    rpi_begin(s, lc->jb0, lc->ts);
++}
++
++// Free up a job without submission (no-op if lc holds no job)
++static void worker_free(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc)
++{
++    HEVCRpiJobCtl * const jbc = s->jbc;
++    HEVCRpiJob * const jb = lc->jb0;
++
++    if (jb == NULL) {
++        return;
++    }
++
++    lc->jb0 = NULL;
++
++    job_free(jbc, jb);
++
++    // If offload then poke sem_out too
++    if (s->offload_recon) {
++        sem_post(&jbc->sem_out);  // gives back the count taken in worker_pass0_ready
++    }
++}
++
++
++// Call this to wait for all jobs to have completed at the end of a frame
++// Slightly icky as there is no clean way to wait for a sem to count up
++// Not reentrant - call on main thread only
++static void worker_wait(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc)
++{
++    HEVCRpiJobCtl * const jbc = s->jbc;
++    int i = 0;
++
++    // We shouldn't reach here with an unsubmitted job
++    av_assert1(lc->jb0 == NULL);
++
++    // If no offload then there can't be anything to wait for
++    if (!s->offload_recon) {
++        return;
++    }
++
++    if (sem_getvalue(&jbc->sem_out, &i) == 0 && i < RPI_MAX_JOBS)  // presumably a full count means nothing is outstanding
++    {
++        for (i = 0; i != RPI_MAX_JOBS; ++i) {
++            rpi_sem_wait(&jbc->sem_out);  // take RPI_MAX_JOBS counts...
++        }
++        for (i = 0; i != RPI_MAX_JOBS; ++i) {
++            sem_post(&jbc->sem_out);  // ...then give them all back
++        }
++    }
++}
++// Thread body for one pass queue (arg): wait on sem_in, run the worker on the next offloadq job, post psem_out; exit on terminate
++static void * pass_worker(void *arg)
++{
++    HEVCRpiPassQueue *const pq = (HEVCRpiPassQueue *)arg;
++    HEVCRpiContext *const s = pq->context;
++
++    for (;;)
++    {
++        rpi_sem_wait(&pq->sem_in);
++
++        if (pq->terminate)  // checked after every wake-up; pass_queues_term_all sets it then posts sem_in
++            break;
++
++        pq->worker(s, s->jbc->offloadq[pass_queue_inc_job_n(pq)]);  // each queue keeps its own read index (job_n)
++        // * should really set jb->passes_done here
++
++        sem_post(pq->psem_out);
++    }
++    return NULL;
++}
++// Create one pass_worker thread per pass queue and mark each queue as started; asserts if thread creation fails
++static void pass_queues_start_all(HEVCRpiContext *const s)
++{
++    unsigned int i;
++    HEVCRpiPassQueue * const pqs = s->passq;
++
++    for (i = 0; i != RPI_PASSES; ++i)
++    {
++        av_assert0(pthread_create(&pqs[i].thread, NULL, pass_worker, pqs + i) == 0);  // NOTE(review): relies on av_assert0 always evaluating its argument - confirm
++        pqs[i].started = 1;
++    }
++}
++// Stop all started pass worker threads: flag terminate on every queue, wake each thread, then join it
++static void pass_queues_term_all(HEVCRpiContext *const s)
++{
++    unsigned int i;
++    HEVCRpiPassQueue * const pqs = s->passq;
++
++    for (i = 0; i != RPI_PASSES; ++i)
++        pqs[i].terminate = 1;
++    for (i = 0; i != RPI_PASSES; ++i)
++    {
++        if (pqs[i].started)
++            sem_post(&pqs[i].sem_in);  // wake the thread so that it sees terminate
++    }
++    for (i = 0; i != RPI_PASSES; ++i)
++    {
++        if (pqs[i].started) {
++            pthread_join(pqs[i].thread, NULL);
++            pqs[i].started = 0;
++        }
++    }
++}
++// Destroy the input semaphore of every pass queue
++static void pass_queues_kill_all(HEVCRpiContext *const s)
++{
++    unsigned int i;
++    HEVCRpiPassQueue * const pqs = s->passq;
++
++    for (i = 0; i != RPI_PASSES; ++i)
++        pass_queue_kill(pqs + i);
++}
++
++// Release jb's coefficient buffers (whichever were allocated) and zero the whole coeffs env
++static void worker_pic_free_one(HEVCRpiJob * const jb)
++{
++    // Free coeff stuff - allocation not the same for all buffers
++    HEVCRpiCoeffsEnv * const cf = &jb->coeffs;
++
++    if (cf->s[0].buf != NULL)  // s[0].buf set => mptr was allocated
++        av_freep(&cf->mptr);
++    if (cf->s[2].buf != NULL)  // s[2].buf set => gptr was allocated
++        gpu_free(&cf->gptr);
++    memset(cf, 0, sizeof(*cf));
++}
++// Allocate jb's coefficient buffers for coeff_count coeffs; returns 0, or -1 after freeing any partial allocation
++static int worker_pic_alloc_one(HEVCRpiJob * const jb, const unsigned int coeff_count)
++{
++    HEVCRpiCoeffsEnv * const cf = &jb->coeffs;
++
++    if (gpu_malloc_cached((coeff_count + 32*32) * sizeof(cf->s[2].buf[0]), &cf->gptr) != 0)
++        goto fail;
++    cf->s[2].buf = (int16_t *)cf->gptr.arm;
++    cf->s[3].buf = cf->s[2].buf + coeff_count;  // s[3] starts coeff_count elements into the same allocation
++
++    // Must be 64 byte aligned for our zero zapping code so over-allocate &
++    // round
++    if ((cf->mptr = av_malloc(coeff_count * sizeof(cf->s[0].buf[0]) + 63)) == NULL)
++        goto fail;
++    cf->s[0].buf = (void *)(((intptr_t)cf->mptr + 63) & ~63);  // aligned view; mptr keeps the pointer to free
++    return 0;
++
++fail:
++    av_log(NULL, AV_LOG_ERROR, "%s: Allocation failed\n", __func__);
++    worker_pic_free_one(jb);
++    return -1;
++}
++// Reset the fill counters of all 4 coefficient buffers (plus the packed state when RPI_COMPRESS_COEFFS is set)
++static void worker_pic_reset(HEVCRpiCoeffsEnv * const cf)
++{
++    unsigned int i;
++    for (i = 0; i != 4; ++i) {
++        cf->s[i].n = 0;
++#if RPI_COMPRESS_COEFFS
++        cf->s[i].packed = 1;
++        cf->s[i].packed_n = 0;
++#endif
++    }
++}
++// Reserve n coeffs in jb's buffer buf_no and return their start; buffer 3 grows downwards from buf, the others upwards (no bounds check)
++int16_t * rpi_alloc_coeff_buf(HEVCRpiJob * const jb, const int buf_no, const int n)
++{
++    HEVCRpiCoeffEnv *const cfe = jb->coeffs.s + buf_no;
++    int16_t * const coeffs = (buf_no != 3) ? cfe->buf + cfe->n : cfe->buf - (cfe->n + n);
++    cfe->n += n;
++    return coeffs;
++}
++// Block until ref's progress value for field reaches val; returns at once if there is no progress buffer or it is already there
++void ff_hevc_rpi_progress_wait_field(const HEVCRpiContext * const s, HEVCRpiJob * const jb,
++                                     const HEVCRpiFrame * const ref, const int val, const int field)
++{
++    if (ref->tf.progress != NULL && ((int *)ref->tf.progress->data)[field] < val) {  // unlocked fast-path check
++        HEVCRpiContext *const fs = ref->tf.owner[field]->priv_data;
++        HEVCRpiFrameProgressState * const pstate = fs->progress_states + field;
++        sem_t * sem = NULL;
++
++        av_assert0(pthread_mutex_lock(&pstate->lock) == 0);
++        if (((volatile int *)ref->tf.progress->data)[field] < val) {  // re-check now that we hold the lock
++            HEVCRpiFrameProgressWait * const pwait = &jb->progress_wait;
++
++            av_assert1(pwait->req == -1 && pwait->next == NULL);
++            jb->waited = 1;  // Remember that we had to wait for later scheduling
++
++            pwait->req = val;
++            pwait->next = NULL;
++            if (pstate->first == NULL)  // append to the owner's singly linked waiter list
++                pstate->first = pwait;
++            else
++                pstate->last->next = pwait;
++            pstate->last = pwait;
++            sem = &pwait->sem;
++        }
++        pthread_mutex_unlock(&pstate->lock);
++
++        if (sem != NULL) {
++            rpi_sem_wait(sem);  // sleep outside the lock
++        }
++    }
++}
++// Publish progress val for field of the current frame (s->ref) and wake every queued waiter whose request is <= val
++void ff_hevc_rpi_progress_signal_field(HEVCRpiContext * const s, const int val, const int field)
++{
++    HEVCRpiFrameProgressState *const pstate = s->progress_states + field;
++
++    ((int *)s->ref->tf.progress->data)[field] = val;  // stored before the lock is taken
++
++    av_assert0(pthread_mutex_lock(&pstate->lock) == 0);
++    {
++        HEVCRpiFrameProgressWait ** ppwait = &pstate->first;
++        HEVCRpiFrameProgressWait * pwait;
++
++        while ((pwait = *ppwait) != NULL) {
++            if (pwait->req > val)
++            {
++                ppwait = &pwait->next;  // not satisfied yet - keep it in the list
++                pstate->last = pwait;  // ends up as the last retained waiter
++            }
++            else
++            {
++                *ppwait = pwait->next;  // unlink, reset to the idle state and wake
++                pwait->req = -1;
++                pwait->next = NULL;
++                sem_post(&pwait->sem);
++            }
++        }
++    }
++    pthread_mutex_unlock(&pstate->lock);
++}
++// Initialise a progress state: empty waiter list and a default-attribute mutex
++static void ff_hevc_rpi_progress_init_state(HEVCRpiFrameProgressState * const pstate)
++{
++    pstate->first = NULL;
++    pstate->last = NULL;
++    pthread_mutex_init(&pstate->lock, NULL);
++}
++// Initialise a progress waiter to the idle state (req == -1, unlinked) with its semaphore at 0
++static void ff_hevc_rpi_progress_init_wait(HEVCRpiFrameProgressWait * const pwait)
++{
++    pwait->req = -1;
++    pwait->next = NULL;
++    sem_init(&pwait->sem, 0, 0);
++}
++// Destroy a progress state's mutex; the waiter list is expected to be empty by now
++static void ff_hevc_rpi_progress_kill_state(HEVCRpiFrameProgressState * const pstate)
++{
++    av_assert1(pstate->first == NULL);
++    pthread_mutex_destroy(&pstate->lock);
++}
++// Destroy the semaphore created by ff_hevc_rpi_progress_init_wait
++static void ff_hevc_rpi_progress_kill_wait(HEVCRpiFrameProgressWait * const pwait)
++{
++    sem_destroy(&pwait->sem);
++}
++
++
++/**
++ * NOTE: Each function hls_foo corresponds to the function foo in the
++ * specification (HLS stands for High Level Syntax).
++ */
++
++/**
++ * Section 5.7
++ */
++
++// Realloc the entry point arrays to hold at least n entries (n == 0 just frees them); returns 0 or AVERROR(ENOMEM)
++static int alloc_entry_points(RpiSliceHeader * const sh, const int n)
++{
++    if (sh->entry_point_offset == NULL || n > sh->offsets_allocated || n == 0)
++    {
++        // Round up alloc to multiple of 32
++        int a = (n + 31) & ~31;
++
++        // We don't care about the previous contents so probably fastest to simply discard
++        av_freep(&sh->entry_point_offset);
++        av_freep(&sh->offset);
++        av_freep(&sh->size);
++
++        if (a != 0)
++        {
++            sh->entry_point_offset = av_malloc_array(a, sizeof(unsigned));
++            sh->offset = av_malloc_array(a, sizeof(int));
++            sh->size = av_malloc_array(a, sizeof(int));
++
++            if (!sh->entry_point_offset || !sh->offset || !sh->size) {
++                sh->num_entry_point_offsets = 0;
++                sh->offsets_allocated = 0;  // any array that did allocate is released by the av_freep calls of a later call
++                return AVERROR(ENOMEM);
++            }
++        }
++
++        sh->offsets_allocated = a;
++    }
++
++    return 0;
++}
++
++/* free everything allocated  by pic_arrays_init(); pointers that alias another allocation are only reset to NULL */
++static void pic_arrays_free(HEVCRpiContext *s)
++{
++    av_freep(&s->sao);
++    av_freep(&s->deblock);
++
++    av_freep(&s->cabac_stash_up);
++    s->cabac_stash_left = NULL;  // freed with _up
++
++    av_freep(&s->mvf_up);
++    av_freep(&s->mvf_left);
++
++    av_freep(&s->is_pcm);
++    av_freep(&s->is_intra_store);
++    s->is_intra = NULL;  // presumably points into is_intra_store - not freed separately
++    av_freep(&s->rpl_tab);
++    s->rpl_tab_size = 0;
++
++    av_freep(&s->qp_y_tab);
++    av_freep(&s->tab_slice_address);
++    av_freep(&s->filter_slice_edges);
++
++    av_freep(&s->bs_horizontal);
++    s->bs_vertical = NULL;  // freed with H
++    av_freep(&s->bsf_stash_left);
++    av_freep(&s->bsf_stash_up);
++
++    av_freep(&s->rpl_up);
++    av_freep(&s->rpl_left);
++
++    alloc_entry_points(&s->sh, 0);  // n == 0 frees the entry point arrays
++
++    av_buffer_pool_uninit(&s->col_mvf_pool);
++}
++
++/* allocate arrays that depend on frame dimensions; returns 0, or AVERROR(ENOMEM) after freeing any partial allocation */
++static int pic_arrays_init(HEVCRpiContext * const s, const HEVCRpiSPS * const sps)
++{
++    const unsigned int log2_min_cb_size = sps->log2_min_cb_size;
++    const unsigned int width            = sps->width;
++    const unsigned int height           = sps->height;
++    const unsigned int pic_size_in_cb   = ((width  >> log2_min_cb_size) + 1) *
++                           ((height >> log2_min_cb_size) + 1);
++    const unsigned int ctb_count        = sps->ctb_size;
++
++    {
++        unsigned int w = ((width + HEVC_RPI_BS_STRIDE1_PEL_MASK) & ~HEVC_RPI_BS_STRIDE1_PEL_MASK);
++        unsigned int h = ((height + 15) & ~15);
++
++        s->bs_stride2 = h >> HEVC_RPI_BS_COL_BYTES_SHR; // Column size
++        s->bs_size = s->bs_stride2 * (w >> HEVC_RPI_BS_STRIDE1_PEL_SHIFT); // col size * cols
++    }
++
++    s->sao           = av_mallocz(ctb_count * sizeof(*s->sao) + 8); // Our sao code overreads this array slightly
++    s->deblock       = av_mallocz_array(ctb_count, sizeof(*s->deblock));
++    if (!s->sao || !s->deblock)
++        goto fail;
++
++    s->cabac_stash_up  = av_malloc((((width + 63) & ~63) >> 3) + (((height + 63) & ~63) >> 3));
++    if (s->cabac_stash_up == NULL)  // check first: _left is derived from _up (one shared allocation)
++        goto fail;
++    s->cabac_stash_left = s->cabac_stash_up + (((width + 63) & ~63) >> 3);
++
++    // Round width up to max ctb size
++    s->mvf_up = av_malloc((((width + 63) & ~63) >> LOG2_MIN_PU_SIZE) * sizeof(*s->mvf_up));
++    // * Only needed if we have H tiles
++    s->mvf_left = av_malloc((((height + 63) & ~63) >> LOG2_MIN_PU_SIZE) * sizeof(*s->mvf_left));
++
++    // We can overread by 1 line & one byte in deblock so alloc & zero
++    // We don't need to zero the extra @ start of frame as it will never be
++    // written
++    s->is_pcm   = av_mallocz(sps->pcm_width * (sps->pcm_height + 1) + 1);
++    s->is_intra_store = av_mallocz(sps->pcm_width * (sps->pcm_height + 1) + 1);
++    if (s->mvf_up == NULL || s->mvf_left == NULL || s->is_pcm == NULL || s->is_intra_store == NULL)
++        goto fail;
++
++    s->filter_slice_edges = av_mallocz(ctb_count);
++    s->tab_slice_address  = av_malloc_array(ctb_count,
++                                      sizeof(*s->tab_slice_address));
++    s->qp_y_tab           = av_malloc_array(pic_size_in_cb,
++                                      sizeof(*s->qp_y_tab));
++    if (!s->qp_y_tab || !s->filter_slice_edges || !s->tab_slice_address)
++        goto fail;
++
++    s->bs_horizontal = av_mallocz(s->bs_size * 2);  // one allocation holds both H and V halves
++    if (s->bs_horizontal == NULL)
++        goto fail;
++    s->bs_vertical   = s->bs_horizontal + s->bs_size;
++
++    s->rpl_up = av_mallocz(sps->ctb_width * sizeof(*s->rpl_up));
++    s->rpl_left = av_mallocz(sps->ctb_height * sizeof(*s->rpl_left));
++    if (s->rpl_left == NULL || s->rpl_up == NULL)
++        goto fail;
++
++    if ((s->bsf_stash_left = av_mallocz(((height + 63) & ~63) >> 4)) == NULL ||
++        (s->bsf_stash_up   = av_mallocz(((width + 63) & ~63) >> 4)) == NULL)
++        goto fail;
++
++    s->col_mvf_stride = (width + 15) >> 4;
++    s->col_mvf_pool = av_buffer_pool_init(((height + 15) >> 4) * s->col_mvf_stride * sizeof(ColMvField),
++                                          av_buffer_allocz);
++    if (s->col_mvf_pool == NULL)
++        goto fail;
++
++    return 0;
++
++fail:
++    pic_arrays_free(s);  // every pointer is either NULL or valid here, so a full free is safe
++    return AVERROR(ENOMEM);
++}
++// Fill s->sh with default prediction weights: denoms 0, every L0/L1 weight = 1 << QPU_MC_DENOM, every offset 0
++static void default_pred_weight_table(HEVCRpiContext * const s)
++{
++  unsigned int i;
++  const unsigned int wt = 1 << QPU_MC_DENOM;
++  s->sh.luma_log2_weight_denom = 0;
++  s->sh.chroma_log2_weight_denom = 0;
++  for (i = 0; i < s->sh.nb_refs[L0]; i++) {
++      s->sh.luma_weight_l0[i] = wt;
++      s->sh.luma_offset_l0[i] = 0;
++      s->sh.chroma_weight_l0[i][0] = wt;
++      s->sh.chroma_weight_l0[i][1] = wt;
++      s->sh.chroma_offset_l0[i][0] = 0;
++      s->sh.chroma_offset_l0[i][1] = 0;
++  }
++  for (i = 0; i < s->sh.nb_refs[L1]; i++) {
++      s->sh.luma_weight_l1[i] = wt;
++      s->sh.luma_offset_l1[i] = 0;
++      s->sh.chroma_weight_l1[i][0] = wt;
++      s->sh.chroma_weight_l1[i][1] = wt;
++      s->sh.chroma_offset_l1[i][0] = 0;
++      s->sh.chroma_offset_l1[i][1] = 0;
++  }
++}
++// Parse weights/offsets for refs references from gb into the output arrays (2 chroma values per ref); 0 or AVERROR_INVALIDDATA
++static int get_weights(HEVCRpiContext * const s, GetBitContext * const gb,
++                       const unsigned int refs,
++                       int16_t * luma_weight,   int16_t * luma_offset,
++                       int16_t * chroma_weight, int16_t * chroma_offset)
++{
++    unsigned int luma_flags;
++    unsigned int chroma_flags;
++    unsigned int i;
++    const unsigned int wp_offset_bd_shift = s->ps.sps->high_precision_offsets_enabled_flag ? 0 : (s->ps.sps->bit_depth - 8);
++    const int wp_offset_half_range = s->ps.sps->wp_offset_half_range;
++    const unsigned int luma_weight_base    = 1 << QPU_MC_DENOM;
++    const unsigned int chroma_weight_base  = 1 << QPU_MC_DENOM;
++    const unsigned int luma_weight_shift   = (QPU_MC_DENOM - s->sh.luma_log2_weight_denom);  // rescales weights to a QPU_MC_DENOM denominator
++    const unsigned int chroma_weight_shift = (QPU_MC_DENOM - s->sh.chroma_log2_weight_denom);
++
++    if (refs == 0)
++        return 0;
++
++    luma_flags = get_bits(gb, refs);  // one flag bit per ref, first ref in the most significant bit
++    chroma_flags = ctx_cfmt(s) == 0 ? 0 : get_bits(gb, refs);
++    i = 1 << (refs - 1);  // mask for the first ref; shifted right once per ref
++
++    do
++    {
++        if ((luma_flags & i) != 0)
++        {
++            const int delta_weight = get_se_golomb(gb);
++            const int offset = get_se_golomb(gb);
++            if (delta_weight < -128 || delta_weight > 127 ||
++                offset < -wp_offset_half_range || offset >= wp_offset_half_range)
++            {
++                return AVERROR_INVALIDDATA;
++            }
++            *luma_weight++ = luma_weight_base + delta_weight * (1 << luma_weight_shift);  // multiply: << on a negative value is UB
++            *luma_offset++ = offset * (1 << wp_offset_bd_shift);
++        }
++        else
++        {
++            *luma_weight++ = luma_weight_base;
++            *luma_offset++ = 0;
++        }
++
++        if ((chroma_flags & i) != 0)
++        {
++            unsigned int j;
++            for (j = 0; j != 2; ++j)
++            {
++                const int delta_weight = get_se_golomb(gb);
++                const int delta_offset = get_se_golomb(gb);
++
++                if (delta_weight < -128 || delta_weight > 127 ||
++                    delta_offset < -4 * wp_offset_half_range || delta_offset >= 4 * wp_offset_half_range)
++                {
++                    return AVERROR_INVALIDDATA;
++                }
++
++                *chroma_weight++ = chroma_weight_base + delta_weight * (1 << chroma_weight_shift);  // multiply: << on a negative value is UB
++                *chroma_offset++ = av_clip(
++                    wp_offset_half_range + delta_offset -
++                        ((wp_offset_half_range * ((1 << s->sh.chroma_log2_weight_denom) + delta_weight)) >> s->sh.chroma_log2_weight_denom),
++                    -wp_offset_half_range, wp_offset_half_range - 1) * (1 << wp_offset_bd_shift);
++            }
++        }
++        else
++        {
++            *chroma_weight++ = chroma_weight_base;
++            *chroma_weight++ = chroma_weight_base;
++            *chroma_offset++ = 0;
++            *chroma_offset++ = 0;
++        }
++    } while ((i >>= 1) != 0);
++
++    return 0;
++}
++// Parse the prediction weight table from gb into s->sh: both log2 denoms (each must be <= 7), then L0 and L1 weights; 0 or error
++static int pred_weight_table(HEVCRpiContext *s, GetBitContext *gb)
++{
++    int err;
++    const unsigned int luma_log2_weight_denom = get_ue_golomb_long(gb);
++    const unsigned int chroma_log2_weight_denom = (ctx_cfmt(s) == 0) ? 0 : luma_log2_weight_denom + get_se_golomb(gb);  // unsigned: a negative sum wraps and fails the range check
++
++    if (luma_log2_weight_denom > 7 ||
++        chroma_log2_weight_denom > 7)
++    {
++        av_log(s->avctx, AV_LOG_ERROR, "Invalid prediction weight denom: luma=%d, chroma=%d\n",
++               luma_log2_weight_denom, chroma_log2_weight_denom);
++        return AVERROR_INVALIDDATA;
++    }
++
++    s->sh.luma_log2_weight_denom = luma_log2_weight_denom;  // stored first: get_weights reads both denoms from s->sh
++    s->sh.chroma_log2_weight_denom = chroma_log2_weight_denom;
++
++    if ((err = get_weights(s, gb, s->sh.nb_refs[L0],
++                s->sh.luma_weight_l0,      s->sh.luma_offset_l0,
++                s->sh.chroma_weight_l0[0], s->sh.chroma_offset_l0[0])) != 0 ||
++        (err = get_weights(s, gb, s->sh.nb_refs[L1],
++                s->sh.luma_weight_l1,      s->sh.luma_offset_l1,
++                s->sh.chroma_weight_l1[0], s->sh.chroma_offset_l1[0])) != 0)
++    {
++        av_log(s->avctx, AV_LOG_ERROR, "Invalid prediction weight or offset\n");
++        return err;
++    }
++
++    return 0;
++}
++// Parse the long-term reference picture set from gb into rps (empty if the SPS has none); returns 0 or AVERROR_INVALIDDATA
++static int decode_lt_rps(HEVCRpiContext *s, LongTermRPS *rps, GetBitContext *gb)
++{
++    const HEVCRpiSPS *sps = s->ps.sps;
++    int max_poc_lsb    = 1 << sps->log2_max_poc_lsb;
++    int prev_delta_msb = 0;
++    unsigned int nb_sps = 0, nb_sh;
++    int i;
++
++    rps->nb_refs = 0;
++    if (!sps->long_term_ref_pics_present_flag)
++        return 0;
++
++    if (sps->num_long_term_ref_pics_sps > 0)
++        nb_sps = get_ue_golomb_long(gb);
++    nb_sh = get_ue_golomb_long(gb);
++
++    if (nb_sps > sps->num_long_term_ref_pics_sps)
++        return AVERROR_INVALIDDATA;
++    if (nb_sh + (uint64_t)nb_sps > FF_ARRAY_ELEMS(rps->poc))  // 64-bit sum so that the check itself cannot wrap
++        return AVERROR_INVALIDDATA;
++
++    rps->nb_refs = nb_sh + nb_sps;
++
++    for (i = 0; i < rps->nb_refs; i++) {
++        uint8_t delta_poc_msb_present;
++
++        if (i < nb_sps) {  // the first nb_sps entries are taken from the SPS tables
++            uint8_t lt_idx_sps = 0;
++
++            if (sps->num_long_term_ref_pics_sps > 1)
++                lt_idx_sps = get_bits(gb, av_ceil_log2(sps->num_long_term_ref_pics_sps));
++
++            rps->poc[i]  = sps->lt_ref_pic_poc_lsb_sps[lt_idx_sps];
++            rps->used[i] = sps->used_by_curr_pic_lt_sps_flag[lt_idx_sps];
++        } else {  // the rest are coded explicitly in the slice header
++            rps->poc[i]  = get_bits(gb, sps->log2_max_poc_lsb);
++            rps->used[i] = get_bits1(gb);
++        }
++
++        delta_poc_msb_present = get_bits1(gb);
++        if (delta_poc_msb_present) {
++            int64_t delta = get_ue_golomb_long(gb);
++            int64_t poc;
++
++            if (i && i != nb_sps)  // deltas accumulate within each group; restart at the first slice-header entry
++                delta += prev_delta_msb;
++
++            poc = rps->poc[i] + s->poc - delta * max_poc_lsb - s->sh.pic_order_cnt_lsb;
++            if (poc != (int32_t)poc)  // reject values that do not fit in 32 bits
++                return AVERROR_INVALIDDATA;
++            rps->poc[i] = poc;
++            prev_delta_msb = delta;
++        }
++    }
++
++    return 0;
++}
++
++static void export_stream_params(AVCodecContext *avctx, const HEVCRpiParamSets *ps,
++                                 const HEVCRpiSPS *sps)
++{
++    // Publish the stream properties carried by sps (and the VPS it references) on avctx
++    const HEVCRpiVPS *vps = (const HEVCRpiVPS*)ps->vps_list[sps->vps_id]->data;
++    const HEVCRpiWindow *win = &sps->output_window;
++    const int have_colour = sps->vui.colour_description_present_flag;
++    unsigned int ticks = 0, scale = 0;
++
++    avctx->profile      = sps->ptl.general_ptl.profile_idc;
++    avctx->level        = sps->ptl.general_ptl.level_idc;
++    avctx->pix_fmt      = sps->pix_fmt;
++    avctx->coded_width  = sps->width;
++    avctx->coded_height = sps->height;
++    // Visible size is the coded size with the output window offsets removed
++    avctx->width        = sps->width  - win->left_offset - win->right_offset;
++    avctx->height       = sps->height - win->top_offset  - win->bottom_offset;
++    avctx->has_b_frames = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics;
++
++    ff_set_sar(avctx, sps->vui.sar);
++
++    // JPEG (full) range only when the VUI both carries the signal type and sets the flag
++    avctx->color_range = (sps->vui.video_signal_type_present_flag &&
++                          sps->vui.video_full_range_flag) ? AVCOL_RANGE_JPEG : AVCOL_RANGE_MPEG;
++
++    // Colour description straight from the VUI if present, otherwise unspecified
++    avctx->color_primaries = have_colour ? sps->vui.colour_primaries        : AVCOL_PRI_UNSPECIFIED;
++    avctx->color_trc       = have_colour ? sps->vui.transfer_characteristic : AVCOL_TRC_UNSPECIFIED;
++    avctx->colorspace      = have_colour ? sps->vui.matrix_coeffs           : AVCOL_SPC_UNSPECIFIED;
++
++    // VPS timing info takes priority over VUI timing info
++    if (vps->vps_timing_info_present_flag) {
++        ticks = vps->vps_num_units_in_tick;
++        scale = vps->vps_time_scale;
++    } else if (sps->vui.vui_timing_info_present_flag) {
++        ticks = sps->vui.vui_num_units_in_tick;
++        scale = sps->vui.vui_time_scale;
++    }
++
++    // Without a complete timing pair avctx->framerate is left untouched
++    if (ticks == 0 || scale == 0)
++        return;
++
++    av_reduce(&avctx->framerate.den, &avctx->framerate.num,
++              ticks, scale, 1 << 30);
++}
++
++static enum AVPixelFormat get_format(HEVCRpiContext *s, const HEVCRpiSPS *sps)
++{
++    // Offer only the SPS's own format (admit to no h/w formats), AV_PIX_FMT_NONE terminated
++    enum AVPixelFormat candidates[4] = { sps->pix_fmt, AV_PIX_FMT_NONE };
++
++    // An SPS with no format gives an empty list - answer NONE without asking
++    if (candidates[0] == AV_PIX_FMT_NONE)
++        return AV_PIX_FMT_NONE;
++
++    return ff_thread_get_format(s->avctx, candidates);
++}
++
++static int is_sps_supported(const HEVCRpiSPS * const sps)
++{   // 1 iff the SPS uses a sand pixel format and fits HEVC_RPI_MAX_WIDTH x HEVC_RPI_MAX_HEIGHT
++    if (!av_rpi_is_sand_format(sps->pix_fmt))
++        return 0;
++    return !(sps->width > HEVC_RPI_MAX_WIDTH || sps->height > HEVC_RPI_MAX_HEIGHT);
++}
++
++static int set_sps(HEVCRpiContext * const s, const HEVCRpiSPS * const sps,
++                   const enum AVPixelFormat pix_fmt)
++{   // Make sps the active SPS (NULL just deactivates); returns 0 or a negative AVERROR
++    int ret;
++
++    pic_arrays_free(s);
++    s->ps.sps = NULL;
++    s->ps.vps = NULL;
++
++    if (sps == NULL)
++        return 0;
++
++    if (!is_sps_supported(sps))
++        return AVERROR_DECODER_NOT_FOUND;
++
++    ret = pic_arrays_init(s, sps);
++    if (ret < 0)
++        goto fail;
++
++    export_stream_params(s->avctx, &s->ps, sps);
++
++    s->avctx->pix_fmt = pix_fmt;
++
++    ff_hevc_rpi_pred_init(&s->hpc,     sps->bit_depth);
++    ff_hevc_rpi_dsp_init (&s->hevcdsp, sps->bit_depth);
++
++    // * We don't support cross_component_prediction_enabled_flag but as that must
++    //   be 0 unless we have 4:4:4 there is no point testing for it as we only
++    //   deal with sand which is never 4:4:4 [support wouldn't be hard]
++
++    rpi_hevc_qpu_set_fns(s, sps->bit_depth);
++
++    av_freep(&s->sao_pixel_buffer_h[0]);
++    av_freep(&s->sao_pixel_buffer_v[0]);
++
++    if (sps->sao_enabled) {
++        const unsigned int c_count = (ctx_cfmt(s) != 0) ? 3 : 1;
++        unsigned int c_idx;
++        size_t vsize[3] = {0};
++        size_t hsize[3] = {0};
++        for(c_idx = 0; c_idx < c_count; c_idx++) {
++            int w = sps->width >> ctx_hshift(s, c_idx);
++            int h = sps->height >> ctx_vshift(s, c_idx);
++            // ctb height & width are a min of 8 so this must be a multiple of 16 - no point rounding up!
++            hsize[c_idx] = (w * 2 * sps->ctb_height) << sps->pixel_shift;
++            vsize[c_idx] = (h * 2 * sps->ctb_width) << sps->pixel_shift;
++        }
++        // One lump per direction so h[1]/v[1] can extend into h[2]/v[2] (plaited chroma); both must succeed
++        s->sao_pixel_buffer_h[0] = av_malloc(hsize[0] + hsize[1] + hsize[2]);
++        s->sao_pixel_buffer_v[0] = av_malloc(vsize[0] + vsize[1] + vsize[2]);
++        if (s->sao_pixel_buffer_h[0] == NULL || s->sao_pixel_buffer_v[0] == NULL) {
++            av_freep(&s->sao_pixel_buffer_h[0]);
++            av_freep(&s->sao_pixel_buffer_v[0]);
++            ret = AVERROR(ENOMEM);
++            goto fail;
++        }
++        s->sao_pixel_buffer_h[1] = s->sao_pixel_buffer_h[0] + hsize[0];
++        s->sao_pixel_buffer_h[2] = s->sao_pixel_buffer_h[1] + hsize[1];
++        s->sao_pixel_buffer_v[1] = s->sao_pixel_buffer_v[0] + vsize[0];
++        s->sao_pixel_buffer_v[2] = s->sao_pixel_buffer_v[1] + vsize[1];
++    }
++
++    s->ps.sps = sps;
++    s->ps.vps = (HEVCRpiVPS*) s->ps.vps_list[s->ps.sps->vps_id]->data;
++
++    return 0;
++
++fail:
++    pic_arrays_free(s);
++    s->ps.sps = NULL;
++    return ret;
++}
++
++static inline int qp_offset_valid(const int qp_offset)
++{   // 1 iff qp_offset lies in the closed range [-12, 12]
++    return !(qp_offset < -12 || qp_offset > 12);
++}
++
++static int hls_slice_header(HEVCRpiContext * const s)
++{   // Parse one slice segment header from s->HEVClc->gb into s->sh; returns 0 or a negative AVERROR
++    GetBitContext * const gb = &s->HEVClc->gb;
++    RpiSliceHeader * const sh   = &s->sh;
++    int i, ret;
++
++    // Coded parameters
++    sh->first_slice_in_pic_flag = get_bits1(gb);
++    if ((IS_IDR(s) || IS_BLA(s)) && sh->first_slice_in_pic_flag) {
++        s->seq_decode = (s->seq_decode + 1) & 0xff;
++        s->max_ra     = INT_MAX;
++        if (IS_IDR(s))
++            ff_hevc_rpi_clear_refs(s);
++    }
++    sh->no_output_of_prior_pics_flag = 0;
++    if (IS_IRAP(s))
++        sh->no_output_of_prior_pics_flag = get_bits1(gb);
++
++    sh->pps_id = get_ue_golomb_long(gb);
++    if (sh->pps_id >= HEVC_MAX_PPS_COUNT || !s->ps.pps_list[sh->pps_id]) {
++        av_log(s->avctx, AV_LOG_ERROR, "PPS id out of range: %d\n", sh->pps_id);
++        return AVERROR_INVALIDDATA;
++    }
++    if (!sh->first_slice_in_pic_flag &&
++        s->ps.pps != (HEVCRpiPPS*)s->ps.pps_list[sh->pps_id]->data) {
++        av_log(s->avctx, AV_LOG_ERROR, "PPS changed between slices.\n");
++        return AVERROR_INVALIDDATA;
++    }
++    s->ps.pps = (HEVCRpiPPS*)s->ps.pps_list[sh->pps_id]->data;
++    if (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos == 1)
++        sh->no_output_of_prior_pics_flag = 1;
++
++    if (s->ps.sps != (HEVCRpiSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data) {  // PPS refers to a different SPS: switch
++        const HEVCRpiSPS *sps = (HEVCRpiSPS*)s->ps.sps_list[s->ps.pps->sps_id]->data;
++        const HEVCRpiSPS *last_sps = s->ps.sps;
++        enum AVPixelFormat pix_fmt;
++
++        if (last_sps && IS_IRAP(s) && s->nal_unit_type != HEVC_NAL_CRA_NUT) {
++            if (sps->width != last_sps->width || sps->height != last_sps->height ||
++                sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering !=
++                last_sps->temporal_layer[last_sps->max_sub_layers - 1].max_dec_pic_buffering)
++                sh->no_output_of_prior_pics_flag = 0;
++        }
++        ff_hevc_rpi_clear_refs(s);
++
++        ret = set_sps(s, sps, sps->pix_fmt);
++        if (ret < 0)
++            return ret;
++
++        pix_fmt = get_format(s, sps);
++        if (pix_fmt < 0)
++            return pix_fmt;
++
++//        ret = set_sps(s, sps, pix_fmt);
++//        if (ret < 0)
++//            return ret;
++
++        s->avctx->pix_fmt = pix_fmt;
++
++        s->seq_decode = (s->seq_decode + 1) & 0xff;
++        s->max_ra     = INT_MAX;
++    }
++
++    sh->dependent_slice_segment_flag = 0;
++    if (!sh->first_slice_in_pic_flag) {
++        int slice_address_length;
++
++        if (s->ps.pps->dependent_slice_segments_enabled_flag)
++            sh->dependent_slice_segment_flag = get_bits1(gb);
++
++        slice_address_length = av_ceil_log2(s->ps.sps->ctb_size);
++        sh->slice_segment_addr = get_bitsz(gb, slice_address_length);
++        if (sh->slice_segment_addr >= s->ps.sps->ctb_size) {
++            av_log(s->avctx, AV_LOG_ERROR,
++                   "Invalid slice segment address: %u.\n",
++                   sh->slice_segment_addr);
++            return AVERROR_INVALIDDATA;
++        }
++
++        if (!sh->dependent_slice_segment_flag) {
++            sh->slice_addr = sh->slice_segment_addr;
++            s->slice_idx++;
++        }
++    } else {
++        sh->slice_segment_addr = sh->slice_addr = 0;
++        s->slice_idx           = 0;
++        s->slice_initialized   = 0;
++    }
++
++    if (!sh->dependent_slice_segment_flag) {  // the rest of the header is only coded for independent segments
++        s->slice_initialized = 0;
++
++        for (i = 0; i < s->ps.pps->num_extra_slice_header_bits; i++)
++            skip_bits(gb, 1);  // slice_reserved_undetermined_flag[]
++
++        sh->slice_type = get_ue_golomb_long(gb);
++        if (!(sh->slice_type == HEVC_SLICE_I ||
++              sh->slice_type == HEVC_SLICE_P ||
++              sh->slice_type == HEVC_SLICE_B)) {
++            av_log(s->avctx, AV_LOG_ERROR, "Unknown slice type: %d.\n",
++                   sh->slice_type);
++            return AVERROR_INVALIDDATA;
++        }
++        if (IS_IRAP(s) && sh->slice_type != HEVC_SLICE_I) {
++            av_log(s->avctx, AV_LOG_ERROR, "Inter slices in an IRAP frame.\n");
++            return AVERROR_INVALIDDATA;
++        }
++
++        // when flag is not present, picture is inferred to be output
++        sh->pic_output_flag = 1;
++        if (s->ps.pps->output_flag_present_flag)
++            sh->pic_output_flag = get_bits1(gb);
++
++        if (s->ps.sps->separate_colour_plane_flag)
++            sh->colour_plane_id = get_bits(gb, 2);
++
++        if (!IS_IDR(s)) {
++            int poc, pos;
++
++            sh->pic_order_cnt_lsb = get_bits(gb, s->ps.sps->log2_max_poc_lsb);
++            poc = ff_hevc_rpi_compute_poc(s->ps.sps, s->pocTid0, sh->pic_order_cnt_lsb, s->nal_unit_type);
++            if (!sh->first_slice_in_pic_flag && poc != s->poc) {
++                av_log(s->avctx, AV_LOG_WARNING,
++                       "Ignoring POC change between slices: %d -> %d\n", s->poc, poc);
++                if (s->avctx->err_recognition & AV_EF_EXPLODE)
++                    return AVERROR_INVALIDDATA;
++                poc = s->poc;
++            }
++            s->poc = poc;
++
++            sh->short_term_ref_pic_set_sps_flag = get_bits1(gb);
++            pos = get_bits_left(gb);
++            if (!sh->short_term_ref_pic_set_sps_flag) {
++                ret = ff_hevc_rpi_decode_short_term_rps(gb, s->avctx, &sh->slice_rps, s->ps.sps, 1);
++                if (ret < 0)
++                    return ret;
++
++                sh->short_term_rps = &sh->slice_rps;
++            } else {
++                int numbits, rps_idx;
++
++                if (!s->ps.sps->nb_st_rps) {
++                    av_log(s->avctx, AV_LOG_ERROR, "No ref lists in the SPS.\n");
++                    return AVERROR_INVALIDDATA;
++                }
++
++                numbits = av_ceil_log2(s->ps.sps->nb_st_rps);
++                rps_idx = numbits > 0 ? get_bits(gb, numbits) : 0;
++                sh->short_term_rps = &s->ps.sps->st_rps[rps_idx];
++            }
++            sh->short_term_ref_pic_set_size = pos - get_bits_left(gb);
++
++            pos = get_bits_left(gb);
++            ret = decode_lt_rps(s, &sh->long_term_rps, gb);
++            if (ret < 0) {
++                av_log(s->avctx, AV_LOG_WARNING, "Invalid long term RPS.\n");
++                if (s->avctx->err_recognition & AV_EF_EXPLODE)
++                    return AVERROR_INVALIDDATA;
++            }
++            sh->long_term_ref_pic_set_size = pos - get_bits_left(gb);
++
++            if (s->ps.sps->sps_temporal_mvp_enabled_flag)
++                sh->slice_temporal_mvp_enabled_flag = get_bits1(gb);
++            else
++                sh->slice_temporal_mvp_enabled_flag = 0;
++        } else {
++            s->sh.short_term_rps = NULL;
++            s->poc               = 0;
++        }
++
++        /* 8.3.1 */
++        if (sh->first_slice_in_pic_flag && s->temporal_id == 0 &&
++            s->nal_unit_type != HEVC_NAL_TRAIL_N &&
++            s->nal_unit_type != HEVC_NAL_TSA_N   &&
++            s->nal_unit_type != HEVC_NAL_STSA_N  &&
++            s->nal_unit_type != HEVC_NAL_RADL_N  &&
++            s->nal_unit_type != HEVC_NAL_RADL_R  &&
++            s->nal_unit_type != HEVC_NAL_RASL_N  &&
++            s->nal_unit_type != HEVC_NAL_RASL_R)
++            s->pocTid0 = s->poc;
++
++        if (s->ps.sps->sao_enabled) {
++            sh->slice_sample_adaptive_offset_flag[0] = get_bits1(gb);
++            if (ctx_cfmt(s) != 0) {
++                sh->slice_sample_adaptive_offset_flag[1] =
++                sh->slice_sample_adaptive_offset_flag[2] = get_bits1(gb);
++            }
++        } else {
++            sh->slice_sample_adaptive_offset_flag[0] = 0;
++            sh->slice_sample_adaptive_offset_flag[1] = 0;
++            sh->slice_sample_adaptive_offset_flag[2] = 0;
++        }
++
++        sh->nb_refs[L0] = sh->nb_refs[L1] = 0;
++        if (sh->slice_type == HEVC_SLICE_P || sh->slice_type == HEVC_SLICE_B) {
++            int nb_refs;
++
++            sh->nb_refs[L0] = s->ps.pps->num_ref_idx_l0_default_active;
++            if (sh->slice_type == HEVC_SLICE_B)
++                sh->nb_refs[L1] = s->ps.pps->num_ref_idx_l1_default_active;
++
++            if (get_bits1(gb)) { // num_ref_idx_active_override_flag
++                sh->nb_refs[L0] = get_ue_golomb_long(gb) + 1;
++                if (sh->slice_type == HEVC_SLICE_B)
++                    sh->nb_refs[L1] = get_ue_golomb_long(gb) + 1;
++            }
++            if (sh->nb_refs[L0] > HEVC_MAX_REFS || sh->nb_refs[L1] > HEVC_MAX_REFS) {
++                av_log(s->avctx, AV_LOG_ERROR, "Too many refs: %d/%d.\n",
++                       sh->nb_refs[L0], sh->nb_refs[L1]);
++                return AVERROR_INVALIDDATA;
++            }
++
++            sh->rpl_modification_flag[0] = 0;
++            sh->rpl_modification_flag[1] = 0;
++            nb_refs = ff_hevc_rpi_frame_nb_refs(s);
++            if (!nb_refs) {
++                av_log(s->avctx, AV_LOG_ERROR, "Zero refs for a frame with P or B slices.\n");
++                return AVERROR_INVALIDDATA;
++            }
++
++            if (s->ps.pps->lists_modification_present_flag && nb_refs > 1) {
++                sh->rpl_modification_flag[0] = get_bits1(gb);
++                if (sh->rpl_modification_flag[0]) {
++                    for (i = 0; i < sh->nb_refs[L0]; i++)
++                        sh->list_entry_lx[0][i] = get_bits(gb, av_ceil_log2(nb_refs));
++                }
++
++                if (sh->slice_type == HEVC_SLICE_B) {
++                    sh->rpl_modification_flag[1] = get_bits1(gb);
++                    if (sh->rpl_modification_flag[1] == 1)
++                        for (i = 0; i < sh->nb_refs[L1]; i++)
++                            sh->list_entry_lx[1][i] = get_bits(gb, av_ceil_log2(nb_refs));
++                }
++            }
++
++            if (sh->slice_type == HEVC_SLICE_B)
++                sh->mvd_l1_zero_flag = get_bits1(gb);
++
++            if (s->ps.pps->cabac_init_present_flag)
++                sh->cabac_init_flag = get_bits1(gb);
++            else
++                sh->cabac_init_flag = 0;
++
++            sh->collocated_ref_idx = 0;
++            if (sh->slice_temporal_mvp_enabled_flag) {
++                sh->collocated_list = L0;
++                if (sh->slice_type == HEVC_SLICE_B)
++                    sh->collocated_list = !get_bits1(gb);
++
++                if (sh->nb_refs[sh->collocated_list] > 1) {
++                    sh->collocated_ref_idx = get_ue_golomb_long(gb);
++                    if (sh->collocated_ref_idx >= sh->nb_refs[sh->collocated_list]) {
++                        av_log(s->avctx, AV_LOG_ERROR,
++                               "Invalid collocated_ref_idx: %d.\n",
++                               sh->collocated_ref_idx);
++                        return AVERROR_INVALIDDATA;
++                    }
++                }
++            }
++
++            if ((s->ps.pps->weighted_pred_flag   && sh->slice_type == HEVC_SLICE_P) ||
++                (s->ps.pps->weighted_bipred_flag && sh->slice_type == HEVC_SLICE_B))
++            {
++                if ((ret = pred_weight_table(s, gb)) != 0)
++                    return ret;
++            }
++            else
++            {
++                // Give us unit weights
++                default_pred_weight_table(s);
++            }
++
++            sh->max_num_merge_cand = 5 - get_ue_golomb_long(gb);
++            if (sh->max_num_merge_cand < 1 || sh->max_num_merge_cand > 5) {
++                av_log(s->avctx, AV_LOG_ERROR,
++                       "Invalid number of merging MVP candidates: %d.\n",
++                       sh->max_num_merge_cand);
++                return AVERROR_INVALIDDATA;
++            }
++        }
++
++        sh->slice_qp_delta = get_se_golomb(gb);
++
++        if (s->ps.pps->pic_slice_level_chroma_qp_offsets_present_flag) {
++            sh->slice_cb_qp_offset = get_se_golomb(gb);
++            sh->slice_cr_qp_offset = get_se_golomb(gb);
++            if (!qp_offset_valid(sh->slice_cb_qp_offset) ||
++                !qp_offset_valid(s->ps.pps->cb_qp_offset + sh->slice_cb_qp_offset) ||
++                !qp_offset_valid(sh->slice_cr_qp_offset) ||
++                !qp_offset_valid(s->ps.pps->cr_qp_offset + sh->slice_cr_qp_offset))
++            {
++                av_log(s->avctx, AV_LOG_ERROR, "Bad chroma offset (pps:%d/%d; slice=%d/%d)\n",
++                       s->ps.pps->cb_qp_offset, s->ps.pps->cr_qp_offset,
++                       sh->slice_cb_qp_offset, sh->slice_cr_qp_offset);
++                return AVERROR_INVALIDDATA;
++            }
++        } else
++        {
++            sh->slice_cb_qp_offset = 0;
++            sh->slice_cr_qp_offset = 0;
++        }
++
++        if (s->ps.pps->chroma_qp_offset_list_enabled_flag)
++            sh->cu_chroma_qp_offset_enabled_flag = get_bits1(gb);
++        else
++            sh->cu_chroma_qp_offset_enabled_flag = 0;
++
++        if (s->ps.pps->deblocking_filter_control_present_flag) {
++            int deblocking_filter_override_flag = 0;
++
++            if (s->ps.pps->deblocking_filter_override_enabled_flag)
++                deblocking_filter_override_flag = get_bits1(gb);
++
++            if (deblocking_filter_override_flag) {
++                sh->disable_deblocking_filter_flag = get_bits1(gb);
++                if (!sh->disable_deblocking_filter_flag) {
++                    int beta_offset_div2 = get_se_golomb(gb);
++                    int tc_offset_div2   = get_se_golomb(gb) ;
++                    if (beta_offset_div2 < -6 || beta_offset_div2 > 6 ||
++                        tc_offset_div2   < -6 || tc_offset_div2   > 6) {
++                        av_log(s->avctx, AV_LOG_ERROR,
++                            "Invalid deblock filter offsets: %d, %d\n",
++                            beta_offset_div2, tc_offset_div2);
++                        return AVERROR_INVALIDDATA;
++                    }
++                    sh->beta_offset = beta_offset_div2 * 2;
++                    sh->tc_offset   =   tc_offset_div2 * 2;
++                }
++            } else {
++                sh->disable_deblocking_filter_flag = s->ps.pps->disable_dbf;
++                sh->beta_offset                    = s->ps.pps->beta_offset;
++                sh->tc_offset                      = s->ps.pps->tc_offset;
++            }
++        } else {
++            sh->disable_deblocking_filter_flag = 0;
++            sh->beta_offset                    = 0;
++            sh->tc_offset                      = 0;
++        }
++
++        if (s->ps.pps->seq_loop_filter_across_slices_enabled_flag &&
++            (sh->slice_sample_adaptive_offset_flag[0] ||
++             sh->slice_sample_adaptive_offset_flag[1] ||
++             !sh->disable_deblocking_filter_flag)) {
++            sh->slice_loop_filter_across_slices_enabled_flag = get_bits1(gb);
++        } else {
++            sh->slice_loop_filter_across_slices_enabled_flag = s->ps.pps->seq_loop_filter_across_slices_enabled_flag;
++        }
++        sh->no_dblk_boundary_flags =
++            (sh->slice_loop_filter_across_slices_enabled_flag ? 0 :
++                BOUNDARY_UPPER_SLICE | BOUNDARY_LEFT_SLICE) |
++            (s->ps.pps->loop_filter_across_tiles_enabled_flag ? 0 :
++                BOUNDARY_UPPER_TILE | BOUNDARY_LEFT_TILE);
++
++
++    } else if (!s->slice_initialized) {  // dependent segment, but no header has completed successfully before it
++        av_log(s->avctx, AV_LOG_ERROR, "Independent slice segment missing.\n");
++        return AVERROR_INVALIDDATA;
++    }
++
++    sh->num_entry_point_offsets = 0;
++    sh->offload_wpp = 0;
++    sh->offload_tiles = 0;
++
++    if (s->ps.pps->tiles_enabled_flag || s->ps.pps->entropy_coding_sync_enabled_flag) {
++        unsigned num_entry_point_offsets = get_ue_golomb_long(gb);
++        // It would be possible to bound this tighter but this here is simpler
++        if (num_entry_point_offsets > get_bits_left(gb)) {
++            av_log(s->avctx, AV_LOG_ERROR, "num_entry_point_offsets %d is invalid\n", num_entry_point_offsets);
++            return AVERROR_INVALIDDATA;
++        }
++
++        sh->num_entry_point_offsets = num_entry_point_offsets;
++        if (sh->num_entry_point_offsets > 0) {
++            int offset_len = get_ue_golomb_long(gb) + 1;
++
++            if (offset_len < 1 || offset_len > 32) {
++                sh->num_entry_point_offsets = 0;
++                av_log(s->avctx, AV_LOG_ERROR, "offset_len %d is invalid\n", offset_len);
++                return AVERROR_INVALIDDATA;
++            }
++
++            if ((ret = alloc_entry_points(sh, sh->num_entry_point_offsets)) < 0)
++            {
++                av_log(s->avctx, AV_LOG_ERROR, "Failed to allocate memory\n");
++                return ret;
++            }
++
++            for (i = 0; i < sh->num_entry_point_offsets; i++) {
++                uint32_t val_minus1 = get_bits_long(gb, offset_len);
++                if (val_minus1 > (1 << 28))
++                {
++                    // We can declare offsets of > 2^28 bad without loss of generality
++                    // Will check actual bounds wrt NAL later, but this keeps
++                    // the values within bounds we can deal with easily
++                    av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset_minus1 %d invalid\n", val_minus1);
++                    return AVERROR_INVALIDDATA;
++                }
++                sh->entry_point_offset[i] = val_minus1 + 1; // +1 to get the size
++            }
++
++            // Do we want to offload this
++            if (s->threads_type != 0)
++            {
++                sh->offload_tiles = (!s->ps.pps->tile_wpp_inter_disable || sh->slice_type == HEVC_SLICE_I) &&
++                    s->ps.pps->num_tile_columns > 1;
++                // * We only cope with WPP in a single column
++                //   Probably want to deal with that case as tiles rather than WPP anyway
++                // ?? Not actually sure that the main code deals with WPP + multi-col correctly
++                sh->offload_wpp = s->ps.pps->entropy_coding_sync_enabled_flag &&
++                    s->ps.pps->num_tile_columns == 1;
++            }
++        }
++    }
++
++    if (s->ps.pps->slice_header_extension_present_flag) {
++        unsigned int length = get_ue_golomb_long(gb);
++        if (length*8LL > get_bits_left(gb)) {
++            av_log(s->avctx, AV_LOG_ERROR, "too many slice_header_extension_data_bytes\n");
++            return AVERROR_INVALIDDATA;
++        }
++        for (i = 0; i < length; i++)
++            skip_bits(gb, 8);  // slice_header_extension_data_byte
++    }
++
++    // Inferred parameters
++    sh->slice_qp = 26U + s->ps.pps->pic_init_qp_minus26 + sh->slice_qp_delta;
++    if (sh->slice_qp > 51 ||
++        sh->slice_qp < -s->ps.sps->qp_bd_offset) {
++        av_log(s->avctx, AV_LOG_ERROR,
++               "The slice_qp %d is outside the valid range "
++               "[%d, 51].\n",
++               sh->slice_qp,
++               -s->ps.sps->qp_bd_offset);
++        return AVERROR_INVALIDDATA;
++    }
++
++    if (get_bits_left(gb) < 0) {
++        av_log(s->avctx, AV_LOG_ERROR,
++               "Overread slice header by %d bits\n", -get_bits_left(gb));
++        return AVERROR_INVALIDDATA;
++    }
++
++    s->slice_initialized = 1;
++    return 0;
++}
++
++static void hls_sao_param(const HEVCRpiContext *s, HEVCRpiLocalContext * const lc, const int rx, const int ry)
++{   // Decode the SAO parameters for the CTB at column rx, row ry into its slot of s->sao
++    RpiSAOParams * const sao = s->sao + rx + ry * s->ps.sps->ctb_width;
++    int c_idx, i;
++
++    if (s->sh.slice_sample_adaptive_offset_flag[0] ||
++        s->sh.slice_sample_adaptive_offset_flag[1]) {
++        if ((lc->ctb_avail & AVAIL_L) != 0)
++        {
++            const int sao_merge_left_flag = ff_hevc_rpi_sao_merge_flag_decode(lc);
++            if (sao_merge_left_flag) {
++                *sao = sao[-1];  // take the whole parameter set from the CTB to the left
++                return;
++            }
++        }
++        if ((lc->ctb_avail & AVAIL_U) != 0)
++        {
++            const int sao_merge_up_flag = ff_hevc_rpi_sao_merge_flag_decode(lc);
++            if (sao_merge_up_flag) {
++                *sao = sao[-(int)s->ps.sps->ctb_width];  // one CTB row back, i.e. the CTB above
++                return;
++            }
++        }
++    }
++
++    for (c_idx = 0; c_idx < (ctx_cfmt(s) != 0 ? 3 : 1); c_idx++) {
++        const unsigned int log2_sao_offset_scale = c_idx == 0 ? s->ps.pps->log2_sao_offset_scale_luma :
++                                                 s->ps.pps->log2_sao_offset_scale_chroma;
++        int offset_abs[4];
++        char offset_sign[4] = {0};
++
++        if (!s->sh.slice_sample_adaptive_offset_flag[c_idx]) {
++            sao->type_idx[c_idx] = SAO_NOT_APPLIED;
++            continue;
++        }
++
++        if (c_idx == 2) {  // third component decodes no type/class of its own: it reuses component 1's
++            sao->type_idx[2] = sao->type_idx[1];
++            sao->eo_class[2] = sao->eo_class[1];
++        } else {
++            sao->type_idx[c_idx] = ff_hevc_rpi_sao_type_idx_decode(lc);
++        }
++
++        // ** Could use BY22 here quite plausibly - this is all bypass stuff
++        //    though only per CTB so not very timing critical
++
++        if (sao->type_idx[c_idx] == SAO_NOT_APPLIED)
++            continue;
++
++        for (i = 0; i < 4; i++)
++            offset_abs[i] = ff_hevc_rpi_sao_offset_abs_decode(s, lc);
++
++        if (sao->type_idx[c_idx] == SAO_BAND) {
++            for (i = 0; i < 4; i++) {
++                if (offset_abs[i] != 0)  // a sign is only decoded for non-zero offsets
++                    offset_sign[i] = ff_hevc_rpi_sao_offset_sign_decode(lc);
++            }
++            sao->band_position[c_idx] = ff_hevc_rpi_sao_band_position_decode(lc);
++        } else if (c_idx != 2) {
++            sao->eo_class[c_idx] = ff_hevc_rpi_sao_eo_class_decode(lc);
++        }
++
++        // Inferred parameters
++        sao->offset_val[c_idx][0] = 0;
++        for (i = 0; i < 4; i++) {
++            sao->offset_val[c_idx][i + 1] = offset_abs[i] << log2_sao_offset_scale;
++            if (sao->type_idx[c_idx] == SAO_EDGE) {
++                if (i > 1)  // edge type: the last two offsets are always stored negated
++                    sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
++            } else if (offset_sign[i]) {  // otherwise negate according to the decoded sign
++                sao->offset_val[c_idx][i + 1] = -sao->offset_val[c_idx][i + 1];
++            }
++        }
++    }
++}
++
++#if 0  // Compiled out: the function below is never built
++static int hls_cross_component_pred(HEVCRpiLocalContext * const lc, const int idx) {
++    int log2_res_scale_abs_plus1 = ff_hevc_rpi_log2_res_scale_abs(lc, idx);  // 0..4
++
++    if (log2_res_scale_abs_plus1 !=  0) {
++        int res_scale_sign_flag = ff_hevc_rpi_res_scale_sign_flag(lc, idx);
++        lc->tu.res_scale_val = (1 << (log2_res_scale_abs_plus1 - 1)) *
++                               (1 - 2 * res_scale_sign_flag);  // sign flag set -> negative scale
++    } else {
++        lc->tu.res_scale_val = 0;
++    }
++
++
++    return 0;
++}
++#endif
++
++static inline HEVCPredCmd * rpi_new_intra_cmd(HEVCRpiJob * const jb)
++{   // Hand out the next slot of jb->intra.cmds and bump the count (no bounds check here)
++    return &jb->intra.cmds[jb->intra.n++];
++}
++
++#define A0(x, y, U, L, UL, UR, DL) \
++    [(x)+(y)*16] = (((U) ? AVAIL_U : 0) | ((L) ? AVAIL_L : 0) | ((UL) ? AVAIL_UL : 0) | ((UR) ? AVAIL_UR : 0) | ((DL) ? AVAIL_DL : 0))
++// A0 initialises one table entry with its AVAIL_* mask; A1..A4 each tile four half-size groups (2x2 up to 16x16 entries)
++#define A1(x, y, U, L, UL, UR, DL) \
++    A0((x) + 0, (y) + 0, (U),  (L),  (UL), (U),  (L) ),  A0((x) + 1, (y) + 0, (U),   1,   (U),  (UR),  0  ),\
++    A0((x) + 0, (y) + 1,  1,   (L),  (L),   1,   (DL)),  A0((x) + 1, (y) + 1,  1,    1,    1,    0,    0  )
++
++#define A2(x, y, U, L, UL, UR, DL) \
++    A1((x) + 0, (y) + 0, (U),  (L),  (UL), (U),  (L) ),  A1((x) + 2, (y) + 0, (U),   1,   (U),  (UR),  0  ),\
++    A1((x) + 0, (y) + 2,  1,   (L),  (L),   1,   (DL)),  A1((x) + 2, (y) + 2,  1,    1,    1,    0,    0  )
++
++#define A3(x, y, U, L, UL, UR, DL) \
++    A2((x) + 0, (y) + 0, (U),  (L),  (UL), (U),  (L) ),  A2((x) + 4, (y) + 0, (U),   1,   (U),  (UR),  0  ),\
++    A2((x) + 0, (y) + 4,  1,   (L),  (L),   1,   (DL)),  A2((x) + 4, (y) + 4,  1,    1,    1,    0,    0  )
++
++#define A4(x, y, U, L, UL, UR, DL) \
++    A3((x) + 0, (y) + 0, (U),  (L),  (UL), (U),  (L) ),  A3((x) + 8, (y) + 0, (U),   1,   (U),  (UR),  0  ),\
++    A3((x) + 0, (y) + 8,  1,   (L),  (L),   1,   (DL)),  A3((x) + 8, (y) + 8,  1,    1,    1,    0,    0  )
++// 16x16 grid of availability masks (row stride 16), generated at compile time with no neighbours assumed outside the grid
++static const uint8_t tb_flags[16 * 16] = {A4(0, 0, 0, 0, 0, 0, 0)};
++
++unsigned int ff_hevc_rpi_tb_avail_flags(
++    const HEVCRpiContext * const s, const HEVCRpiLocalContext * const lc,
++    const unsigned int x, const unsigned int y, const unsigned int w, const unsigned int h)
++{   // Returns the AVAIL_* neighbour mask for the w x h block at (x, y)
++    const unsigned int ctb_mask = ~0U << s->ps.sps->log2_ctb_size;
++    const unsigned int tb_x = x & ~ctb_mask;  // position of the block inside its CTB
++    const unsigned int tb_y = y & ~ctb_mask;
++    const unsigned int ctb_avail = lc->ctb_avail;
++    // Table entry for the 4x4 unit at the block's top-left corner
++    const uint8_t * const tb_f = tb_flags + (tb_x >> 2) + (tb_y >> 2) * 16;
++
++    unsigned int f = (ctb_avail | tb_f[0]) & (AVAIL_L | AVAIL_U | AVAIL_UL);
++
++    // This deals with both the U & L edges
++    if ((tb_x | tb_y) != 0 && (~f & (AVAIL_L | AVAIL_U)) == 0)
++        f |= AVAIL_UL;
++    // Up-right: from the CTB's U bit on the top row, else from the table entry under the block's right edge
++    if (x + w < lc->end_of_ctb_x)
++        f |= (tb_y == 0 ? ctb_avail >> (AVAIL_S_U - AVAIL_S_UR) : tb_f[(w - 1) >> 2]) & AVAIL_UR;
++    else if (tb_y == 0)
++        f |= (ctb_avail & AVAIL_UR);
++#if AVAIL_S_U - AVAIL_S_UR < 0
++#error Shift problem
++#endif
++
++    // Never any D if Y beyond eoctb
++    if (y + h < lc->end_of_ctb_y)
++        f |= (tb_x == 0 ? ctb_avail << (AVAIL_S_DL - AVAIL_S_L) : tb_f[((h - 1) >> 2) * 16]) & AVAIL_DL;
++#if AVAIL_S_DL - AVAIL_S_L < 0
++#error Shift problem
++#endif
++
++//    printf("(%#x, %#x): %dx%d ca=%02x, ful=%02x, ftr=%02x, fdl=%02x, eox=%#x, eoy=%#x\n", x, y, w, h,
++//           lc->ctb_avail, tb_f[0], tb_f[(w - 1) >> 2], tb_f[((h - 1) >> 2) * 16],
++//           lc->end_of_ctb_x, lc->end_of_ctb_y);
++
++    return f;
++}
++// The table-builder macros are not needed beyond this point
++#undef A0
++#undef A1
++#undef A2
++#undef A3
++#undef A4
++
++static void do_intra_pred(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int log2_trafo_size, int x0, int y0, int c_idx,
++                          unsigned int avail)
++{
++    HEVCPredCmd *cmd;
++
++    // If rpi_enabled then sand - U & V done on U call, so c_idx 2 queues nothing
++    if (c_idx > 1)
++        return;
++
++    cmd = rpi_new_intra_cmd(lc->jb0);
++    cmd->type = RPI_PRED_INTRA + c_idx;
++    cmd->size = log2_trafo_size;
++    cmd->avail = avail;
++    cmd->i_pred.x = x0;
++    cmd->i_pred.y = y0;
++    cmd->i_pred.mode = (c_idx == 0) ? lc->tu.intra_pred_mode : lc->tu.intra_pred_mode_c;
++}
++
++#define CBF_CB0_S 0 // bit positions of the chroma cbf flags within one unsigned int
++#define CBF_CB1_S 1 // CB1 must be CB0 + 1
++#define CBF_CR0_S 2
++#define CBF_CR1_S 3 // CR1 is likewise CR0 + 1
++
++#define CBF_CB0 (1 << CBF_CB0_S) // single-bit masks built from the positions above
++#define CBF_CR0 (1 << CBF_CR0_S)
++#define CBF_CB1 (1 << CBF_CB1_S)
++#define CBF_CR1 (1 << CBF_CR1_S)
++
++// * Only good for chroma_idx == 1
++static int hls_transform_unit(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                              const unsigned int x0, const unsigned int y0,
++                              const unsigned int log2_cb_size, const unsigned int log2_trafo_size,
++                              const unsigned int blk_idx, const int cbf_luma,
++                              const unsigned int cbf_chroma)
++{ // Decodes one transform unit: queues intra prediction, reads QP delta / chroma QP offset, then residuals. Returns 0 or AVERROR_INVALIDDATA
++    const unsigned int log2_trafo_size_c = FFMAX(2, log2_trafo_size - 1); // chroma TB is one log2 step smaller, but never below log2 == 2
++    const unsigned int x0_c = x0 & ~7; // x0 with the low 3 bits cleared (8-aligned)
++    const unsigned int y0_c = y0 & ~7; // y0 with the low 3 bits cleared (8-aligned)
++
++    enum ScanType scan_idx   = SCAN_DIAG; // default scans; only overridden for small intra TBs below
++    enum ScanType scan_idx_c = SCAN_DIAG;
++
++    if (lc->cu.pred_mode == MODE_INTRA)
++    {
++        const unsigned int trafo_size = 1 << log2_trafo_size;
++        const unsigned int avail = ff_hevc_rpi_tb_avail_flags(s, lc, x0, y0, trafo_size, trafo_size); // flags for the square luma TB
++
++        do_intra_pred(s, lc, log2_trafo_size, x0, y0, 0, avail);
++
++        if (log2_trafo_size > 2) // chroma is queued with this TB, reusing the luma avail flags
++            do_intra_pred(s, lc, log2_trafo_size_c, x0_c, y0_c, 1, avail);
++        else if (blk_idx == 3) // log2 == 2: chroma is queued once, on sub-block 3, with flags for the 8x8 at (x0_c, y0_c)
++            do_intra_pred(s, lc, log2_trafo_size_c, x0_c, y0_c, 1,
++                          ff_hevc_rpi_tb_avail_flags(s, lc, x0_c, y0_c, 8, 8));
++
++        if (log2_trafo_size < 4) { // scan order depends on the intra mode only when log2_trafo_size is 2 or 3
++            if (lc->tu.intra_pred_mode >= 6 &&
++                lc->tu.intra_pred_mode <= 14) {
++                scan_idx = SCAN_VERT;
++            } else if (lc->tu.intra_pred_mode >= 22 &&
++                       lc->tu.intra_pred_mode <= 30) {
++                scan_idx = SCAN_HORIZ;
++            }
++
++            if (lc->tu.intra_pred_mode_c >=  6 &&
++                lc->tu.intra_pred_mode_c <= 14) {
++                scan_idx_c = SCAN_VERT;
++            } else if (lc->tu.intra_pred_mode_c >= 22 &&
++                       lc->tu.intra_pred_mode_c <= 30) {
++                scan_idx_c = SCAN_HORIZ;
++            }
++        }
++    }
++
++    if (!cbf_luma && cbf_chroma == 0) // no coded-block flag set: nothing further is decoded for this TU
++        return 0;
++
++    if (lc->tu.is_cu_qp_delta_wanted)
++    {
++        const int qp_delta = ff_hevc_rpi_cu_qp_delta(lc);
++        const unsigned int cb_mask = ~0U << log2_cb_size; // clears the bits below 1 << log2_cb_size
++
++        if (qp_delta < -(26 + (s->ps.sps->qp_bd_offset >> 1)) || // reject a delta outside the range reported in the log message
++            qp_delta >  (25 + (s->ps.sps->qp_bd_offset >> 1)))
++        {
++            av_log(s->avctx, AV_LOG_ERROR,
++                   "The cu_qp_delta %d is outside the valid range "
++                   "[%d, %d].\n",
++                   qp_delta,
++                   -(26 + (s->ps.sps->qp_bd_offset >> 1)),
++                    (25 + (s->ps.sps->qp_bd_offset >> 1)));
++            return AVERROR_INVALIDDATA;
++        }
++
++        lc->tu.is_cu_qp_delta_wanted = 0; // clear the request flag now that the delta has been read
++        lc->tu.cu_qp_delta = qp_delta;
++        ff_hevc_rpi_set_qPy(s, lc, x0 & cb_mask, y0 & cb_mask); // position rounded down to a multiple of 1 << log2_cb_size
++    }
++
++    // * Not main profile & untested due to no conform streams
++    if (lc->tu.cu_chroma_qp_offset_wanted && cbf_chroma &&
++        !lc->cu.cu_transquant_bypass_flag) {
++        int cu_chroma_qp_offset_flag = ff_hevc_rpi_cu_chroma_qp_offset_flag(lc);
++        if (cu_chroma_qp_offset_flag) {
++            int cu_chroma_qp_offset_idx  = 0; // index 0 is used when the list has a single entry
++            if (s->ps.pps->chroma_qp_offset_list_len_minus1 > 0) {
++                cu_chroma_qp_offset_idx = ff_hevc_rpi_cu_chroma_qp_offset_idx(s, lc);
++            }
++            lc->tu.qp_divmod6[1] += s->ps.pps->cb_qp_offset_list[cu_chroma_qp_offset_idx];
++            lc->tu.qp_divmod6[2] += s->ps.pps->cr_qp_offset_list[cu_chroma_qp_offset_idx];
++        }
++        lc->tu.cu_chroma_qp_offset_wanted = 0; // cleared whether or not the flag was set
++    }
++
++    if (cbf_luma)
++        ff_hevc_rpi_hls_residual_coding(s, lc, x0, y0, log2_trafo_size, scan_idx, 0);
++
++    if (log2_trafo_size > 2 || blk_idx == 3) // same chroma gating as the intra prediction above
++    {
++        if ((cbf_chroma & CBF_CB0) != 0) // Cb residual (c_idx 1)
++            ff_hevc_rpi_hls_residual_coding(s, lc, x0_c, y0_c,
++                                        log2_trafo_size_c, scan_idx_c, 1);
++        if ((cbf_chroma & CBF_CR0) != 0) // Cr residual (c_idx 2); the CB1/CR1 bits are not examined in this function
++            ff_hevc_rpi_hls_residual_coding(s, lc, x0_c, y0_c,
++                                        log2_trafo_size_c, scan_idx_c, 2);
++    }
++
++    return 0;
++}
++
++static inline void set_deblocking_bypass(const HEVCRpiContext * const s, const int x0, const int y0, const int log2_cb_size)
++{ // Calls set_bits() on s->is_pcm at 8-pel granularity (x0 >> 3, y0 >> 3) with row stride pcm_width; set_bits() itself is defined elsewhere
++    set_bits(s->is_pcm + (y0 >> 3) * s->ps.sps->pcm_width, x0 >> 3, s->ps.sps->pcm_width, log2_cb_size - 3);
++}
++
++
++static int hls_transform_tree(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                              const unsigned int x0, const unsigned int y0,
++                              const unsigned int log2_trafo_size,
++                              const unsigned int trafo_depth, const unsigned int blk_idx,
++                              const unsigned int cbf_c0)
++{ // Recursively parses the transform tree at (x0, y0); leaves call hls_transform_unit(). Returns 0 or a negative error from a callee
++    // When the chroma cbf decode below is skipped the parent's flags (cbf_c0) pass through unchanged as cbf_c1
++    unsigned int cbf_c1 = cbf_c0;
++    int split_transform_flag;
++    int ret;
++
++    if (lc->cu.intra_split_flag) { // split intra CU: per-sub-block modes are loaded at depth 1
++        if (trafo_depth == 1) {
++            lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[blk_idx];
++            if (ctx_cfmt(s) == 3) { // chroma format 3: chroma modes are per sub-block as well
++                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[blk_idx];
++                lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[blk_idx];
++            } else { // other chroma formats share entry 0 across all sub-blocks
++                lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
++                lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
++            }
++        }
++    } else { // unsplit CU: entry 0 is reloaded on every call
++        lc->tu.intra_pred_mode   = lc->pu.intra_pred_mode[0];
++        lc->tu.intra_pred_mode_c = lc->pu.intra_pred_mode_c[0];
++        lc->tu.chroma_mode_c     = lc->pu.chroma_mode_c[0];
++    }
++
++    if (log2_trafo_size <= s->ps.sps->log2_max_trafo_size &&
++        log2_trafo_size >  s->ps.sps->log2_min_tb_size    &&
++        trafo_depth     < lc->cu.max_trafo_depth       &&
++        !(lc->cu.intra_split_flag && trafo_depth == 0))
++    {
++        split_transform_flag = ff_hevc_rpi_split_transform_flag_decode(lc, log2_trafo_size); // flag is read from the bitstream
++    } else { // flag is inferred rather than read
++        int inter_split = s->ps.sps->max_transform_hierarchy_depth_inter == 0 &&
++                          lc->cu.pred_mode == MODE_INTER &&
++                          lc->cu.part_mode != PART_2Nx2N &&
++                          trafo_depth == 0;
++
++        split_transform_flag = log2_trafo_size > s->ps.sps->log2_max_trafo_size ||
++                               (lc->cu.intra_split_flag && trafo_depth == 0) ||
++                               inter_split;
++    }
++
++    if (log2_trafo_size > 2 || ctx_cfmt(s) == 3)
++    {
++        const int wants_c1 = ctx_cfmt(s) == 2 && (!split_transform_flag || log2_trafo_size == 3); // chroma format 2: a second cbf bit is read per chroma plane
++        cbf_c1 = 0; // rebuilt from the bitstream below
++
++        if ((cbf_c0 & CBF_CB0) != 0) // Cb cbf is only read when the parent's Cb bit was set
++        {
++            cbf_c1 = ff_hevc_rpi_cbf_cb_cr_decode(lc, trafo_depth) << CBF_CB0_S;
++            if (wants_c1)
++                cbf_c1 |= ff_hevc_rpi_cbf_cb_cr_decode(lc, trafo_depth) << CBF_CB1_S;
++        }
++
++        if ((cbf_c0 & CBF_CR0) != 0) // Cr cbf is only read when the parent's Cr bit was set
++        {
++            cbf_c1 |= ff_hevc_rpi_cbf_cb_cr_decode(lc, trafo_depth) << CBF_CR0_S;
++            if (wants_c1)
++                cbf_c1 |= ff_hevc_rpi_cbf_cb_cr_decode(lc, trafo_depth) << CBF_CR1_S;
++        }
++    }
++
++    if (split_transform_flag) {
++        const int trafo_size_split = 1 << (log2_trafo_size - 1); // side length of each of the four children
++        const int x1 = x0 + trafo_size_split;
++        const int y1 = y0 + trafo_size_split;
++
++#define SUBDIVIDE(x, y, idx)                                                    \
++do {                                                                            \
++    ret = hls_transform_tree(s, lc, x, y,                                       \
++                             log2_trafo_size - 1, trafo_depth + 1, idx,         \
++                             cbf_c1);                                           \
++    if (ret < 0)                                                                \
++        return ret;                                                             \
++} while (0)
++
++        SUBDIVIDE(x0, y0, 0); // children visited top-left, top-right, bottom-left, bottom-right
++        SUBDIVIDE(x1, y0, 1);
++        SUBDIVIDE(x0, y1, 2);
++        SUBDIVIDE(x1, y1, 3);
++
++#undef SUBDIVIDE
++    } else {
++        // If trafo_size == 2 then we should have cbf_c == 0 here but as we can't have
++        // trafo_size == 2 with depth == 0 the issue is moot
++        const int cbf_luma = ((lc->cu.pred_mode != MODE_INTRA && trafo_depth == 0 && cbf_c1 == 0) || // inferred as 1 in this case, without reading a bit
++            ff_hevc_rpi_cbf_luma_decode(lc, trafo_depth)); // otherwise read from the bitstream
++
++        ret = hls_transform_unit(s, lc, x0, y0,
++                                 log2_trafo_size + trafo_depth, log2_trafo_size, // first value is passed as log2_cb_size
++                                 blk_idx, cbf_luma, cbf_c1);
++        if (ret < 0)
++            return ret;
++
++        if (!s->sh.disable_deblocking_filter_flag) {
++            ff_hevc_rpi_deblocking_boundary_strengths(s, lc, x0, y0, log2_trafo_size, cbf_luma);
++        }
++    }
++    return 0;
++}
++
++
++static int pcm_extract(const HEVCRpiContext * const s, const uint8_t * pcm, const int length, const int x0, const int y0, const int cb_size)
++{ // Unpacks PCM samples from pcm into the luma and chroma planes of s->frame. Returns 0 or the init_get_bits() error
++    GetBitContext gb;
++    int ret;
++
++    ret = init_get_bits(&gb, pcm, length); // length is handed straight to init_get_bits()
++    if (ret < 0)
++        return ret;
++
++    s->hevcdsp.put_pcm(av_rpi_sand_frame_pos_y(s->frame, x0, y0), // luma: cb_size x cb_size samples
++                       frame_stride1(s->frame, 0),
++                       cb_size, cb_size, &gb, s->ps.sps->pcm.bit_depth);
++
++    s->hevcdsp.put_pcm_c(av_rpi_sand_frame_pos_c(s->frame, x0 >> ctx_hshift(s, 1), y0 >> ctx_vshift(s, 1)), // chroma: same bit reader, so it continues after the luma samples
++                       s->frame->linesize[1], // note: luma above takes its stride from frame_stride1() instead
++                       cb_size >> ctx_hshift(s, 1),
++                       cb_size >> ctx_vshift(s, 1),
++                       &gb, s->ps.sps->pcm.bit_depth_chroma);
++
++    return 0;
++}
++
++
++// x * 2^(y*2)
++static inline unsigned int xyexp2(const unsigned int x, const unsigned int y)
++{ // Returns x shifted left by 2*y bits
++    return x << (y * 2); // i.e. x * 4^y, barring overflow of unsigned int
++}
++
++static int hls_pcm_sample(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, const int x0, const int y0, unsigned int log2_cb_size)
++{ // Skips the PCM payload in the CABAC stream, copies it to the job's coeff buffer and queues an RPI_PRED_I_PCM command. Always returns 0
++    // Length in bits
++    const unsigned int length = xyexp2(s->ps.sps->pcm.bit_depth, log2_cb_size) + // luma: bit_depth * 4^log2_cb_size
++        xyexp2(s->ps.sps->pcm.bit_depth_chroma, log2_cb_size - ctx_vshift(s, 1)) + // NOTE(review): chroma planes are sized from vshift only - assumes hshift == vshift; confirm
++        xyexp2(s->ps.sps->pcm.bit_depth_chroma, log2_cb_size - ctx_vshift(s, 2));
++
++    const uint8_t * const pcm = ff_hevc_rpi_cabac_skip_bytes(&lc->cc, (length + 7) >> 3); // bit length rounded up to whole bytes
++
++    if (!s->sh.disable_deblocking_filter_flag)
++        ff_hevc_rpi_deblocking_boundary_strengths(s, lc, x0, y0, log2_cb_size, 0);
++
++    // Copy coeffs
++    {
++        const int blen = (length + 7) >> 3; // same byte count as was skipped above
++        // Round allocated bytes up to nearest 32 to avoid alignment confusion
++        // Allocation is in int16_t s
++        // As we are only using 1 byte per sample and the coeff buffer allows 2 per
++        // sample this rounding doesn't affect the total size we need to allocate for
++        // the coeff buffer
++        int16_t * const coeffs = rpi_alloc_coeff_buf(lc->jb0, 0, ((blen + 31) & ~31) >> 1);
++        memcpy(coeffs, pcm, blen); // NOTE(review): pcm and coeffs are used unchecked - confirm neither helper can return NULL
++
++        // Our coeff stash assumes that any partially allocated 64byte lump
++        // is zeroed so make that true.
++        {
++            uint8_t * const eopcm = (uint8_t *)coeffs + blen; // first byte after the copied data
++            if ((-(intptr_t)eopcm & 63) != 0) // number of bytes from eopcm up to the next 64-byte boundary
++                memset(eopcm, 0, -(intptr_t)eopcm & 63);
++        }
++
++        // Add command
++        {
++            HEVCPredCmd *const cmd = rpi_new_intra_cmd(lc->jb0);
++            cmd->type = RPI_PRED_I_PCM;
++            cmd->size = log2_cb_size;
++            cmd->i_pcm.src = coeffs;
++            cmd->i_pcm.x = x0;
++            cmd->i_pcm.y = y0;
++            cmd->i_pcm.src_len = length; // in bits, not bytes
++        }
++        return 0;
++    }
++}
++
++
++static void hevc_await_progress(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, const HEVCRpiFrame * const ref,
++                                const MvXY xy, const int y0, const int height)
++{ // Raises the current job's progress requirement for reference frame ref; no waiting happens in this function
++    if (s->threads_type != 0) { // nothing is recorded when threads_type is 0
++        const int y = FFMAX(0, (MV_Y(xy) >> 2) + y0 + height + 9); // clamped at 0; the + 9 is presumably a margin for filter/deblock rows - TODO confirm
++
++        // Progress has to be attached to current job as the actual wait
++        // is in worker_core which can't use lc
++        int16_t *const pr = lc->jb0->progress_req + ref->dpb_no; // one entry per reference, indexed by dpb_no
++        if (*pr < y) { // keep only the largest row requested so far
++            *pr = y;
++        }
++    }
++}
++
++static void hevc_luma_mv_mvp_mode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                                  const int x0, const int y0, const int nPbW,
++                                  const int nPbH,
++                                  HEVCRpiMvField * const mv)
++{ // Parses explicit motion data for one PU into *mv: inter_pred_idc, then ref_idx / mvd / mvp flag for each list in use
++    enum InterPredIdc inter_pred_idc = PRED_L0; // default, kept when the slice is not a B slice
++    int mvp_flag;
++    const unsigned int avail = ff_hevc_rpi_tb_avail_flags(s, lc, x0, y0, nPbW, nPbH); // computed once, shared by both lists
++
++    mv->pred_flag = 0;
++    if (s->sh.slice_type == HEVC_SLICE_B)
++        inter_pred_idc = ff_hevc_rpi_inter_pred_idc_decode(lc, nPbW, nPbH);
++
++    if (inter_pred_idc != PRED_L1) { // list 0 is in use
++        MvXY mvd;
++
++        if (s->sh.nb_refs[L0]) // ref_idx[0] is left untouched when there are no L0 refs
++            mv->ref_idx[0]= ff_hevc_rpi_ref_idx_lx_decode(lc, s->sh.nb_refs[L0]);
++
++        mv->pred_flag = PF_L0;
++        mvd = ff_hevc_rpi_hls_mvd_coding(lc);
++        mvp_flag = ff_hevc_rpi_mvp_lx_flag_decode(lc);
++        ff_hevc_rpi_luma_mv_mvp_mode(s, lc, x0, y0, nPbW, nPbH, avail,
++                                 mv, mvp_flag, 0);
++        mv->xy[0] = mvxy_add(mv->xy[0], mvd); // value left in mv->xy[0] by the call above, plus the decoded difference
++    }
++
++    if (inter_pred_idc != PRED_L0) { // list 1 is in use
++        MvXY mvd = 0; // stays 0 when mvd coding is skipped below
++
++        if (s->sh.nb_refs[L1]) // ref_idx[1] is left untouched when there are no L1 refs
++            mv->ref_idx[1] = ff_hevc_rpi_ref_idx_lx_decode(lc, s->sh.nb_refs[L1]);
++
++        if (s->sh.mvd_l1_zero_flag != 1 || inter_pred_idc != PRED_BI) // mvd is skipped only for bi-pred with mvd_l1_zero_flag set
++            mvd = ff_hevc_rpi_hls_mvd_coding(lc);
++
++        mv->pred_flag += PF_L1; // adds to PF_L0 if that was set above
++        mvp_flag = ff_hevc_rpi_mvp_lx_flag_decode(lc);
++        ff_hevc_rpi_luma_mv_mvp_mode(s, lc, x0, y0, nPbW, nPbH, avail,
++                                 mv, mvp_flag, 1);
++        mv->xy[1] = mvxy_add(mv->xy[1], mvd);
++    }
++}
++
++
++static HEVCRpiInterPredQ *
++rpi_nxt_pred(HEVCRpiInterPredEnv * const ipe, const unsigned int load_val, const uint32_t fn)
++{ // Picks the lowest-load queue of the current group, adds load_val to it, links fn at its write position and returns it
++    HEVCRpiInterPredQ * yp = NULL;
++    HEVCRpiInterPredQ * ypt = ipe->q + ipe->curr; // first queue of the current group
++    const unsigned int max_fill = ipe->max_fill;
++    unsigned int load = UINT_MAX;
++
++    for (unsigned int i = 0; i != ipe->n_grp; ++i, ++ypt) {
++        // We will always have enough room between the Qs but if we are
++        // running critically low due to poor scheduling then use fill size
++        // rather than load to determine QPU.  This has obvious dire
++        // performance implications but (a) it is better than crashing
++        // and (b) it should (almost) never happen
++        const unsigned int tfill = (char *)ypt->qpu_mc_curr - (char *)ypt->qpu_mc_base; // bytes between the queue base and its write position
++        const unsigned int tload = tfill > max_fill ? tfill + 0x1000000 : ypt->load; // over-full queues get a large penalty added to their fill
++
++        if (tload < load) // strict compare: the first queue wins ties
++        {
++            yp = ypt;
++            load = tload;
++        }
++    }
++
++    yp->load += load_val; // NOTE(review): yp is still NULL here if n_grp == 0 or no queue had tload < UINT_MAX
++    ipe->used_grp = 1; // marks the current group as used
++    qpu_mc_link_set(yp->qpu_mc_curr, fn);
++
++    return yp;
++}
++
++
++static void rpi_inter_pred_sync(HEVCRpiInterPredEnv * const ipe)
++{ // Links each queue's code_sync at its write position, for all ipe->n queues, and reseeds the load values
++    for (unsigned int i = 0; i != ipe->n; ++i) {
++        HEVCRpiInterPredQ * const q = ipe->q + i;
++        const unsigned int qfill = (char *)q->qpu_mc_curr - (char *)q->qpu_mc_base; // fill in bytes, measured before the sync is added
++
++        qpu_mc_link_set(q->qpu_mc_curr, q->code_sync);
++        q->qpu_mc_curr = (qpu_mc_pred_cmd_t *)(&q->qpu_mc_curr->sync + 1); // advance the write position past one sync entry
++        q->load = (qfill >> 7); // Have a mild preference for emptier Qs to balance memory usage
++    }
++}
++
++// Returns 0 on success
++// We no longer check for Q fullness as we have emergency code in ctu alloc
++// * However it might be an idea to have some means of spotting that we've used it
++static int rpi_inter_pred_next_ctu(HEVCRpiInterPredEnv * const ipe)
++{ // Moves on to the next queue group if the current one was used; always returns 0
++    if (!ipe->used_grp) // current group untouched: stay on it
++        return 0;
++
++    if ((ipe->curr += ipe->n_grp) >= ipe->n) // step to the next group; past the last queue, wrap to 0 and sync
++    {
++        ipe->curr = 0;
++        rpi_inter_pred_sync(ipe);
++    }
++    ipe->used = 1; // set once any group has been used; cleared only by rpi_inter_pred_reset() in the visible code
++    ipe->used_grp = 0;
++
++    return 0;
++}
++
++static void rpi_inter_pred_reset(HEVCRpiInterPredEnv * const ipe)
++{ // Returns ipe and all ipe->n queues to their empty state; nothing is freed or reallocated here
++    unsigned int i;
++
++    ipe->curr = 0;
++    ipe->used = 0;
++    ipe->used_grp = 0;
++    for (i = 0; i != ipe->n; ++i) {
++        HEVCRpiInterPredQ * const q = ipe->q + i;
++        q->qpu_mc_curr = q->qpu_mc_base; // rewind the write position to the start of the queue
++        q->load = 0;
++        q->last_l0 = NULL; // no pending source slots
++        q->last_l1 = NULL;
++    }
++}
++
++static int rpi_inter_pred_alloc(HEVCRpiInterPredEnv * const ipe,
++                                 const unsigned int n_max, const unsigned int n_grp,
++                                 const unsigned int total_size, const unsigned int min_gap)
++{ // Zeroes *ipe, allocates n_max queue descriptors and a cached GPU buffer. Returns 0, AVERROR(ENOMEM) or the gpu_malloc_cached() result
++    int rv;
++
++    memset(ipe, 0, sizeof(*ipe)); // every field not assigned below (e.g. n, max_fill) is left at zero
++    if ((ipe->q = av_mallocz(n_max * sizeof(*ipe->q))) == NULL)
++        return AVERROR(ENOMEM);
++
++    ipe->n_grp = n_grp;
++    ipe->min_gap = min_gap;
++
++    if ((rv = gpu_malloc_cached(total_size, &ipe->gptr)) != 0) // on failure free the descriptors again; av_freep() also NULLs ipe->q
++        av_freep(&ipe->q);
++    return rv;
++}
++
++
++#if RPI_QPU_EMU_Y
++#define get_mc_address_y(f) ((f)->data[0]) // RPI_QPU_EMU_Y set: use the frame's data[0] pointer directly
++#else
++#define get_mc_address_y(f) get_vc_address_y(f) // otherwise use the address returned by get_vc_address_y()
++#endif
++#if RPI_QPU_EMU_C
++#define get_mc_address_u(f) ((f)->data[1]) // RPI_QPU_EMU_C set: use the frame's data[1] pointer directly
++#else
++#define get_mc_address_u(f) get_vc_address_u(f) // otherwise use the address returned by get_vc_address_u()
++#endif
++
++static inline uint32_t pack_wo_p(const int off, const int mul)
++{ // Builds the weight/offset word for a single-reference prediction; PACK2() is defined elsewhere
++    return PACK2(off * 2 + 1, mul); // the offset is doubled and incremented before packing
++}
++
++static inline uint32_t pack_wo_b(const int off0, const int off1, const int mul)
++{ // Builds the weight/offset word for a two-reference prediction; PACK2() is defined elsewhere
++    return PACK2(off0 + off1 + 1, mul); // the two offsets are summed and incremented before packing
++}
++
++
++static void
++rpi_pred_y(const HEVCRpiContext *const s, HEVCRpiJob * const jb,
++           const int x0, const int y0,
++           const int nPbW, const int nPbH,
++           const MvXY mv_xy,
++           const int weight_mul,
++           const int weight_offset,
++           AVFrame *const src_frame)
++{ // Queues luma single-reference prediction commands for one PU on jb->luma_ip, in strips of at most 16 pels wide
++    const unsigned int y_off = av_rpi_sand_frame_off_y(s->frame, x0, y0);
++    const unsigned int mx          = MV_X(mv_xy) & 3; // low 2 bits of the MV x component
++    const unsigned int my          = MV_Y(mv_xy) & 3; // low 2 bits of the MV y component
++    const unsigned int my_mx       = (my << 8) | mx;
++    const uint32_t     my2_mx2_my_mx = (my_mx << 16) | my_mx; // same my/mx pair in both 16-bit halves
++    const qpu_mc_src_addr_t src_vc_address_y = get_mc_address_y(src_frame);
++    qpu_mc_dst_addr_t dst_addr = get_mc_address_y(s->frame) + y_off;
++    const uint32_t wo = pack_wo_p(weight_offset, weight_mul);
++    HEVCRpiInterPredEnv * const ipe = &jb->luma_ip;
++    const unsigned int xshl = av_rpi_sand_frame_xshl(s->frame); // shift applied to x offsets when forming destination addresses
++
++    if (my_mx == 0) // both low-bit pairs are zero: use the y_p00 QPU function
++    {
++        const int x1 = x0 + (MV_X(mv_xy) >> 2);
++        const int y1 = y0 + (MV_Y(mv_xy) >> 2);
++        const int bh = nPbH;
++
++        for (int start_x = 0; start_x < nPbW; start_x += 16)
++        {
++            const int bw = FFMIN(nPbW - start_x, 16);
++            HEVCRpiInterPredQ *const yp = rpi_nxt_pred(ipe, bh, s->qpu.y_p00);
++            qpu_mc_src_t *const src1 = yp->last_l0; // source slot left by the previous command on this queue
++            qpu_mc_pred_y_p00_t *const cmd_y = &yp->qpu_mc_curr->y.p00;
++
++#if RPI_TSTATS
++            {
++                HEVCRpiStats *const ts = (HEVCRpiStats *)&s->tstats;
++                ++ts->y_pred1_x0y0;
++
++                if (nPbW > 8)
++                    ++ts->y_pred1_wgt8;
++                else
++                    ++ts->y_pred1_wle8;
++
++                if (nPbH > 16)
++                    ++ts->y_pred1_hgt16;
++                else
++                    ++ts->y_pred1_hle16;
++            }
++#endif
++
++            src1->x = x1 + start_x;
++            src1->y = y1;
++            src1->base = src_vc_address_y;
++            cmd_y->w = bw;
++            cmd_y->h = bh;
++            cmd_y->wo1 = wo;
++            cmd_y->dst_addr =  dst_addr + (start_x << xshl);
++            yp->last_l0 = &cmd_y->next_src1; // the next command on this queue fills in this slot
++            yp->qpu_mc_curr = (qpu_mc_pred_cmd_t *)(cmd_y + 1); // advance the write position past this command
++        }
++    }
++    else
++    {
++        const int x1_m3 = x0 + (MV_X(mv_xy) >> 2) - 3; // source position moved up/left by 3
++        const int y1_m3 = y0 + (MV_Y(mv_xy) >> 2) - 3;
++        const unsigned int bh = nPbH;
++        int start_x = 0;
++
++#if 1
++        // As Y-pred operates on two independent 8-wide src blocks we can merge
++        // this pred with the previous one if the previous one is 8 pel wide,
++        // the same height as the current block, immediately to the left of our
++        // current dest block and mono-pred.
++
++        qpu_mc_pred_y_p_t *const last_y8_p = jb->last_y8_p;
++        if (last_y8_p != NULL && last_y8_p->h == bh && last_y8_p->dst_addr + (8 << xshl) == dst_addr)
++        {
++            const int bw = FFMIN(nPbW, 8);
++            qpu_mc_src_t *const last_y8_src2 = jb->last_y8_l1; // second source of the remembered command
++
++            last_y8_src2->x = x1_m3;
++            last_y8_src2->y = y1_m3;
++            last_y8_src2->base = src_vc_address_y;
++            last_y8_p->w += bw; // widen the earlier command to cover our first strip
++            last_y8_p->mymx21 = PACK2(my2_mx2_my_mx, last_y8_p->mymx21);
++            last_y8_p->wo2 = wo;
++
++            jb->last_y8_p = NULL; // a command can only be merged into once
++            jb->last_y8_l1 = NULL;
++            start_x = bw; // the loop below starts after the merged strip
++#if RPI_TSTATS
++            ++((HEVCRpiStats *)&s->tstats)->y_pred1_y8_merge;
++#endif
++        }
++#endif
++
++        for (; start_x < nPbW; start_x += 16)
++        {
++            const int bw = FFMIN(nPbW - start_x, 16);
++            HEVCRpiInterPredQ *const yp = rpi_nxt_pred(ipe, bh + 7, s->qpu.y_pxx); // load estimate is bh + 7 here, bh in the my_mx == 0 branch
++            qpu_mc_src_t *const src1 = yp->last_l0;
++            qpu_mc_src_t *const src2 = yp->last_l1;
++            qpu_mc_pred_y_p_t *const cmd_y = &yp->qpu_mc_curr->y.p;
++#if RPI_TSTATS
++            {
++                HEVCRpiStats *const ts = (HEVCRpiStats *)&s->tstats;
++                if (mx == 0 && my == 0) // cannot be true in this branch, where my_mx != 0
++                    ++ts->y_pred1_x0y0;
++                else if (mx == 0)
++                    ++ts->y_pred1_x0;
++                else if (my == 0)
++                    ++ts->y_pred1_y0;
++                else
++                    ++ts->y_pred1_xy;
++
++                if (nPbW > 8)
++                    ++ts->y_pred1_wgt8;
++                else
++                    ++ts->y_pred1_wle8;
++
++                if (nPbH > 16)
++                    ++ts->y_pred1_hgt16;
++                else
++                    ++ts->y_pred1_hle16;
++            }
++#endif
++            src1->x = x1_m3 + start_x;
++            src1->y = y1_m3;
++            src1->base = src_vc_address_y;
++            if (bw <= 8) // second 8-wide source is not needed: point it at the dummy frame
++            {
++                src2->x = MC_DUMMY_X;
++                src2->y = MC_DUMMY_Y;
++#if RPI_QPU_EMU_Y
++                src2->base = s->qpu_dummy_frame_emu;
++#else
++                src2->base = s->qpu_dummy_frame_qpu;
++#endif
++            }
++            else
++            {
++                src2->x = x1_m3 + start_x + 8; // second source starts 8 pels to the right of the first
++                src2->y = y1_m3;
++                src2->base = src_vc_address_y;
++            }
++            cmd_y->w = bw;
++            cmd_y->h = bh;
++            cmd_y->mymx21 = my2_mx2_my_mx;
++            cmd_y->wo1 = wo;
++            cmd_y->wo2 = wo;
++            cmd_y->dst_addr =  dst_addr + (start_x << xshl);
++            yp->last_l0 = &cmd_y->next_src1;
++            yp->last_l1 = &cmd_y->next_src2;
++            yp->qpu_mc_curr = (qpu_mc_pred_cmd_t *)(cmd_y + 1);
++
++            if (bw == 8) { // remember this command so a later call can merge into its second source
++                jb->last_y8_l1 = src2;
++                jb->last_y8_p = cmd_y;
++            }
++        }
++    }
++}
++
++static void
++rpi_pred_y_b(const HEVCRpiContext * const s, HEVCRpiJob * const jb,
++           const int x0, const int y0,
++           const int nPbW, const int nPbH,
++           const struct HEVCRpiMvField *const mv_field,
++           const AVFrame *const src_frame,
++           const AVFrame *const src_frame2)
++{ // Queues luma two-reference prediction commands for one PU on jb->luma_ip; src_frame pairs with xy[0], src_frame2 with xy[1]
++    const unsigned int y_off = av_rpi_sand_frame_off_y(s->frame, x0, y0);
++    const MvXY mv  = mv_field->xy[0];
++    const MvXY mv2 = mv_field->xy[1];
++
++    const unsigned int mx          = MV_X(mv) & 3; // low 2 bits of each MV component
++    const unsigned int my          = MV_Y(mv) & 3;
++    const unsigned int my_mx = (my<<8) | mx;
++    const unsigned int mx2          = MV_X(mv2) & 3;
++    const unsigned int my2          = MV_Y(mv2) & 3;
++    const unsigned int my2_mx2 = (my2<<8) | mx2;
++    const uint32_t     my2_mx2_my_mx = (my2_mx2 << 16) | my_mx; // second MV's pair in the high half, first MV's in the low half
++    const unsigned int ref_idx0 = mv_field->ref_idx[0];
++    const unsigned int ref_idx1 = mv_field->ref_idx[1];
++    const uint32_t wo1 = pack_wo_b(s->sh.luma_offset_l0[ref_idx0], s->sh.luma_offset_l1[ref_idx1], s->sh.luma_weight_l0[ref_idx0]); // both words carry the same summed offset; only the weight differs
++    const uint32_t wo2 = pack_wo_b(s->sh.luma_offset_l0[ref_idx0], s->sh.luma_offset_l1[ref_idx1], s->sh.luma_weight_l1[ref_idx1]);
++
++    const unsigned int xshl = av_rpi_sand_frame_xshl(s->frame);
++    qpu_mc_dst_addr_t dst = get_mc_address_y(s->frame) + y_off;
++    const qpu_mc_src_addr_t src1_base = get_mc_address_y(src_frame);
++    const qpu_mc_src_addr_t src2_base = get_mc_address_y(src_frame2);
++    HEVCRpiInterPredEnv * const ipe = &jb->luma_ip;
++
++    if (my2_mx2_my_mx == 0) // low bits of both MVs are all zero: use the y_b00 QPU function
++    {
++        const int x1 = x0 + (MV_X(mv) >> 2);
++        const int y1 = y0 + (MV_Y(mv) >> 2);
++        const int x2 = x0 + (MV_X(mv2) >> 2);
++        const int y2 = y0 + (MV_Y(mv2) >> 2);
++        const int bh = nPbH;
++
++        // Can do chunks a full 16 wide if we don't want the H filter
++        for (int start_x=0; start_x < nPbW; start_x += 16)
++        {
++            HEVCRpiInterPredQ *const yp = rpi_nxt_pred(ipe, bh, s->qpu.y_b00);
++            qpu_mc_src_t *const src1 = yp->last_l0; // source slots left by the previous command on this queue
++            qpu_mc_src_t *const src2 = yp->last_l1;
++            qpu_mc_pred_y_p_t *const cmd_y = &yp->qpu_mc_curr->y.p;
++#if RPI_TSTATS
++            {
++                HEVCRpiStats *const ts = (HEVCRpiStats *)&s->tstats;
++                ++ts->y_pred2_x0y0;
++
++                if (nPbH > 16)
++                    ++ts->y_pred2_hgt16;
++                else
++                    ++ts->y_pred2_hle16;
++            }
++#endif
++            src1->x = x1 + start_x;
++            src1->y = y1;
++            src1->base = src1_base;
++            src2->x = x2 + start_x;
++            src2->y = y2;
++            src2->base = src2_base;
++            cmd_y->w = FFMIN(nPbW - start_x, 16);
++            cmd_y->h = bh;
++            cmd_y->mymx21 = 0;
++            cmd_y->wo1 = wo1;
++            cmd_y->wo2 = wo2;
++            cmd_y->dst_addr =  dst + (start_x << xshl);
++            yp->last_l0 = &cmd_y->next_src1; // the next command on this queue fills in these slots
++            yp->last_l1 = &cmd_y->next_src2;
++            yp->qpu_mc_curr = (qpu_mc_pred_cmd_t *)(cmd_y + 1); // advance the write position past this command
++        }
++    }
++    else
++    {
++        // Filter requires a run-up of 3
++        const int x1 = x0 + (MV_X(mv) >> 2) - 3;
++        const int y1 = y0 + (MV_Y(mv) >> 2) - 3;
++        const int x2 = x0 + (MV_X(mv2) >> 2) - 3;
++        const int y2 = y0 + (MV_Y(mv2) >> 2) - 3;
++        const int bh = nPbH;
++
++        for (int start_x=0; start_x < nPbW; start_x += 8)
++        { // B blocks work 8 at a time
++            // B weights aren't doubled as the QPU code does the same
++            // amount of work as it does for P
++            HEVCRpiInterPredQ *const yp = rpi_nxt_pred(ipe, bh + 7, s->qpu.y_bxx);
++            qpu_mc_src_t *const src1 = yp->last_l0;
++            qpu_mc_src_t *const src2 = yp->last_l1;
++            qpu_mc_pred_y_p_t *const cmd_y = &yp->qpu_mc_curr->y.p;
++#if RPI_TSTATS
++            {
++                HEVCRpiStats *const ts = (HEVCRpiStats *)&s->tstats;
++                const unsigned int mmx = mx | mx2;
++                const unsigned int mmy = my | my2;
++                if (mmx == 0 && mmy == 0) // cannot be true in this branch, where my2_mx2_my_mx != 0
++                    ++ts->y_pred2_x0y0;
++                else if (mmx == 0)
++                    ++ts->y_pred2_x0;
++                else if (mmy == 0)
++                    ++ts->y_pred2_y0;
++                else
++                    ++ts->y_pred2_xy;
++
++                if (nPbH > 16)
++                    ++ts->y_pred2_hgt16;
++                else
++                    ++ts->y_pred2_hle16;
++            }
++#endif
++            src1->x = x1 + start_x;
++            src1->y = y1;
++            src1->base = src1_base;
++            src2->x = x2 + start_x;
++            src2->y = y2;
++            src2->base = src2_base;
++            cmd_y->w = FFMIN(nPbW - start_x, 8);
++            cmd_y->h = bh;
++            cmd_y->mymx21 = my2_mx2_my_mx;
++            cmd_y->wo1 = wo1;
++            cmd_y->wo2 = wo2;
++            cmd_y->dst_addr =  dst + (start_x << xshl);
++            yp->last_l0 = &cmd_y->next_src1;
++            yp->last_l1 = &cmd_y->next_src2;
++            yp->qpu_mc_curr = (qpu_mc_pred_cmd_t *)(cmd_y + 1);
++        }
++    }
++}
++
++// h/v shifts fixed at one as that is all the qasm copes with
++static void
++rpi_pred_c(const HEVCRpiContext * const s, HEVCRpiJob * const jb,
++  const unsigned int lx, const int x0_c, const int y0_c,
++  const int nPbW_c, const int nPbH_c,
++  const MvXY mv,
++  const int16_t * const c_weights,
++  const int16_t * const c_offsets,
++  AVFrame * const src_frame)
++{ // Queues chroma single-reference prediction commands for one PU on jb->chroma_ip; lx selects the L0 or L1 QPU function and source chain
++    const unsigned int c_off = av_rpi_sand_frame_off_c(s->frame, x0_c, y0_c);
++    const int hshift = 1; // = s->ps.sps->hshift[1];
++    const int vshift = 1; // = s->ps.sps->vshift[1];
++
++    const int x1_c = x0_c + (MV_X(mv) >> (2 + hshift)) - 1; // source position moved up/left by 1
++    const int y1_c = y0_c + (MV_Y(mv) >> (2 + hshift)) - 1; // NOTE(review): uses hshift for the Y shift; harmless while hshift == vshift == 1
++    const qpu_mc_src_addr_t src_base_u = get_mc_address_u(src_frame);
++    const uint32_t x_coeffs = rpi_filter_coefs[av_mod_uintp2(MV_X(mv), 2 + hshift) << (1 - hshift)]; // with hshift == 1 the index is the low 3 bits of the MV, unshifted
++    const uint32_t y_coeffs = rpi_filter_coefs[av_mod_uintp2(MV_Y(mv), 2 + vshift) << (1 - vshift)];
++    const uint32_t wo_u = pack_wo_p(c_offsets[0], c_weights[0]); // entry 0 is used for U, entry 1 for V
++    const uint32_t wo_v = pack_wo_p(c_offsets[1], c_weights[1]);
++    qpu_mc_dst_addr_t dst_base_u = get_mc_address_u(s->frame) + c_off;
++    HEVCRpiInterPredEnv * const ipe = &jb->chroma_ip;
++    const unsigned int xshl = av_rpi_sand_frame_xshl(s->frame) + 1; // one more than the luma shift - presumably because U and V are interleaved; TODO confirm
++    const unsigned int bh = nPbH_c;
++    const uint32_t qfn = lx == 0 ? s->qpu.c_pxx : s->qpu.c_pxx_l1;
++
++    for(int start_x=0; start_x < nPbW_c; start_x+=RPI_CHROMA_BLOCK_WIDTH)
++    {
++        HEVCRpiInterPredQ * const cp = rpi_nxt_pred(ipe, bh + 3, qfn);
++        qpu_mc_pred_c_p_t * const cmd_c = &cp->qpu_mc_curr->c.p;
++        qpu_mc_src_t ** const plast_lx = (lx == 0) ? &cp->last_l0 : &cp->last_l1; // source chain for the selected list
++        qpu_mc_src_t * const last_lx = *plast_lx; // source slot left by the previous command on that chain
++        const int bw = FFMIN(nPbW_c-start_x, RPI_CHROMA_BLOCK_WIDTH);
++
++        last_lx->x = x1_c + start_x;
++        last_lx->y = y1_c;
++        last_lx->base = src_base_u;
++        cmd_c->h = bh;
++        cmd_c->w = bw;
++        cmd_c->coeffs_x = x_coeffs;
++        cmd_c->coeffs_y = y_coeffs;
++        cmd_c->wo_u = wo_u;
++        cmd_c->wo_v = wo_v;
++        cmd_c->dst_addr_c = dst_base_u + (start_x << xshl);
++        *plast_lx = &cmd_c->next_src; // the next command on this chain fills in this slot
++        cp->qpu_mc_curr = (qpu_mc_pred_cmd_t *)(cmd_c + 1); // advance the write position past this command
++    }
++    return;
++}
++
++// Queue bi-pred chroma MC commands for one PU. h/v shifts fixed at one as that is all the qasm copes with
++static void
++rpi_pred_c_b(const HEVCRpiContext * const s, HEVCRpiJob * const jb,
++  const int x0_c, const int y0_c,
++  const int nPbW_c, const int nPbH_c,
++  const struct HEVCRpiMvField * const mv_field,
++  const int16_t * const c_weights,   // [0] = U, [1] = V weights applied to src_frame
++  const int16_t * const c_offsets,   // [0] = U, [1] = V offsets paired with c_weights
++  const int16_t * const c_weights2,  // as c_weights, for src_frame2
++  const int16_t * const c_offsets2,  // as c_offsets, for src_frame2
++  AVFrame * const src_frame,
++  AVFrame * const src_frame2)
++{
++    const unsigned int c_off = av_rpi_sand_frame_off_c(s->frame, x0_c, y0_c);
++    const int hshift = 1; // s->ps.sps->hshift[1];
++    const int vshift = 1; // s->ps.sps->vshift[1];
++    const MvXY mv = mv_field->xy[0];
++    const MvXY mv2 = mv_field->xy[1];
++    // First MV (src_frame): the low bits pick the filter, the remaining bits give the source position
++    const unsigned int mx = av_mod_uintp2(MV_X(mv), 2 + hshift);
++    const unsigned int my = av_mod_uintp2(MV_Y(mv), 2 + vshift);
++    const uint32_t coefs0_x = rpi_filter_coefs[mx << (1 - hshift)];
++    const uint32_t coefs0_y = rpi_filter_coefs[my << (1 - vshift)]; // Fractional part of motion vector
++    const int x1_c = x0_c + (MV_X(mv) >> (2 + hshift)) - 1;
++    const int y1_c = y0_c + (MV_Y(mv) >> (2 + vshift)) - 1; // vertical component uses vshift
++    // Second MV (src_frame2): same derivation
++    const unsigned int mx2 = av_mod_uintp2(MV_X(mv2), 2 + hshift);
++    const unsigned int my2 = av_mod_uintp2(MV_Y(mv2), 2 + vshift);
++    const uint32_t coefs1_x = rpi_filter_coefs[mx2 << (1 - hshift)];
++    const uint32_t coefs1_y = rpi_filter_coefs[my2 << (1 - vshift)]; // Fractional part of motion vector
++
++    const int x2_c = x0_c + (MV_X(mv2) >> (2 + hshift)) - 1;
++    const int y2_c = y0_c + (MV_Y(mv2) >> (2 + vshift)) - 1; // vertical component uses vshift
++    // Packed words combine both offsets with the second weight; the first weights go in separate fields below
++    const uint32_t wo_u2 = pack_wo_b(c_offsets[0], c_offsets2[0], c_weights2[0]);
++    const uint32_t wo_v2 = pack_wo_b(c_offsets[1], c_offsets2[1], c_weights2[1]);
++
++    const qpu_mc_dst_addr_t dst_base_u = get_mc_address_u(s->frame) + c_off;
++    const qpu_mc_src_addr_t src1_base = get_mc_address_u(src_frame);
++    const qpu_mc_src_addr_t src2_base = get_mc_address_u(src_frame2);
++    HEVCRpiInterPredEnv * const ipe = &jb->chroma_ip;
++    const unsigned int xshl = av_rpi_sand_frame_xshl(s->frame) + 1;
++    const unsigned int bh = nPbH_c;
++    // Emit one command per column of at most RPI_CHROMA_BLOCK_WIDTH
++    for (int start_x=0; start_x < nPbW_c; start_x += RPI_CHROMA_BLOCK_WIDTH)
++    {
++        const unsigned int bw = FFMIN(nPbW_c-start_x, RPI_CHROMA_BLOCK_WIDTH);
++
++        HEVCRpiInterPredQ * const cp = rpi_nxt_pred(ipe, bh * 2 + 3, s->qpu.c_bxx);
++        qpu_mc_pred_c_b_t * const u = &cp->qpu_mc_curr->c.b;
++        qpu_mc_src_t * const src_l0 = cp->last_l0;
++        qpu_mc_src_t * const src_l1 = cp->last_l1;
++        // Source setup is written through the queue's last_l0/last_l1 slots, not into this command
++        src_l0->x = x1_c + start_x;
++        src_l0->y = y1_c;
++        src_l0->base = src1_base;
++        src_l1->x = x2_c + start_x;
++        src_l1->y = y2_c;
++        src_l1->base = src2_base;
++
++        u[0].h = bh;
++        u[0].w = bw;
++        u[0].coeffs_x1 = coefs0_x;
++        u[0].coeffs_y1 = coefs0_y;
++        u[0].weight_u1 = c_weights[0]; // Weight L0 U
++        u[0].weight_v1 = c_weights[1]; // Weight L0 V
++        u[0].coeffs_x2 = coefs1_x;
++        u[0].coeffs_y2 = coefs1_y;
++        u[0].wo_u2 = wo_u2;
++        u[0].wo_v2 = wo_v2;
++        u[0].dst_addr_c = dst_base_u + (start_x << xshl);
++        // The next command's sources will be written into this command's next_src fields
++        cp->last_l0 = &u[0].next_src1;
++        cp->last_l1 = &u[0].next_src2;
++        cp->qpu_mc_curr = (qpu_mc_pred_cmd_t *)(u + 1);
++    }
++}
++
++
++static inline void
++col_stash(const HEVCRpiContext * const s,
++          const unsigned int x0, const unsigned int y0, const unsigned int w0, const unsigned int h0,
++          const HEVCRpiMvField * const mvf)
++{
++    // Stash the PU's motion info into the current frame's collocated-MV array.
++    // The array is indexed in 16x16 units; (x0 + 15) >> 4 rounds up so only
++    // cells whose top-left corner lies inside the PU are written.
++    ColMvField * const col_base = s->ref->col_mvf;
++    const unsigned int cx = (x0 + 15) >> 4;
++    const unsigned int cy = (y0 + 15) >> 4;
++    const unsigned int n_cols = ((x0 + 15 + w0) >> 4) - cx;
++    const unsigned int n_rows = ((y0 + 15 + h0) >> 4) - cy;
++
++    // No array present, or the PU covers no 16x16 corner: nothing to record
++    if (col_base == NULL || n_cols == 0 || n_rows == 0)
++        return;
++
++    {
++        const RefPicList * const rpl = s->refPicList;
++        // Per list: COL_POC_INTRA if the list is unused, else a value made from the ref's long-term flag and list entry
++        const ColMvField entry = {
++            .L = {
++                {
++                    .poc = (mvf->pred_flag & PF_L0) != 0 ?
++                            COL_POC_MAKE_INTER(rpl[0].isLongTerm[mvf->ref_idx[0]], rpl[0].list[mvf->ref_idx[0]]) :
++                            COL_POC_INTRA,
++                    .xy = mvf->xy[0]
++                },
++                {
++                    .poc = (mvf->pred_flag & PF_L1) != 0 ?
++                            COL_POC_MAKE_INTER(rpl[1].isLongTerm[mvf->ref_idx[1]], rpl[1].list[mvf->ref_idx[1]]) :
++                            COL_POC_INTRA,
++                    .xy = mvf->xy[1]
++                }
++            }
++        };
++        // Distance from the end of one written row to the start of the next
++        const unsigned int row_skip = s->col_mvf_stride - n_cols;
++        ColMvField * dst = col_base + cy * s->col_mvf_stride + cx;
++        unsigned int r, c;
++
++        for (r = n_rows; r != 0; --r, dst += row_skip) {
++            for (c = n_cols; c != 0; --c)
++                *dst++ = entry;
++        }
++    }
++}
++
++static void hls_prediction_unit(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                                const unsigned int x0, const unsigned int y0,
++                                const unsigned int nPbW, const unsigned int nPbH,
++                                const unsigned int log2_cb_size, const unsigned int partIdx, const unsigned int idx) // idx is not referenced in this body
++{
++    HEVCRpiJob * const jb = lc->jb0;
++    // Decode the motion info of one PU, stash it, then queue the luma/chroma prediction commands on job jb
++    struct HEVCRpiMvField current_mv = {{0}};
++    const RefPicList  *const refPicList = s->refPicList;
++    const HEVCRpiFrame *ref0 = NULL, *ref1 = NULL;
++
++    if (lc->cu.pred_mode != MODE_SKIP)
++        lc->pu.merge_flag = ff_hevc_rpi_merge_flag_decode(lc);
++    // Skipped CUs always take the merge path; otherwise merge_flag (read above) selects it
++    if (lc->cu.pred_mode == MODE_SKIP || lc->pu.merge_flag) {
++        const unsigned int merge_idx = s->sh.max_num_merge_cand <= 1 ? 0 :
++            ff_hevc_rpi_merge_idx_decode(s, lc);
++
++        ff_hevc_rpi_luma_mv_merge_mode(s, lc, x0, y0, nPbW, nPbH, log2_cb_size,
++                                   partIdx, merge_idx, &current_mv);
++    } else {
++        hevc_luma_mv_mvp_mode(s, lc, x0, y0, nPbW, nPbH, &current_mv);
++    }
++    // Copy the MV field into every min-size PU cell of the MV stash that this PU covers
++    {
++        HEVCRpiMvField * p = mvf_stash_ptr(s, lc, x0, y0);
++        unsigned int i, j;
++
++        for (j = 0; j < nPbH >> LOG2_MIN_PU_SIZE; j++)
++        {
++            for (i = 0; i < nPbW >> LOG2_MIN_PU_SIZE; i++)
++                p[i] = current_mv;
++            p += MVF_STASH_WIDTH_PU;
++        }
++    }
++
++    col_stash(s, x0, y0, nPbW, nPbH, &current_mv);
++    // Fetch each reference in use; a missing one abandons the PU silently, otherwise hevc_await_progress() is called on it
++    if (current_mv.pred_flag & PF_L0) {
++        ref0 = refPicList[0].ref[current_mv.ref_idx[0]];
++        if (!ref0)
++            return;
++        hevc_await_progress(s, lc, ref0, current_mv.xy[0], y0, nPbH);
++    }
++    if (current_mv.pred_flag & PF_L1) {
++        ref1 = refPicList[1].ref[current_mv.ref_idx[1]];
++        if (!ref1)
++            return;
++        hevc_await_progress(s, lc, ref1, current_mv.xy[1], y0, nPbH);
++    }
++    // Queue luma, then (unless ctx_cfmt(s) == 0) chroma, for uni-pred L0, uni-pred L1 or bi-pred
++    if (current_mv.pred_flag == PF_L0) {
++        const int x0_c = x0 >> ctx_hshift(s, 1);
++        const int y0_c = y0 >> ctx_vshift(s, 1);
++        const int nPbW_c = nPbW >> ctx_hshift(s, 1);
++        const int nPbH_c = nPbH >> ctx_vshift(s, 1);
++
++        rpi_pred_y(s, jb, x0, y0, nPbW, nPbH, current_mv.xy[0],
++          s->sh.luma_weight_l0[current_mv.ref_idx[0]], s->sh.luma_offset_l0[current_mv.ref_idx[0]],
++          ref0->frame);
++
++        if (ctx_cfmt(s) != 0) {
++            rpi_pred_c(s, jb, 0, x0_c, y0_c, nPbW_c, nPbH_c, current_mv.xy[0],
++              s->sh.chroma_weight_l0[current_mv.ref_idx[0]], s->sh.chroma_offset_l0[current_mv.ref_idx[0]],
++              ref0->frame);
++            return;
++        }
++    } else if (current_mv.pred_flag == PF_L1) {
++        const int x0_c = x0 >> ctx_hshift(s, 1);
++        const int y0_c = y0 >> ctx_vshift(s, 1);
++        const int nPbW_c = nPbW >> ctx_hshift(s, 1);
++        const int nPbH_c = nPbH >> ctx_vshift(s, 1);
++
++        rpi_pred_y(s, jb, x0, y0, nPbW, nPbH, current_mv.xy[1],
++          s->sh.luma_weight_l1[current_mv.ref_idx[1]], s->sh.luma_offset_l1[current_mv.ref_idx[1]],
++          ref1->frame);
++
++        if (ctx_cfmt(s) != 0) {
++            rpi_pred_c(s, jb, 1, x0_c, y0_c, nPbW_c, nPbH_c, current_mv.xy[1],
++              s->sh.chroma_weight_l1[current_mv.ref_idx[1]], s->sh.chroma_offset_l1[current_mv.ref_idx[1]],
++              ref1->frame);
++            return;
++        }
++    } else if (current_mv.pred_flag == PF_BI) {
++        const int x0_c = x0 >> ctx_hshift(s, 1);
++        const int y0_c = y0 >> ctx_vshift(s, 1);
++        const int nPbW_c = nPbW >> ctx_hshift(s, 1);
++        const int nPbH_c = nPbH >> ctx_vshift(s, 1);
++
++        rpi_pred_y_b(s, jb, x0, y0, nPbW, nPbH, &current_mv, ref0->frame, ref1->frame);
++
++        if (ctx_cfmt(s) != 0) {
++          rpi_pred_c_b(s, jb, x0_c, y0_c, nPbW_c, nPbH_c,
++                       &current_mv,
++                       s->sh.chroma_weight_l0[current_mv.ref_idx[0]],
++                       s->sh.chroma_offset_l0[current_mv.ref_idx[0]],
++                       s->sh.chroma_weight_l1[current_mv.ref_idx[1]],
++                       s->sh.chroma_offset_l1[current_mv.ref_idx[1]],
++                       ref0->frame,
++                       ref1->frame);
++            return;
++        }
++    }
++}
++
++static void set_ipm(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                    const unsigned int x0, const unsigned int y0,
++                    const unsigned int log2_cb_size,
++                    const unsigned int ipm)
++{
++    const unsigned int x_pu = x0 >> LOG2_MIN_PU_SIZE;
++    const unsigned int y_pu = y0 >> LOG2_MIN_PU_SIZE;
++    // Record intra pred mode ipm for the block at (x0, y0) of size 1 << log2_cb_size in the left/up stashes
++    {
++        const unsigned int ctb_mask = ~(~0U << (s->ps.sps->log2_ctb_size - LOG2_MIN_PU_SIZE));
++        set_stash2(lc->ipm_left + (y_pu & ctb_mask), lc->ipm_up + (x_pu & ctb_mask), log2_cb_size - LOG2_MIN_PU_SIZE, ipm);
++    }
++
++    // If IRAP then everything is Intra & we avoid ever looking at these
++    // stashes so don't bother setting them
++    if (!s->is_irap && lc->cu.pred_mode == MODE_INTRA)
++    {
++        if (s->is_intra != NULL)
++        {
++            set_bits(s->is_intra + (y0 >> LOG2_MIN_CU_SIZE) * s->ps.sps->pcm_width, x0 >> LOG2_MIN_CU_SIZE, s->ps.sps->pcm_width, log2_cb_size - LOG2_MIN_CU_SIZE);
++        }
++        // Zero the MV stash over the block: size_in_pus rows of size_in_pus entries
++        {
++            HEVCRpiMvField * p = mvf_stash_ptr(s, lc, x0, y0);
++            const unsigned int size_in_pus = (1 << log2_cb_size) >> LOG2_MIN_PU_SIZE; // min_pu <= log2_cb so >= 1
++            unsigned int n = size_in_pus;
++
++            do
++            {
++                memset(p, 0, size_in_pus * sizeof(*p));
++                p += MVF_STASH_WIDTH_PU;
++            } while (--n != 0);
++        }
++
++        // Mark the covered col-MV cells as intra; only done when (x0, y0) sits on a 16-pixel boundary
++        if (s->ref->col_mvf != NULL && ((x0 | y0) & 0xf) == 0)
++        {
++            // Only record top left stuff
++            // Blocks should always be aligned on size boundaries
++            // so cannot have overflow from a small block
++
++            ColMvField * p = s->ref->col_mvf + (y0 >> 4) * s->col_mvf_stride + (x0 >> 4);
++            const unsigned int size_in_col = log2_cb_size < 4 ? 1 : (1 << (log2_cb_size - 4));
++            const unsigned int stride = s->col_mvf_stride - size_in_col;
++            unsigned int j = size_in_col;
++
++            do
++            {
++                unsigned int k = size_in_col;
++                do
++                {
++                    p->L[0].poc = COL_POC_INTRA;
++                    p->L[0].xy = 0;
++                    p->L[1].poc = COL_POC_INTRA;
++                    p->L[1].xy = 0;
++                    ++p;
++                } while (--k != 0);
++                p += stride;
++            } while (--j != 0);
++        }
++    }
++}
++
++static inline void intra_prediction_unit_default_value(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                                                const unsigned int x0, const unsigned int y0,
++                                                const unsigned int log2_cb_size)
++{
++    set_ipm(s, lc, x0, y0, log2_cb_size, INTRA_DC);  // Default: record INTRA_DC for the whole block via set_ipm()
++}
++
++
++/**
++ * 8.4.1 - derive the luma intra pred mode from the left/up candidates and idx, record it with set_ipm() and return it
++ */
++static int luma_intra_pred_mode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                                int x0, int y0, int log2_pu_size,
++                                int prev_intra_luma_pred_flag,
++                                const unsigned int idx)
++{
++    const unsigned int ctb_mask = ~(~0U << s->ps.sps->log2_ctb_size);
++    const unsigned int xb_pu = (x0 & ctb_mask) >> LOG2_MIN_PU_SIZE;
++    const unsigned int yb_pu = (y0 & ctb_mask) >> LOG2_MIN_PU_SIZE;
++
++    // Up does not cross boundaries so as we always scan 1 slice-tile-line in an
++    // lc we can just keep 1 CTB lR stashes
++    // Left is reset to DC @ Start of Line/Tile/Slice in fill_job
++    const unsigned int cand_up   = yb_pu == 0 ? INTRA_DC : lc->ipm_up[xb_pu];
++    const unsigned int cand_left = lc->ipm_left[yb_pu];
++
++    unsigned int intra_pred_mode;
++    unsigned int a, b, c;
++    // Build the three candidate modes a, b, c from the left and up neighbours
++    if (cand_left == cand_up) {
++        if (cand_left < 2) {
++            a = INTRA_PLANAR;
++            b = INTRA_DC;
++            c = INTRA_ANGULAR_26;
++        } else {
++            a = cand_left;
++            b = 2 + ((cand_left - 2 - 1 + 32) & 31);
++            c = 2 + ((cand_left - 2 + 1) & 31);
++        }
++    } else {
++        a = cand_left;
++        b = cand_up;
++        c = (cand_left != INTRA_PLANAR && cand_up != INTRA_PLANAR) ?
++                INTRA_PLANAR :
++            (cand_left != INTRA_DC && cand_up != INTRA_DC) ?
++                INTRA_DC :
++                INTRA_ANGULAR_26;
++    }
++    // With the flag set idx picks a candidate; otherwise idx is a mode number that skips over the candidates
++    if (prev_intra_luma_pred_flag) {
++        intra_pred_mode = idx == 0 ? a : idx == 1 ? b : c;
++    } else {
++        // Sort lowest 1st (swap type matches the unsigned operands)
++        if (a > b)
++            FFSWAP(unsigned int, a, b);
++        if (a > c)
++            FFSWAP(unsigned int, a, c);
++        if (b > c)
++            FFSWAP(unsigned int, b, c);
++
++        intra_pred_mode = idx;
++        if (intra_pred_mode >= a)
++            intra_pred_mode++;
++        if (intra_pred_mode >= b)
++            intra_pred_mode++;
++        if (intra_pred_mode >= c)
++            intra_pred_mode++;
++    }
++
++    /* write the intra prediction units into the mv array */
++    set_ipm(s, lc, x0, y0, log2_pu_size, intra_pred_mode);
++    return intra_pred_mode;
++}
++
++static const uint8_t tab_mode_idx[] = { // 35 entries, indexed by intra mode 0..34; applied to the chroma mode when ctx_cfmt(s) == 2
++     0,  1,  2,  2,  2,  2,  3,  5,  7,  8, 10, 12, 13, 15, 17, 18, 19, 20,
++    21, 22, 23, 23, 24, 24, 25, 25, 26, 27, 27, 28, 28, 29, 29, 30, 31};
++
++static void intra_prediction_unit(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                                  const unsigned int x0, const unsigned int y0,
++                                  const unsigned int log2_cb_size)
++{
++    // Parse one CU's intra prediction syntax and derive the luma/chroma pred modes into lc->pu (4 luma modes if PART_NxN)
++    // Modes selected by intra_chroma_pred_mode values 0..3; value 4 reuses the luma mode
++    static const uint8_t chroma_cand[4] = { 0, 26, 10, 1 };
++    const int is_nxn = lc->cu.part_mode == PART_NxN;
++    const unsigned int n_parts = is_nxn ? 4 : 1;
++    const unsigned int half_size = (1 << (log2_cb_size - 1));
++    uint8_t mpm_flag[4];
++    unsigned int part;
++
++    // All the prev_intra_luma_pred_flags are read before any of the mode indices
++    for (part = 0; part < n_parts; ++part)
++        mpm_flag[part] = ff_hevc_rpi_prev_intra_luma_pred_flag_decode(lc);
++
++    for (part = 0; part < n_parts; ++part) {
++        // Bit 0 of the part number selects the right half, bit 1 the bottom half
++        const unsigned int part_x = (part & 1) != 0 ? x0 + half_size : x0;
++        const unsigned int part_y = (part & 2) != 0 ? y0 + half_size : y0;
++        unsigned int idx;
++
++        // idx is an mpm index when the flag is set, else the remaining-mode value
++        if (mpm_flag[part])
++            idx = ff_hevc_rpi_mpm_idx_decode(lc);
++        else
++            idx = ff_hevc_rpi_rem_intra_luma_pred_mode_decode(lc);
++
++        lc->pu.intra_pred_mode[part] =
++            luma_intra_pred_mode(s, lc, part_x, part_y, log2_cb_size - is_nxn, mpm_flag[part], idx);
++    }
++
++    if (ctx_cfmt(s) == 3) {
++        // One chroma mode per part
++        for (part = 0; part < n_parts; ++part) {
++            const int chroma_mode = ff_hevc_rpi_intra_chroma_pred_mode_decode(lc);
++
++            lc->pu.chroma_mode_c[part] = chroma_mode;
++            // A candidate that collides with the luma mode is replaced by mode 34
++            lc->pu.intra_pred_mode_c[part] =
++                chroma_mode == 4 ? lc->pu.intra_pred_mode[part] :
++                lc->pu.intra_pred_mode[part] == chroma_cand[chroma_mode] ? 34 :
++                chroma_cand[chroma_mode];
++        }
++    } else if (ctx_cfmt(s) == 2) {
++        // Single chroma mode, remapped through tab_mode_idx
++        const int chroma_mode = ff_hevc_rpi_intra_chroma_pred_mode_decode(lc);
++        const int mode_idx =
++            chroma_mode == 4 ? lc->pu.intra_pred_mode[0] :
++            lc->pu.intra_pred_mode[0] == chroma_cand[chroma_mode] ? 34 :
++            chroma_cand[chroma_mode];
++
++        lc->pu.chroma_mode_c[0] = chroma_mode;
++        lc->pu.intra_pred_mode_c[0] = tab_mode_idx[mode_idx];
++    } else if (ctx_cfmt(s) != 0) {
++        // Single chroma mode used as is; chroma_mode_c is not written on this path
++        const int chroma_mode = ff_hevc_rpi_intra_chroma_pred_mode_decode(lc);
++
++        lc->pu.intra_pred_mode_c[0] =
++            chroma_mode == 4 ? lc->pu.intra_pred_mode[0] :
++            lc->pu.intra_pred_mode[0] == chroma_cand[chroma_mode] ? 34 :
++            chroma_cand[chroma_mode];
++    }
++}
++
++static int hls_coding_unit(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                           const unsigned int x0, const unsigned int y0, const unsigned int log2_cb_size)
++{
++    const unsigned int cb_size          = 1 << log2_cb_size;
++    const unsigned int log2_min_cb_size = s->ps.sps->log2_min_cb_size;
++    const unsigned int min_cb_width     = s->ps.sps->min_cb_width;
++    const unsigned int x_cb             = x0 >> log2_min_cb_size;
++    const unsigned int y_cb             = y0 >> log2_min_cb_size;
++    const unsigned int idx              = log2_cb_size - 2;
++    const unsigned int qp_block_mask    = (1 << s->ps.pps->log2_min_cu_qp_delta_size) - 1;
++    int skip_flag = 0;
++    // Decode one coding unit at (x0, y0) of size 1 << log2_cb_size; returns 0, or a negative error from a callee
++    lc->cu.x                = x0;
++    lc->cu.y                = y0;
++    lc->cu.x_split          = x0;
++    lc->cu.y_split          = y0;
++    // Defaults that stand until the corresponding syntax elements are decoded below
++    lc->cu.pred_mode        = MODE_INTRA;
++    lc->cu.part_mode        = PART_2Nx2N;
++    lc->cu.intra_split_flag = 0;
++    lc->cu.cu_transquant_bypass_flag = 0;
++    lc->pu.intra_pred_mode[0] = 1;
++    lc->pu.intra_pred_mode[1] = 1;
++    lc->pu.intra_pred_mode[2] = 1;
++    lc->pu.intra_pred_mode[3] = 1;
++
++    if (s->ps.pps->transquant_bypass_enable_flag) {
++        lc->cu.cu_transquant_bypass_flag = ff_hevc_rpi_cu_transquant_bypass_flag_decode(lc);
++        if (lc->cu.cu_transquant_bypass_flag)
++            set_deblocking_bypass(s, x0, y0, log2_cb_size);
++    }
++    // The skip flag is only read outside I slices
++    if (s->sh.slice_type != HEVC_SLICE_I) {
++        lc->cu.pred_mode = MODE_INTER;
++        skip_flag = ff_hevc_rpi_skip_flag_decode(s, lc, x0, y0, x_cb, y_cb);
++    }
++
++    if (skip_flag) {
++        lc->cu.pred_mode = MODE_SKIP;
++        // A skipped CU is a single 2Nx2N PU with no residual
++        hls_prediction_unit(s, lc, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
++        intra_prediction_unit_default_value(s, lc, x0, y0, log2_cb_size);
++
++        if (!s->sh.disable_deblocking_filter_flag)
++            ff_hevc_rpi_deblocking_boundary_strengths(s, lc, x0, y0, log2_cb_size, 0);
++    } else {
++        int pcm_flag = 0;
++
++        if (s->sh.slice_type != HEVC_SLICE_I)
++            lc->cu.pred_mode = ff_hevc_rpi_pred_mode_decode(lc);
++        if (lc->cu.pred_mode != MODE_INTRA ||
++            log2_cb_size == s->ps.sps->log2_min_cb_size) {
++            lc->cu.part_mode        = ff_hevc_rpi_part_mode_decode(s, lc, log2_cb_size);
++            lc->cu.intra_split_flag = lc->cu.part_mode == PART_NxN &&
++                                      lc->cu.pred_mode == MODE_INTRA;
++        }
++
++        if (lc->cu.pred_mode == MODE_INTRA) {
++            if (lc->cu.part_mode == PART_2Nx2N &&
++                log2_cb_size <= s->ps.sps->pcm.log2_max_pcm_cb_size &&  // 0 if not enabled
++                log2_cb_size >= s->ps.sps->pcm.log2_min_pcm_cb_size &&
++                ff_hevc_rpi_pcm_flag_decode(lc) != 0)
++            {
++                int ret;
++                pcm_flag = 1;
++                intra_prediction_unit_default_value(s, lc, x0, y0, log2_cb_size);
++                if ((ret = hls_pcm_sample(s, lc, x0, y0, log2_cb_size)) < 0)
++                    return ret;
++
++                if (s->ps.sps->pcm.loop_filter_disable_flag)
++                    set_deblocking_bypass(s, x0, y0, log2_cb_size);
++            } else {
++                intra_prediction_unit(s, lc, x0, y0, log2_cb_size);
++            }
++        } else {
++            intra_prediction_unit_default_value(s, lc, x0, y0, log2_cb_size);
++            switch (lc->cu.part_mode) {
++            case PART_2Nx2N:
++                hls_prediction_unit(s, lc, x0, y0, cb_size, cb_size, log2_cb_size, 0, idx);
++                break;
++            case PART_2NxN:
++                hls_prediction_unit(s, lc, x0, y0,               cb_size, cb_size / 2, log2_cb_size, 0, idx);
++                lc->cu.y_split = y0 + cb_size / 2;
++                hls_prediction_unit(s, lc, x0, y0 + cb_size / 2, cb_size, cb_size / 2, log2_cb_size, 1, idx);
++                break;
++            case PART_Nx2N:
++                hls_prediction_unit(s, lc, x0,               y0, cb_size / 2, cb_size, log2_cb_size, 0, idx - 1);
++                lc->cu.x_split = x0 + cb_size / 2;
++                hls_prediction_unit(s, lc, x0 + cb_size / 2, y0, cb_size / 2, cb_size, log2_cb_size, 1, idx - 1);
++                break;
++            case PART_2NxnU:
++                hls_prediction_unit(s, lc, x0, y0,               cb_size, cb_size     / 4, log2_cb_size, 0, idx);
++                lc->cu.y_split = y0 + cb_size / 4;
++                hls_prediction_unit(s, lc, x0, y0 + cb_size / 4, cb_size, cb_size / 4 * 3, log2_cb_size, 1, idx);
++                break;
++            case PART_2NxnD:
++                hls_prediction_unit(s, lc, x0, y0,                   cb_size, cb_size / 4 * 3, log2_cb_size, 0, idx);
++                lc->cu.y_split = y0 + cb_size / 4 * 3;
++                hls_prediction_unit(s, lc, x0, y0 + cb_size / 4 * 3, cb_size, cb_size     / 4, log2_cb_size, 1, idx);
++                break;
++            case PART_nLx2N:
++                hls_prediction_unit(s, lc, x0,               y0, cb_size     / 4, cb_size, log2_cb_size, 0, idx - 2);
++                lc->cu.x_split = x0 + cb_size / 4;
++                hls_prediction_unit(s, lc, x0 + cb_size / 4, y0, cb_size * 3 / 4, cb_size, log2_cb_size, 1, idx - 2);
++                break;
++            case PART_nRx2N:
++                hls_prediction_unit(s, lc, x0,                   y0, cb_size / 4 * 3, cb_size, log2_cb_size, 0, idx - 2);
++                lc->cu.x_split = x0 + cb_size / 4 * 3;
++                hls_prediction_unit(s, lc, x0 + cb_size / 4 * 3, y0, cb_size     / 4, cb_size, log2_cb_size, 1, idx - 2);
++                break;
++            case PART_NxN:
++                hls_prediction_unit(s, lc, x0,               y0,               cb_size / 2, cb_size / 2, log2_cb_size, 0, idx - 1);
++                lc->cu.x_split = x0 + cb_size / 2;
++                hls_prediction_unit(s, lc, x0 + cb_size / 2, y0,               cb_size / 2, cb_size / 2, log2_cb_size, 1, idx - 1);
++                lc->cu.y_split = y0 + cb_size / 2;
++                hls_prediction_unit(s, lc, x0,               y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 2, idx - 1);
++                hls_prediction_unit(s, lc, x0 + cb_size / 2, y0 + cb_size / 2, cb_size / 2, cb_size / 2, log2_cb_size, 3, idx - 1);
++                break;
++            }
++        }
++        // Residual: skipped for PCM; rqt_root_cbf stays 1 unread for intra CUs and for merged 2Nx2N inter CUs
++        if (!pcm_flag) {
++            int rqt_root_cbf = 1;
++
++            if (lc->cu.pred_mode != MODE_INTRA &&
++                !(lc->cu.part_mode == PART_2Nx2N && lc->pu.merge_flag)) {
++                rqt_root_cbf = ff_hevc_rpi_no_residual_syntax_flag_decode(lc);
++            }
++            if (rqt_root_cbf) {
++                const unsigned int cbf_c = ctx_cfmt(s) == 0 ? 0 : (CBF_CR0 | CBF_CB0);
++                int ret;
++
++                lc->cu.max_trafo_depth = lc->cu.pred_mode == MODE_INTRA ?
++                                         s->ps.sps->max_transform_hierarchy_depth_intra + lc->cu.intra_split_flag :
++                                         s->ps.sps->max_transform_hierarchy_depth_inter;
++                // transform_tree does deblock_boundary_strengths
++                ret = hls_transform_tree(s, lc, x0, y0,
++                                         log2_cb_size, 0, 0, cbf_c);
++                if (ret < 0)
++                    return ret;
++            } else {
++                if (!s->sh.disable_deblocking_filter_flag)
++                    ff_hevc_rpi_deblocking_boundary_strengths(s, lc, x0, y0, log2_cb_size, 0);
++            }
++        }
++    }
++
++    // If the delta is still wanted then we haven't read the delta & therefore need to set qp here
++    if (lc->tu.is_cu_qp_delta_wanted)
++        ff_hevc_rpi_set_qPy(s, lc, x0, y0);
++    // When this CU ends on a qp-delta block boundary in both x and y, its qp becomes the next predictor
++    if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
++       ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0) {
++        lc->qPy_pred = lc->qp_y;
++    }
++    // Record the CU's qp over the min-CB cells it covers
++    set_bytes(s->qp_y_tab + y_cb * min_cb_width + x_cb, min_cb_width, log2_cb_size - log2_min_cb_size, lc->qp_y & 0xff);
++    // Stash (ct_depth << 1) | skip_flag in the up/left CABAC stashes, indexed in 8-pixel units
++    set_stash2(s->cabac_stash_up + (x0 >> 3), s->cabac_stash_left + (y0 >> 3), log2_cb_size - 3, (lc->ct_depth << 1) | skip_flag);
++
++    return 0;
++}
++
++// Returns:
++//  < 0  Error
++//  0    End of slice (terminate bit read as set) or no further CB inside the picture
++//  1    More data wanted (the recursive calls below store this result in more_data)
++static int hls_coding_quadtree(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, const int x0, const int y0,
++                               const int log2_cb_size, const unsigned int cb_depth)
++{
++    const int cb_size    = 1 << log2_cb_size;
++    int ret;
++    int split_cu;
++    // A CB above min size that crosses the picture edge is split without reading the flag
++    lc->ct_depth = cb_depth;
++    split_cu = (log2_cb_size > s->ps.sps->log2_min_cb_size);
++    if (x0 + cb_size <= s->ps.sps->width  &&
++        y0 + cb_size <= s->ps.sps->height &&
++        split_cu)
++    {
++        split_cu = ff_hevc_rpi_split_coding_unit_flag_decode(s, lc, cb_depth, x0, y0);
++    }
++
++    // Qp delta (and offset) need to remain wanted if cb_size < min until
++    // a coded block is found so we still initial state at depth 0 (outside
++    // this fn) and only reset here
++    if (s->ps.pps->cu_qp_delta_enabled_flag &&
++        log2_cb_size >= s->ps.pps->log2_min_cu_qp_delta_size)
++    {
++        lc->tu.is_cu_qp_delta_wanted = 1;
++        lc->tu.cu_qp_delta          = 0;
++    }
++    if (s->sh.cu_chroma_qp_offset_enabled_flag &&
++        log2_cb_size >= s->ps.pps->log2_min_cu_qp_delta_size)
++    {
++        lc->tu.cu_chroma_qp_offset_wanted = 1;
++    }
++
++    lc->tu.qp_divmod6[0] = s->ps.pps->qp_bd_x[0];
++    lc->tu.qp_divmod6[1] = s->ps.pps->qp_bd_x[1] + s->sh.slice_cb_qp_offset;
++    lc->tu.qp_divmod6[2] = s->ps.pps->qp_bd_x[2] + s->sh.slice_cr_qp_offset;
++
++    if (split_cu) {
++        int qp_block_mask = (1 << s->ps.pps->log2_min_cu_qp_delta_size) - 1;
++        const int cb_size_split = cb_size >> 1;
++        const int x1 = x0 + cb_size_split;
++        const int y1 = y0 + cb_size_split;
++        // Visit the four quadrants in order, skipping those that start outside the picture
++        int more_data = 0;
++
++        more_data = hls_coding_quadtree(s, lc, x0, y0, log2_cb_size - 1, cb_depth + 1);
++        if (more_data < 0)
++            return more_data;
++
++        if (more_data && x1 < s->ps.sps->width) {
++            more_data = hls_coding_quadtree(s, lc, x1, y0, log2_cb_size - 1, cb_depth + 1);
++            if (more_data < 0)
++                return more_data;
++        }
++        if (more_data && y1 < s->ps.sps->height) {
++            more_data = hls_coding_quadtree(s, lc, x0, y1, log2_cb_size - 1, cb_depth + 1);
++            if (more_data < 0)
++                return more_data;
++        }
++        if (more_data && x1 < s->ps.sps->width &&
++            y1 < s->ps.sps->height) {
++            more_data = hls_coding_quadtree(s, lc, x1, y1, log2_cb_size - 1, cb_depth + 1);
++            if (more_data < 0)
++                return more_data;
++        }
++
++        if(((x0 + (1<<log2_cb_size)) & qp_block_mask) == 0 &&
++            ((y0 + (1<<log2_cb_size)) & qp_block_mask) == 0)
++            lc->qPy_pred = lc->qp_y;
++        // Non-zero only if the last child wanted more and this CB's right or bottom edge is inside the picture
++        if (more_data)
++            return ((x1 + cb_size_split) < s->ps.sps->width ||
++                    (y1 + cb_size_split) < s->ps.sps->height);
++        else
++            return 0;
++    } else {
++        ret = hls_coding_unit(s, lc, x0, y0, log2_cb_size);
++        if (ret < 0)
++            return ret;
++        if ((!((x0 + cb_size) %
++               (1 << (s->ps.sps->log2_ctb_size))) ||
++             (x0 + cb_size >= s->ps.sps->width)) &&
++            (!((y0 + cb_size) %
++               (1 << (s->ps.sps->log2_ctb_size))) ||
++             (y0 + cb_size >= s->ps.sps->height))) {
++            int end_of_slice_flag = ff_hevc_rpi_get_cabac_terminate(&lc->cc); // read only when this CU ends the CTB (or picture) in both x and y
++            return !end_of_slice_flag;
++        } else {
++            return 1;
++        }
++    }
++
++    return 0;  // NEVER
++}
++
++static void hls_decode_neighbour(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++                                 const int x_ctb, const int y_ctb, const int ctb_addr_ts)
++{
++    const unsigned int ctb_size          = 1 << s->ps.sps->log2_ctb_size;
++    const unsigned int ctb_addr_rs       = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
++    const unsigned int ctb_addr_rs_in_slice = ctb_addr_rs - s->sh.slice_addr;  // slice_addr = RS addr of start of slice
++    const unsigned int ctb_flags = s->ps.pps->ctb_ts_flags[ctb_addr_ts];
++    const unsigned int line_w = s->ps.sps->ctb_width;
++    // Per-CTB setup: record the slice address, clip the CTB extent to the picture, derive boundary and availability flags
++    s->tab_slice_address[ctb_addr_rs] = s->sh.slice_addr;
++
++    lc->end_of_ctb_x = FFMIN(x_ctb + ctb_size, s->ps.sps->width);
++    lc->end_of_ctb_y = FFMIN(y_ctb + ctb_size, s->ps.sps->height);
++
++    lc->boundary_flags = 0;
++    // Tile boundaries come from the PPS ts flags; slice boundaries from comparing recorded slice addresses
++    if ((ctb_flags & CTB_TS_FLAGS_SOTL) != 0)
++        lc->boundary_flags |= BOUNDARY_LEFT_TILE;
++    if (x_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - 1])
++        lc->boundary_flags |= BOUNDARY_LEFT_SLICE;
++    if ((ctb_flags & CTB_TS_FLAGS_TOT) != 0)
++        lc->boundary_flags |= BOUNDARY_UPPER_TILE;
++    if (y_ctb > 0 && s->tab_slice_address[ctb_addr_rs] != s->tab_slice_address[ctb_addr_rs - line_w])
++        lc->boundary_flags |= BOUNDARY_UPPER_SLICE;
++
++    // Use line width rather than tile width for addr_in_slice test as
++    // addr_in_slice is in raster units
++
++    lc->ctb_avail =
++        ((lc->boundary_flags & (BOUNDARY_LEFT_SLICE | BOUNDARY_LEFT_TILE)) == 0 ? AVAIL_L : 0) |
++        ((lc->boundary_flags & (BOUNDARY_UPPER_SLICE | BOUNDARY_UPPER_TILE)) == 0 ? AVAIL_U : 0) |
++        ((lc->boundary_flags & (BOUNDARY_LEFT_TILE | BOUNDARY_UPPER_TILE)) == 0 &&
++            (ctb_addr_rs_in_slice > line_w) ? AVAIL_UL : 0) |
++        ((ctb_flags & (CTB_TS_FLAGS_EOTL | CTB_TS_FLAGS_TOT)) == 0 &&
++            (ctb_addr_rs_in_slice + 1 >= line_w) ? AVAIL_UR : 0);
++    // Down-left never avail at CTB level
++}
++
++
++static void rpi_execute_dblk_cmds(const HEVCRpiContext * const s, HEVCRpiJob * const jb)
++{
++    // Non-zero when the job's last CTU has CTB_TS_FLAGS_EOT set in the PPS flags table
++    const int last_is_eot = (s->ps.pps->ctb_ts_flags[jb->ctu_ts_last] & CTB_TS_FLAGS_EOT) != 0;
++    const int y = ff_hevc_rpi_hls_filter_blk(s, jb->bounds, last_is_eot);
++
++    // Signal recon progress with y - 1. Progress is held in s, so const is
++    // cast away; this really shouldn't confuse anything
++    if (y > 0)
++        ff_hevc_rpi_progress_signal_recon((HEVCRpiContext *)s, y - 1);
++
++    // Job done now: release it
++    // ? Move outside this fn
++    job_free(s->jbc, jb);
++}
++
++// I-pred, transform_and_add for all block types done here by replaying the
++// job's queued pred commands (jb->intra) in order.  All ARM
++static void rpi_execute_pred_cmds(const HEVCRpiContext * const s, HEVCRpiJob * const jb)
++{
++    unsigned int i;
++    HEVCRpiIntraPredEnv * const iap = &jb->intra;
++    const HEVCPredCmd *cmd = iap->cmds;
++
++#if !RPI_WORKER_WAIT_PASS_0
++    rpi_sem_wait(&jb->sem);  // Pass 0 did not wait on the job sem itself, so wait here
++    rpi_cache_flush_execute(jb->rfe);  // Invalidate data set up in pass1
++#endif
++
++    for (i = iap->n; i > 0; i--, cmd++)  // iap->n commands, walked first to last
++    {
++        switch (cmd->type)
++        {
++            case RPI_PRED_INTRA:
++                s->hpc.intra_pred(s, cmd->i_pred.mode, cmd->i_pred.x, cmd->i_pred.y, cmd->avail, cmd->size);
++                break;
++            case RPI_PRED_INTRA_C:
++                s->hpc.intra_pred_c(s, cmd->i_pred.mode, cmd->i_pred.x, cmd->i_pred.y, cmd->avail, cmd->size);
++                break;
++            case RPI_PRED_ADD_RESIDUAL:  // The add_residual* tables are all indexed by size - 2
++                s->hevcdsp.add_residual[cmd->size - 2](cmd->ta.dst, (int16_t *)cmd->ta.buf, cmd->ta.stride);
++                break;
++            case RPI_PRED_ADD_DC:
++                s->hevcdsp.add_residual_dc[cmd->size - 2](cmd->dc.dst, cmd->dc.stride, cmd->dc.dc);
++                break;
++            case RPI_PRED_ADD_RESIDUAL_U:
++                s->hevcdsp.add_residual_u[cmd->size - 2](cmd->ta.dst, (int16_t *)cmd->ta.buf, cmd->ta.stride, cmd->ta.dc);
++                break;
++            case RPI_PRED_ADD_RESIDUAL_V:
++                s->hevcdsp.add_residual_v[cmd->size - 2](cmd->ta.dst, (int16_t *)cmd->ta.buf, cmd->ta.stride, cmd->ta.dc);
++                break;
++            case RPI_PRED_ADD_RESIDUAL_C:
++                s->hevcdsp.add_residual_c[cmd->size - 2](cmd->ta.dst, (int16_t *)cmd->ta.buf, cmd->ta.stride);
++                break;
++            case RPI_PRED_ADD_DC_U:
++            case RPI_PRED_ADD_DC_V:  // U and V DC adds share the same chroma fn
++                s->hevcdsp.add_residual_dc_c[cmd->size - 2](cmd->dc.dst, cmd->dc.stride, cmd->dc.dc);
++                break;
++
++            case RPI_PRED_I_PCM:  // size is used as a log2 here (1 << size is passed on)
++                pcm_extract(s, cmd->i_pcm.src, cmd->i_pcm.src_len, cmd->i_pcm.x, cmd->i_pcm.y, 1 << cmd->size);
++                break;
++
++            default:  // Unknown command type: log and abort the process
++                av_log(s->avctx, AV_LOG_PANIC, "Bad command %d in worker pred Q\n", cmd->type);
++                abort();
++        }
++    }
++
++    // Mark done - the command list is now empty
++    iap->n = 0;
++}
++
++
++// Set initial uniform job values & mark the job as still filling (ctu_ts_last = -1)
++static void rpi_begin(const HEVCRpiContext * const s, HEVCRpiJob * const jb, const unsigned int ctu_ts_first)
++{
++    unsigned int i;
++    HEVCRpiInterPredEnv *const cipe = &jb->chroma_ip;
++    HEVCRpiInterPredEnv *const yipe = &jb->luma_ip;
++    const HEVCRpiSPS * const sps = s->ps.sps;
++
++    const uint16_t pic_width_y   = sps->width;
++    const uint16_t pic_height_y  = sps->height;
++
++    const uint16_t pic_width_c   = sps->width >> ctx_hshift(s, 1);
++    const uint16_t pic_height_c  = sps->height >> ctx_vshift(s, 1);
++
++    // We expect the pointer to change if we use another sps
++    if (sps != jb->sps)
++    {
++        worker_pic_free_one(jb);  // Drop whatever was allocated for the previous sps
++
++        set_ipe_from_ici(cipe, &ipe_init_infos[s->ps.sps->bit_depth - 8].chroma);
++        set_ipe_from_ici(yipe, &ipe_init_infos[s->ps.sps->bit_depth - 8].luma);
++
++        {
++            const int coefs_per_luma = HEVC_MAX_CTB_SIZE * HEVC_RPI_MAX_WIDTH;
++            const int coefs_per_chroma = (coefs_per_luma * 2) >> (ctx_vshift(s, 1) + ctx_hshift(s, 1));  // x2 then scaled down by both chroma shifts
++            worker_pic_alloc_one(jb, coefs_per_luma + coefs_per_chroma);
++        }
++
++        jb->sps = sps;
++    }
++
++    jb->waited = 0;
++    jb->ctu_ts_first = ctu_ts_first;
++    jb->ctu_ts_last = -1;  // -1 => job not yet ready
++
++    rpi_inter_pred_reset(cipe);
++    for (i = 0; i < cipe->n; i++) {  // Write the setup block at the head of each chroma Q
++        HEVCRpiInterPredQ * const cp = cipe->q + i;
++        qpu_mc_pred_c_s_t * const u = &cp->qpu_mc_base->c.s;
++
++        u->next_src1.x = 0;
++        u->next_src1.y = 0;
++        u->next_src1.base = 0;
++        u->pic_cw = pic_width_c;
++        u->pic_ch = pic_height_c;
++        u->stride2 = av_rpi_sand_frame_stride2(s->frame);
++        u->stride1 = av_rpi_sand_frame_stride1(s->frame);
++        cp->last_l0 = &u->next_src1;
++
++        u->next_fn = 0;
++        u->next_src2.x = 0;
++        u->next_src2.y = 0;
++        u->next_src2.base = 0;
++        cp->last_l1 = &u->next_src2;
++
++        cp->qpu_mc_curr = (qpu_mc_pred_cmd_t *)(u + 1);  // Commands start directly after the setup block
++    }
++
++    rpi_inter_pred_reset(yipe);
++    for (i = 0; i < yipe->n; i++) {  // Same again for each luma Q
++        HEVCRpiInterPredQ * const yp = yipe->q + i;
++        qpu_mc_pred_y_s_t * const y = &yp->qpu_mc_base->y.s;
++
++        y->next_src1.x = 0;
++        y->next_src1.y = 0;
++        y->next_src1.base = 0;
++        y->next_src2.x = 0;
++        y->next_src2.y = 0;
++        y->next_src2.base = 0;
++        y->pic_h = pic_height_y;
++        y->pic_w = pic_width_y;
++        y->stride2 = av_rpi_sand_frame_stride2(s->frame);
++        y->stride1 = av_rpi_sand_frame_stride1(s->frame);
++        y->next_fn = 0;
++        yp->last_l0 = &y->next_src1;
++        yp->last_l1 = &y->next_src2;
++
++        yp->qpu_mc_curr = (qpu_mc_pred_cmd_t *)(y + 1);  // Commands start directly after the setup block
++    }
++
++    jb->last_y8_p = NULL;
++    jb->last_y8_l1 = NULL;
++
++    for (i = 0; i != FF_ARRAY_ELEMS(jb->progress_req); ++i) {
++        jb->progress_req[i] = -1;  // -1 => no progress wait needed for this entry
++    }
++
++    worker_pic_reset(&jb->coeffs);
++}
++
++// Terminate every QPU command Q in ipe and add them to vqj.  Returns 0 if ipe is unused, else 1
++#if !RPI_QPU_EMU_Y || !RPI_QPU_EMU_C
++static unsigned int mc_terminate_add_qpu(const HEVCRpiContext * const s,
++                                     const vpu_qpu_job_h vqj,
++                                     rpi_cache_flush_env_t * const rfe,
++                                     HEVCRpiInterPredEnv * const ipe)
++{
++    unsigned int i;
++    uint32_t mail[QPU_N_MAX][QPU_MAIL_EL_VALS];
++    unsigned int max_block = 0;  // Byte length of the fullest Q
++
++    if (!ipe->used) {
++        return 0;
++    }
++
++    if (ipe->curr != 0) {
++        rpi_inter_pred_sync(ipe);
++    }
++
++    // Add final commands to Q
++    for(i = 0; i != ipe->n; ++i) {
++        HEVCRpiInterPredQ * const yp = ipe->q + i;
++        qpu_mc_src_t *const p0 = yp->last_l0;
++        qpu_mc_src_t *const p1 = yp->last_l1;
++        const unsigned int block_size = (char *)yp->qpu_mc_curr - (char *)yp->qpu_mc_base;
++
++        if (block_size > max_block)
++            max_block = block_size;
++
++        qpu_mc_link_set(yp->qpu_mc_curr, yp->code_exit);  // Last link in the Q goes to the exit code
++
++        // Need to set the srcs for L0 & L1 to something that can be (pointlessly) prefetched
++        p0->x = MC_DUMMY_X;
++        p0->y = MC_DUMMY_Y;
++        p0->base = s->qpu_dummy_frame_qpu;
++        p1->x = MC_DUMMY_X;
++        p1->y = MC_DUMMY_Y;
++        p1->base = s->qpu_dummy_frame_qpu;
++
++        yp->last_l0 = NULL;
++        yp->last_l1 = NULL;
++
++        // Add to mailbox list
++        mail[i][0] = ipe->gptr.vc + ((uint8_t *)yp->qpu_mc_base - ipe->gptr.arm);  // Q base: ARM offset rebased onto gptr.vc
++        mail[i][1] = yp->code_setup;
++    }
++
++    // We don't need invalidate here as the uniforms aren't changed by the QPU
++    // and leaving them in ARM cache avoids (pointless) pre-reads when writing
++    // new values which seems to give us a small performance advantage
++    //
++    // In most cases we will not have a completely packed set of uniforms and as
++    // we have a 2d invalidate we writeback all uniform Qs to the depth of the
++    // fullest
++    rpi_cache_flush_add_gm_blocks(rfe, &ipe->gptr, RPI_CACHE_FLUSH_MODE_WRITEBACK,
++                                  (uint8_t *)ipe->q[0].qpu_mc_base - ipe->gptr.arm, max_block,
++                                  ipe->n, ipe->max_fill + ipe->min_gap);
++    vpu_qpu_job_add_qpu(vqj, ipe->n, (uint32_t *)mail);
++
++    return 1;
++}
++#endif
++// Emulation twin of mc_terminate_add_qpu: terminates the Qs only (vqj & rfe are not used here)
++#if RPI_QPU_EMU_Y || RPI_QPU_EMU_C
++static unsigned int mc_terminate_add_emu(const HEVCRpiContext * const s,
++                                     const vpu_qpu_job_h vqj,
++                                     rpi_cache_flush_env_t * const rfe,
++                                     HEVCRpiInterPredEnv * const ipe)
++{
++    unsigned int i;
++    if (!ipe->used) {  // Nothing queued - return 0 so the caller knows there is no work
++        return 0;
++    }
++
++    if (ipe->curr != 0) {
++        rpi_inter_pred_sync(ipe);
++    }
++
++    // Add final commands to Q
++    for(i = 0; i != ipe->n; ++i) {
++        HEVCRpiInterPredQ * const yp = ipe->q + i;
++        qpu_mc_src_t *const p0 = yp->last_l0;
++        qpu_mc_src_t *const p1 = yp->last_l1;
++
++        yp->qpu_mc_curr->data[-1] = yp->code_exit;  // Exit code goes in the word just before the current position
++
++        // Need to set the srcs for L0 & L1 to something that can be (pointlessly) prefetched
++        p0->x = MC_DUMMY_X;
++        p0->y = MC_DUMMY_Y;
++        p0->base = s->qpu_dummy_frame_emu;
++        p1->x = MC_DUMMY_X;
++        p1->y = MC_DUMMY_Y;
++        p1->base = s->qpu_dummy_frame_emu;
++
++        yp->last_l0 = NULL;
++        yp->last_l1 = NULL;
++    }
++
++    return 1;
++}
++#endif
++
++// Pick the QPU or the emulated terminate fn independently for luma (y) and chroma (c)
++#if RPI_QPU_EMU_Y
++#define mc_terminate_add_y mc_terminate_add_emu
++#else
++#define mc_terminate_add_y mc_terminate_add_qpu
++#endif
++#if RPI_QPU_EMU_C
++#define mc_terminate_add_c mc_terminate_add_emu
++#else
++#define mc_terminate_add_c mc_terminate_add_qpu
++#endif
++
++// Writeback & invalidate the cache for the whole of frame (s is not used)
++static void flush_frame(HEVCRpiContext *s,AVFrame *frame)
++{
++    rpi_cache_buf_t cbuf;  // Local storage for the one-shot flush env
++    rpi_cache_flush_env_t * rfe = rpi_cache_flush_init(&cbuf);
++    rpi_cache_flush_add_frame(rfe, frame, RPI_CACHE_FLUSH_MODE_WB_INVALIDATE);
++    rpi_cache_flush_finish(rfe);
++}
++
++static void job_gen_bounds(const HEVCRpiContext * const s, HEVCRpiJob * const jb)
++{
++    // Work out the pel rectangle spanned by the job's first..last CTU (ts order)
++    const HEVCRpiSPS * const sps = s->ps.sps;
++    const unsigned int first_rs = s->ps.pps->ctb_addr_ts_to_rs[jb->ctu_ts_first];
++    const unsigned int rs_span = s->ps.pps->ctb_addr_ts_to_rs[jb->ctu_ts_last] - first_rs;
++    RpiBlk *const blk = &jb->bounds;
++    av_assert1(jb->ctu_ts_first <= jb->ctu_ts_last);
++    blk->x = (first_rs % sps->ctb_width) << sps->log2_ctb_size;
++    blk->y = (first_rs / sps->ctb_width) << sps->log2_ctb_size;
++    blk->w = (rs_span % sps->ctb_width + 1) << sps->log2_ctb_size;
++    blk->h = (rs_span / sps->ctb_width + 1) << sps->log2_ctb_size;
++    blk->w = FFMIN(blk->w, sps->width - blk->x);   // whole CTBs may overhang the picture, so clip
++    blk->h = FFMIN(blk->h, sps->height - blk->y);
++}
++
++#if RPI_PASSES == 2
++static void worker_core2(HEVCRpiContext * const s, HEVCRpiJob * const jb)
++{
++    // Combined pred + deblock pass used when RPI_PASSES == 2: pred commands first
++    rpi_execute_pred_cmds(s, jb);
++
++    // Then deblock the job's bounds - this call also frees the job
++    rpi_execute_dblk_cmds(s, jb);
++}
++#endif
++
++// Core execution tasks (pass 0): queue coefficient VPU work & inter-pred Qs, then run them
++static void worker_core(const HEVCRpiContext * const s, HEVCRpiJob * const jb)
++{
++    int pred_y, pred_c;  // 1 if the luma / chroma inter-pred env had work queued, else 0
++    vpu_qpu_job_env_t qvbuf;
++    const vpu_qpu_job_h vqj = vpu_qpu_job_init(&qvbuf);
++#if RPI_WORKER_WAIT_PASS_0
++    int do_wait;
++#endif
++
++    {
++        const HEVCRpiCoeffsEnv * const cf = &jb->coeffs;
++        if (cf->s[3].n + cf->s[2].n != 0)  // Only add VPU work if s[2] or s[3] holds coefficients
++        {
++            const unsigned int csize = sizeof(cf->s[3].buf[0]);
++            const unsigned int offset32 = ((cf->s[3].buf - cf->s[2].buf) - cf->s[3].n) * csize;
++            unsigned int n16 = (cf->s[2].n >> 8);   // n / 256 - presumably whole 16x16 blocks; confirm
++            unsigned int n32 = (cf->s[3].n >> 10);  // n / 1024 - presumably whole 32x32 blocks; confirm
++#if RPI_COMPRESS_COEFFS
++            if (cf->s[2].packed) {  // High 16 bits get a second count: n16 itself, or packed_n >> 8
++                n16 = n16 | (n16<<16);
++            } else {
++                const unsigned int npack16 = (cf->s[2].packed_n>>8);
++                n16 = n16 | (npack16<<16);
++            }
++            if (cf->s[3].packed) {  // Likewise for n32, using packed_n >> 10
++                n32 = n32 | (n32<<16);
++            } else {
++                const unsigned int npack32 = (cf->s[3].packed_n>>10);
++                n32 = n32 | (npack32<<16);
++            }
++#endif
++            vpu_qpu_job_add_vpu(vqj,
++                vpu_get_fn(s->ps.sps->bit_depth),
++                vpu_get_constants(),
++                cf->gptr.vc,
++                n16,
++                cf->gptr.vc + offset32,
++                n32,
++                0);
++
++            rpi_cache_flush_add_gm_range(jb->rfe, &cf->gptr, RPI_CACHE_FLUSH_MODE_WB_INVALIDATE, 0, cf->s[2].n * csize);
++            rpi_cache_flush_add_gm_range(jb->rfe, &cf->gptr, RPI_CACHE_FLUSH_MODE_WB_INVALIDATE, offset32, cf->s[3].n * csize);
++        }
++    }
++
++    pred_c = mc_terminate_add_c(s, vqj, jb->rfe, &jb->chroma_ip);
++
++// We could take a sync here and try to locally overlap QPU processing with ARM
++// but testing showed a slightly negative benefit with noticeable extra complexity
++
++    pred_y = mc_terminate_add_y(s, vqj, jb->rfe, &jb->luma_ip);
++
++    // Returns 0 if nothing to do, 1 if sync added
++#if RPI_WORKER_WAIT_PASS_0
++    do_wait = vpu_qpu_job_add_sync_sem(vqj, &jb->sem);
++#else
++    if (vpu_qpu_job_add_sync_sem(vqj, &jb->sem) == 0)
++        sem_post(&jb->sem);  // No sync was added so post the sem ourselves for the later wait
++#endif
++
++    rpi_cache_flush_execute(jb->rfe);
++
++    // Await progress as required
++    // jb->waited will only be clear if we have already tested the progress values
++    // (in worker_submit_job) and found we don't have to wait
++    if (jb->waited)
++    {
++        unsigned int i;
++        for (i = 0; i != FF_ARRAY_ELEMS(jb->progress_req); ++i) {
++            if (jb->progress_req[i] >= 0) {  // < 0 => nothing required from DPB entry i
++                ff_hevc_rpi_progress_wait_recon(s, jb, s->DPB + i, jb->progress_req[i]);
++            }
++        }
++    }
++
++    vpu_qpu_job_finish(vqj);
++
++    // We always work on a rectangular block
++    if (pred_y || pred_c)
++    {
++        rpi_cache_flush_add_frame_block(jb->rfe, s->frame, RPI_CACHE_FLUSH_MODE_INVALIDATE,
++                                        jb->bounds.x, jb->bounds.y, jb->bounds.w, jb->bounds.h,
++                                        ctx_vshift(s, 1), pred_y, pred_c);
++    }
++
++    // If we have emulated VPU ops - do it here
++#if RPI_QPU_EMU_Y || RPI_QPU_EMU_C
++    if (av_rpi_is_sand8_frame(s->frame))  // sand8 frames use the c8 shader, anything else the c16 one
++    {
++#if RPI_QPU_EMU_Y && RPI_QPU_EMU_C
++        ff_hevc_rpi_shader_c8(s, &jb->luma_ip, &jb->chroma_ip);
++#elif RPI_QPU_EMU_Y
++        ff_hevc_rpi_shader_c8(s, &jb->luma_ip, NULL);
++#else
++        ff_hevc_rpi_shader_c8(s, NULL, &jb->chroma_ip);
++#endif
++    }
++    else
++    {
++#if RPI_QPU_EMU_Y && RPI_QPU_EMU_C
++        ff_hevc_rpi_shader_c16(s, &jb->luma_ip, &jb->chroma_ip);
++#elif RPI_QPU_EMU_Y
++        ff_hevc_rpi_shader_c16(s, &jb->luma_ip, NULL);
++#else
++        ff_hevc_rpi_shader_c16(s, NULL, &jb->chroma_ip);
++#endif
++    }
++#endif
++
++#if RPI_WORKER_WAIT_PASS_0
++    if (do_wait)  // Only wait if a sync was actually added above
++        rpi_sem_wait(&jb->sem);
++    rpi_cache_flush_execute(jb->rfe);
++#endif
++}
++
++// Release the Q array and the GPU memory held by an inter-pred env
++static void rpi_free_inter_pred(HEVCRpiInterPredEnv * const ipe)
++{
++    av_freep(&ipe->q);  // av_freep also NULLs ipe->q
++    gpu_free(&ipe->gptr);
++}
++
++static HEVCRpiJob * job_new(void)
++{
++    // Allocate a job with its sem, flush env, intra cmd list and both inter-pred
++    // envs.  Returns NULL on failure with everything built so far released.
++    HEVCRpiJob * const jb = av_mallocz(sizeof(HEVCRpiJob));
++    if (jb == NULL)
++        return NULL;
++
++    sem_init(&jb->sem, 0, 0);
++    jb->rfe = rpi_cache_flush_init(&jb->flush_buf);
++    ff_hevc_rpi_progress_init_wait(&jb->progress_wait);
++
++    jb->intra.n = 0;
++    if ((jb->intra.cmds = av_mallocz(sizeof(HEVCPredCmd) * RPI_MAX_PRED_CMDS)) == NULL)
++        goto fail1;
++
++    // * Sizeof the union structure might be overkill but at the moment it
++    //   is correct (it certainly isn't going to be too small)
++    // * Max fill is set to slack/2 from the end of the Q.  If any Q exceeds it
++    //   we schedule by size (so that Q should only see syncs from then on).
++    //   The overflow response is aggressive enough that the threshold could
++    //   sit nearer the end, but no real stream is expected to hit it anyway.
++    if (rpi_inter_pred_alloc(&jb->chroma_ip,
++                         QPU_N_MAX, QPU_N_GRP,
++                         QPU_C_COMMANDS * sizeof(qpu_mc_pred_c_t) + QPU_C_SYNCS * sizeof(uint32_t),
++                         QPU_C_CMD_SLACK_PER_Q * sizeof(qpu_mc_pred_c_t) / 2) != 0)
++        goto fail2;
++    if (rpi_inter_pred_alloc(&jb->luma_ip,
++                         QPU_N_MAX,  QPU_N_GRP,
++                         QPU_Y_COMMANDS * sizeof(qpu_mc_pred_y_t) + QPU_Y_SYNCS * sizeof(uint32_t),
++                         QPU_Y_CMD_SLACK_PER_Q * sizeof(qpu_mc_pred_y_t) / 2) != 0)
++        goto fail3;
++
++    return jb;
++
++fail3:  // luma alloc failed: chroma was fully allocated by now so must go too
++    rpi_free_inter_pred(&jb->luma_ip);
++    rpi_free_inter_pred(&jb->chroma_ip);
++fail2:
++    av_freep(&jb->intra.cmds);
++fail1:
++    ff_hevc_rpi_progress_kill_wait(&jb->progress_wait);
++    rpi_cache_flush_finish(jb->rfe);
++    sem_destroy(&jb->sem);
++    av_free(jb);  // the job struct itself was previously leaked on every failure path
++    return NULL;
++}
++
++static void job_delete(HEVCRpiJob * const jb)
++{   // Free a job and everything it owns; jb must not be used afterwards
++    worker_pic_free_one(jb);
++    ff_hevc_rpi_progress_kill_wait(&jb->progress_wait);
++    rpi_free_inter_pred(&jb->chroma_ip);
++    rpi_free_inter_pred(&jb->luma_ip);
++    av_freep(&jb->intra.cmds);
++    rpi_cache_flush_finish(jb->rfe);  // Not really needed - should do nothing
++    sem_destroy(&jb->sem);
++    av_free(jb);  // Finally the job struct itself
++}
++
++static void jbg_delete(HEVCRpiJobGlobal * const jbg)
++{
++    // Tear down a global job pool: delete every job on the free list, then
++    // the lock and the pool itself.  A NULL pool is accepted and ignored.
++    HEVCRpiJob * job;
++    HEVCRpiJob * following;
++
++    if (jbg == NULL)
++        return;
++
++    // Read the link before job_delete() frees the node that holds it
++    for (job = jbg->free1; job != NULL; job = following) {
++        following = job->next;
++        job_delete(job);
++    }
++    pthread_mutex_destroy(&jbg->lock);
++    av_free(jbg);
++}
++
++static HEVCRpiJobGlobal * jbg_new(unsigned int job_count)
++{
++    // Build a global job pool holding job_count jobs on its free list.
++    // Returns NULL if the pool or any job cannot be allocated.
++    unsigned int remaining;
++    HEVCRpiJobGlobal * const pool = av_mallocz(sizeof(HEVCRpiJobGlobal));
++
++    if (pool == NULL)
++        return NULL;
++
++    pthread_mutex_init(&pool->lock, NULL);
++
++    for (remaining = job_count; remaining != 0; --remaining) {
++        HEVCRpiJob * const fresh = job_new();
++        if (fresh == NULL) {
++            // Releases the jobs pushed so far along with the pool
++            jbg_delete(pool);
++            return NULL;
++        }
++        fresh->next = pool->free1;   // push onto the head of the free list
++        pool->free1 = fresh;
++    }
++    return pool;
++}
++
++static void rpi_job_ctl_delete(HEVCRpiJobCtl * const jbc)
++{
++    HEVCRpiJobGlobal * jbg;
++    // Free a job ctl (NULL is ignored) and drop its ref on the global job context
++    if (jbc == NULL)
++        return;
++
++    jbg = jbc->jbg;  // Remember this now - jbc is freed before the deref below
++
++    if (jbc->jb1 != NULL)
++        job_delete(jbc->jb1);
++
++    pthread_mutex_destroy(&jbc->in_lock);
++    sem_destroy(&jbc->sem_out);
++    av_free(jbc);
++
++    // Deref the global job context - the last ref out (old count == 1) deletes it
++    if (jbg != NULL && atomic_fetch_add(&jbg->ref_count, -1) == 1)
++        jbg_delete(jbg);
++}
++
++static HEVCRpiJobCtl * rpi_job_ctl_new(HEVCRpiJobGlobal *const jbg)
++{
++    HEVCRpiJobCtl * const jbc = av_mallocz(sizeof(HEVCRpiJobCtl));
++    // Create a job ctl that holds a ref on jbg and owns one local job (jb1); NULL on failure
++    if (jbc == NULL)
++        return NULL;
++
++    jbc->jbg = jbg;
++    atomic_fetch_add(&jbg->ref_count, 1);  // Ref is dropped again by rpi_job_ctl_delete
++
++    sem_init(&jbc->sem_out, 0, RPI_MAX_JOBS);  // sem_out starts with a count of RPI_MAX_JOBS
++    pthread_mutex_init(&jbc->in_lock, NULL);
++
++    if ((jbc->jb1 = job_new()) == NULL)
++        goto fail;
++    jbc->jb1->jbc_local = jbc;
++
++    return jbc;
++
++fail:  // NOTE(review): this drops the jbg ref too, so jbg may be deleted here - confirm callers expect that
++    rpi_job_ctl_delete(jbc);
++    return NULL;
++}
++
++
++// Set up the worker pass queues (last pass first) and start them all
++static av_cold void hevc_init_worker(HEVCRpiContext * const s)
++{
++#if RPI_PASSES == 2
++    pass_queue_init(s->passq + 1, s, worker_core2, &s->jbc->sem_out, 1);  // pred + deblock combined
++#elif RPI_PASSES == 3
++    pass_queue_init(s->passq + 2, s, rpi_execute_dblk_cmds, &s->jbc->sem_out, 2);
++    pass_queue_init(s->passq + 1, s, rpi_execute_pred_cmds, &s->passq[2].sem_in, 1);
++#else
++#error Passes confused
++#endif
++    pass_queue_init(s->passq + 0, s, worker_core, &s->passq[1].sem_in, 0);  // Pass 0 is given pass 1's sem_in
++
++    pass_queues_start_all(s);
++}
++
++static av_cold void hevc_exit_worker(HEVCRpiContext *s)
++{
++    pass_queues_term_all(s);
++    // Queues are terminated then killed before the job ctl goes away
++    pass_queues_kill_all(s);
++
++    rpi_job_ctl_delete(s->jbc);
++    s->jbc = NULL;  // Leave no dangling pointer to the freed ctl
++}
++
++// Sanity check the slice header against the pps & init lc for decode.  Returns 0 or AVERROR_INVALIDDATA
++static int slice_start(const HEVCRpiContext * const s, HEVCRpiLocalContext *const lc)
++{
++    const int ctb_addr_ts = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr];
++    const int tiles = s->ps.pps->num_tile_rows * s->ps.pps->num_tile_columns;
++    const unsigned int tile_id = s->ps.pps->tile_id[ctb_addr_ts];
++
++    // Check for obvious disasters
++    if (ctb_addr_ts == 0 && s->sh.dependent_slice_segment_flag) {
++        av_log(s->avctx, AV_LOG_ERROR, "Impossible initial tile.\n");
++        return AVERROR_INVALIDDATA;
++    }
++
++    // If dependent then ctb_addr_ts != 0 from previous check
++    if (s->sh.dependent_slice_segment_flag) {
++        int prev_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts - 1];
++        if (s->tab_slice_address[prev_rs] != s->sh.slice_addr) {  // Previous CTB (ts order) must be in the same slice
++            av_log(s->avctx, AV_LOG_ERROR, "Previous slice segment missing\n");
++            return AVERROR_INVALIDDATA;
++        }
++    }
++
++    if (!s->ps.pps->entropy_coding_sync_enabled_flag &&
++        tile_id + s->sh.num_entry_point_offsets >= tiles)
++    {
++        av_log(s->avctx, AV_LOG_ERROR, "Entry points exceed tiles\n");
++        return AVERROR_INVALIDDATA;
++    }
++
++    // Tiled stuff must start at start of tile if it has multiple entry points
++    if (!s->ps.pps->entropy_coding_sync_enabled_flag &&
++        s->sh.num_entry_point_offsets != 0 &&
++        ctb_addr_ts != s->ps.pps->tile_pos_ts[tile_id])
++    {
++        av_log(s->avctx, AV_LOG_ERROR, "Multiple tiles in slice; slice start != tile start\n");
++        return AVERROR_INVALIDDATA;
++    }
++
++    ff_hevc_rpi_cabac_init_decoder(lc);
++
++    // Setup any required decode vars
++    lc->cabac_init_req = !s->sh.dependent_slice_segment_flag;
++
++//    printf("SS: req=%d, sol=%d, sot=%d\n", lc->cabac_init_req, sol, sot);
++    lc->qp_y = s->sh.slice_qp;
++
++    // General setup
++    lc->bt_line_no = 0;
++    lc->ts = ctb_addr_ts;  // Decode position starts at the slice segment's first CTB
++    return 0;
++}
++
++static int gen_entry_points(HEVCRpiContext * const s, const H2645NAL * const nal)
++{
++    const GetBitContext * const gb = &s->HEVClc->gb;
++    RpiSliceHeader * const sh = &s->sh;
++    int i, j;
++    // Fills sh->offset[] / sh->size[] from the entry point table.  Returns 0 or AVERROR_INVALIDDATA
++    const unsigned int length = nal->size;
++    unsigned int offset = ((gb->index) >> 3) + 1;  // We have a bit & align still to come = +1 byte
++    unsigned int cmpt;         // Skipped bytes found inside the current entry
++    unsigned int startheader;  // End of the current entry, pulled back once per skipped byte found
++
++    if (sh->num_entry_point_offsets == 0) {
++        s->data = NULL;  // No entry points => no data pointer kept
++        return 0;
++    }
++
++    // offset in slice header includes emulation prevention bytes.
++    // Unfortunately those have been removed by the time we get here so we
++    // have to compensate.  The nal layer keeps a track of where they were.
++    for (j = 0, cmpt = 0, startheader = offset + sh->entry_point_offset[0]; j < nal->skipped_bytes; j++) {
++        if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
++            startheader--;
++            cmpt++;
++        }
++    }
++    // NOTE(review): entry_point_offset[0] is not checked against cmpt the way later entries are below
++    for (i = 1; i < sh->num_entry_point_offsets; i++) {
++        offset += (sh->entry_point_offset[i - 1] - cmpt);  // Step over the previous entry using its compensated length
++        for (j = 0, cmpt = 0, startheader = offset + sh->entry_point_offset[i]; j < nal->skipped_bytes; j++) {
++            if (nal->skipped_bytes_pos[j] >= offset && nal->skipped_bytes_pos[j] < startheader) {
++                startheader--;
++                cmpt++;
++            }
++        }
++        if (sh->entry_point_offset[i] <= cmpt) {
++            av_log(s->avctx, AV_LOG_ERROR, "entry point offset <= skipped bytes\n");
++            return AVERROR_INVALIDDATA;
++        }
++        sh->size[i - 1] = sh->entry_point_offset[i] - cmpt;
++        sh->offset[i - 1] = offset;
++    }
++
++    offset += sh->entry_point_offset[sh->num_entry_point_offsets - 1] - cmpt;
++    if (length < offset) {
++        av_log(s->avctx, AV_LOG_ERROR, "entry_point_offset table is corrupted\n");
++        return AVERROR_INVALIDDATA;
++    }
++    sh->size[sh->num_entry_point_offsets - 1] = length - offset;  // Final entry runs to the end of the nal
++    sh->offset[sh->num_entry_point_offsets - 1] = offset;
++
++    // Remember data start pointer as we won't have nal later
++    s->data = nal->data;
++    return 0;
++}
++
++
++// Decode CTUs from lc->ts into job lc->jb0 until the job is ready, the slice
++// data ends or max_blocks CTUs have been done (max_blocks == 0 => no limit)
++// Return
++// < 0   Error
++// 0     OK
++// jb->ctu_ts_last < 0       Job still filling
++// jb->ctu_ts_last >= 0      Job ready
++static int fill_job(HEVCRpiContext * const s, HEVCRpiLocalContext *const lc, unsigned int max_blocks)
++{
++    const unsigned int log2_ctb_size = s->ps.sps->log2_ctb_size;
++    const unsigned int ctb_size = (1 << log2_ctb_size);
++    HEVCRpiJob * const jb = lc->jb0;
++    int more_data = 1;
++    unsigned int ctb_addr_ts = lc->ts;
++    unsigned int ctb_addr_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts];
++    unsigned int x_ctb = (ctb_addr_rs % s->ps.sps->ctb_width) << log2_ctb_size;
++    const unsigned int y_ctb = (ctb_addr_rs / s->ps.sps->ctb_width) << log2_ctb_size;  // const: y never changes within one call
++
++    lc->unit_done = 0;
++
++    while (more_data && ctb_addr_ts < s->ps.sps->ctb_size)
++    {
++        int q_full;
++        const unsigned int ctb_flags = s->ps.pps->ctb_ts_flags[ctb_addr_ts];
++
++        hls_decode_neighbour(s, lc, x_ctb, y_ctb, ctb_addr_ts);
++
++        ff_hevc_rpi_cabac_init(s, lc, ctb_flags);
++
++        hls_sao_param(s, lc, x_ctb >> log2_ctb_size, y_ctb >> log2_ctb_size);
++
++        s->deblock[ctb_addr_rs].beta_offset = s->sh.beta_offset;
++        s->deblock[ctb_addr_rs].tc_offset   = s->sh.tc_offset;
++        s->filter_slice_edges[ctb_addr_rs]  = s->sh.slice_loop_filter_across_slices_enabled_flag;
++
++        // Zap stashes if navail
++        if ((lc->ctb_avail & AVAIL_U) == 0)
++            zap_cabac_stash(s->cabac_stash_up + (x_ctb >> 3), log2_ctb_size - 3);
++        if ((lc->ctb_avail & AVAIL_L) == 0)
++        {
++            memset(lc->ipm_left, INTRA_DC, IPM_TAB_SIZE);
++            zap_cabac_stash(s->cabac_stash_left + (y_ctb >> 3), log2_ctb_size - 3);
++        }
++#if MVF_STASH_WIDTH > 64
++        // Restore left mvf stash at start of tile if not at start of line
++        if ((ctb_flags & CTB_TS_FLAGS_SOTL) != 0 && x_ctb != 0 && !s->is_irap)
++        {
++            unsigned int i;
++            HEVCRpiMvField * dst = mvf_stash_ptr(s, lc, x_ctb - 1, 0);
++            const HEVCRpiMvField * src = s->mvf_left + (y_ctb >> LOG2_MIN_PU_SIZE);
++            for (i = 0; i != ctb_size >> LOG2_MIN_PU_SIZE; ++i)
++            {
++                *dst = *src++;
++                dst += MVF_STASH_WIDTH_PU;
++            }
++        }
++#endif
++
++        // Set initial tu states
++        lc->tu.cu_qp_delta = 0;
++        lc->tu.is_cu_qp_delta_wanted = 0;
++        lc->tu.cu_chroma_qp_offset_wanted = 0;
++
++        // Decode
++        more_data = hls_coding_quadtree(s, lc, x_ctb, y_ctb, log2_ctb_size, 0);
++
++        if (ff_hevc_rpi_cabac_overflow(lc))
++        {
++            av_log(s->avctx, AV_LOG_ERROR, "Quadtree bitstream overread\n ");
++            more_data = AVERROR_INVALIDDATA;
++        }
++
++        if (more_data < 0) {
++            s->tab_slice_address[ctb_addr_rs] = TAB_SLICE_ADDR_BROKEN;  // Mark slice as broken
++            return more_data;
++        }
++
++        if (more_data && ((ctb_flags & CTB_TS_FLAGS_EOT) != 0 ||
++             (s->ps.pps->entropy_coding_sync_enabled_flag && (ctb_flags & CTB_TS_FLAGS_EOTL) != 0)))
++        {
++            if (ff_hevc_rpi_get_cabac_terminate(&lc->cc) < 0 ||
++                ff_hevc_rpi_cabac_skip_bytes(&lc->cc, 0) == NULL)
++            {
++                av_log(s->avctx, AV_LOG_ERROR, "Error reading terminate el\n ");
++                return -1;  // NOTE(review): bare -1, unlike the AVERROR code returned by the other error path
++            }
++        }
++
++        // --- Post CTB processing
++
++        // Stash rpl top/left for deblock that needs to remember such things cross-slice
++        s->rpl_up[x_ctb >> log2_ctb_size] = s->refPicList;
++        s->rpl_left[y_ctb >> log2_ctb_size] = s->refPicList;
++
++        if (!s->is_irap)
++        {
++            // Copy MVF up to up-left & stash to up
++            {
++                const HEVCRpiMvField * src = mvf_stash_ptr(s, lc, x_ctb, ctb_size - 1);
++                HEVCRpiMvField * dst = s->mvf_up + (x_ctb >> LOG2_MIN_PU_SIZE);
++
++    //            printf("Stash: %d,%d, ctb_size=%d, %p->%p\n", x_ctb, y_ctb, ctb_size, src, dst);
++
++                lc->mvf_ul[0] = dst[(ctb_size - 1) >> LOG2_MIN_PU_SIZE];  // Save the old last entry before it is overwritten
++                memcpy(dst, src, (sizeof(*src)*ctb_size) >> LOG2_MIN_PU_SIZE);
++            }
++            // Stash sideways if end of tile line but not end of line (no point)
++            // ** Could/should do this @ end of fn
++#if MVF_STASH_WIDTH > 64
++            if ((ctb_flags & (CTB_TS_FLAGS_EOTL | CTB_TS_FLAGS_EOL)) == CTB_TS_FLAGS_EOTL)
++#endif
++            {
++                unsigned int i;
++                const HEVCRpiMvField * src = mvf_stash_ptr(s, lc, x_ctb + ctb_size - 1, 0);
++                HEVCRpiMvField * dst = s->mvf_left + (y_ctb >> LOG2_MIN_PU_SIZE);
++                for (i = 0; i != ctb_size >> LOG2_MIN_PU_SIZE; ++i)
++                {
++                    *dst++ = *src;
++                    src += MVF_STASH_WIDTH_PU;
++                }
++            }
++        }
++
++        if ((ctb_flags & CTB_TS_FLAGS_CSAVE) != 0)
++            ff_hevc_rpi_save_states(s, lc);
++
++        // Report progress so we can use our MVs in other frames
++        if ((ctb_flags & CTB_TS_FLAGS_EOL) != 0)
++            ff_hevc_rpi_progress_signal_mv(s, y_ctb + ctb_size - 1);
++
++        // End of line || End of tile line || End of tile
++        // (EoL covers end of frame for our purposes here)
++        q_full = ((ctb_flags & CTB_TS_FLAGS_EOTL) != 0);
++
++        // Allocate QPU chunks on fixed size 64 pel boundaries rather than
++        // whatever ctb_size is today.
++        // * We might quite like to continue to 64 pel vertical too but that
++        //   currently confuses WPP
++        if (((x_ctb + ctb_size) & 63) == 0 || q_full)
++        {
++            int overflow = 0;
++            if (rpi_inter_pred_next_ctu(&jb->luma_ip) != 0)
++                overflow = 1;
++            if (rpi_inter_pred_next_ctu(&jb->chroma_ip) != 0)
++                overflow = 1;
++            if (overflow)
++            {
++                // * This is very annoying (and slow) to cope with in WPP so
++                //   we treat it as an error there (no known stream triggers this
++                //   with the current buffer sizes).  Non-wpp should cope fine.
++                av_log(s->avctx, AV_LOG_WARNING,  "%s: Q full before EoL\n", __func__);
++                q_full = 1;
++            }
++        }
++
++        // Inc TS to next.
++        ctb_addr_ts++;
++        ctb_addr_rs++;
++        x_ctb += ctb_size;
++
++        if (q_full)
++        {
++            // Do job
++            // Prep for submission
++            jb->ctu_ts_last = ctb_addr_ts - 1;  // Was pre-inced
++            job_gen_bounds(s, jb);
++            break;
++        }
++
++        // If max_blocks started as 0 then this will never be true
++        if (--max_blocks == 0)
++            break;
++    }
++
++    lc->unit_done = (more_data <= 0);  // Errors returned above, so this is set when more_data == 0
++    lc->ts = ctb_addr_ts;
++    return 0;
++}
++
++static void bt_lc_init(HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, const unsigned int n)
++{   // Initial bit-thread state for local context number n
++    sem_init(&lc->bt_sem_in, 0, 0);   // input sem starts with a count of 0
++    lc->bt_psem_out = NULL;
++    lc->bt_terminate = 0;
++    lc->jb0 = NULL;                   // no job attached yet
++    lc->lc_n = n;
++    lc->context = s;
++}
++
++#define TRACE_WPP 0
++#if RPI_EXTRA_BIT_THREADS > 0
++static inline unsigned int line_ts_width(const HEVCRpiContext * const s, unsigned int ts)
++{
++    const unsigned int ctb_x = (unsigned int)s->ps.pps->ctb_addr_ts_to_rs[ts] % s->ps.sps->ctb_width;  // ts -> rs -> x within the line
++    return s->ps.pps->column_width[s->ps.pps->col_idxX[ctb_x]];  // column_width entry for the column col_idxX gives for x
++}
++
++// Move local context parameters from an aux bit thread back to the main
++// thread at the end of a slice as processing is going to continue there.
++static void movlc(HEVCRpiLocalContext *const dst_lc, HEVCRpiLocalContext *const src_lc, const int is_dep)
++{
++    if (src_lc == dst_lc) {  // Already in the right context - nothing to move
++        return;
++    }
++
++    // Move the job
++    // We will still have an active job if the final line terminates early
++    // Dest should always be null by now
++    av_assert1(dst_lc->jb0 == NULL);
++    dst_lc->jb0 = src_lc->jb0;
++    src_lc->jb0 = NULL;  // Ownership of the job has moved to dst_lc
++
++    // Always need to store where we are in the bitstream
++    dst_lc->ts = src_lc->ts;
++    dst_lc->gb = src_lc->gb;
++    // Cabac init request will be built at start of next slice
++
++    // Need to store context if we might have a dependent seg
++    if (is_dep)
++    {
++        dst_lc->qPy_pred = src_lc->qPy_pred;
++        memcpy(dst_lc->ipm_left, src_lc->ipm_left, sizeof(src_lc->ipm_left));
++        memcpy(dst_lc->cabac_state, src_lc->cabac_state, sizeof(src_lc->cabac_state));
++        memcpy(dst_lc->stat_coeff, src_lc->stat_coeff, sizeof(src_lc->stat_coeff));
++    }
++}
++// Block on lc's input semaphore, then return lc->bt_terminate (non-zero => this context has been told to stop)
++static inline int wait_bt_sem_in(HEVCRpiLocalContext * const lc)
++{
++    rpi_sem_wait(&lc->bt_sem_in);
++    return lc->bt_terminate;
++}
++
++// Do one WPP line (or one tile if lc->bt_is_tile).  Returns 0, the first decode error, or AVERROR_EXIT if terminated
++// Will not work correctly over horizontal tile boundaries - vertical should be OK
++static int rpi_run_one_line(HEVCRpiContext *const s, HEVCRpiLocalContext * const lc, const int is_first)
++{
++    const int is_tile = lc->bt_is_tile;
++    const unsigned int tile_id = s->ps.pps->tile_id[lc->ts];
++    const unsigned int line = lc->bt_line_no;
++    const unsigned int line_inc = lc->bt_line_inc;
++    const int is_last = (line >= lc->bt_last_line);
++
++    const unsigned int ts_eol = lc->ts + (is_tile ? s->ps.pps->tile_size[tile_id] : lc->bt_line_width);  // One past the end of this line/tile
++    const unsigned int ts_next =
++        line + line_inc > (unsigned int)s->sh.num_entry_point_offsets ?
++            INT_MAX :  // No later line/tile for this context in this slice
++        is_tile ?
++            s->ps.pps->tile_pos_ts[tile_id + line_inc] :
++            lc->ts + lc->bt_line_width * line_inc;
++    // Tile wants line, WPP a few CTUs (must be >= 2 for cabac context to work)
++    const unsigned int partial_size = is_tile ? line_ts_width(s, lc->ts) : 2;
++    unsigned int ts_prev;
++    int loop_n = 0;
++    int err = 0;
++
++    av_assert1(line <= s->sh.num_entry_point_offsets);
++
++#if TRACE_WPP
++    printf("%s[%d]: Start %s: tile=%d, line=%d/%d/%d, ts=%d/%d/%d, width=%d, jb=%p\n", __func__,
++           lc->lc_n,  is_tile ? "Tile" : "WPP", tile_id,
++           line, lc->bt_last_line, s->sh.num_entry_point_offsets,
++           lc->ts, ts_eol, ts_next, partial_size, lc->jb0);
++#endif
++    if (line != 0)  // Later lines read from their own sub-stream (sh.offset/size[line - 1])
++    {
++        const uint8_t * const data = s->data + s->sh.offset[line - 1];
++        const unsigned int len = s->sh.size[line - 1];
++        if ((err = init_get_bits8(&lc->gb, data, len)) < 0)
++            return err;
++
++        ff_init_cabac_decoder(&lc->cc, data, len);
++    }
++
++    // We should never be processing a dependent slice here so reset is good
++    // ?? These probably shouldn't be needed (as they should be set by later
++    //    logic) but do seem to be required
++    lc->qp_y = s->sh.slice_qp;
++
++    do
++    {
++        if (!is_last && loop_n > 1) {
++#if TRACE_WPP
++            printf("%s[%d]: %sPoke %p\n", __func__, lc->lc_n, err == 0 ? "" : "ERR: ", lc->bt_psem_out);
++#endif
++            sem_post(lc->bt_psem_out);
++        }
++        // The wait for loop_n == 0 has been done in bit_thread
++        if (!is_first && loop_n != 0)
++        {
++#if TRACE_WPP
++            printf("%s[%d]: %sWait %p\n", __func__, lc->lc_n, err == 0 ? "" : "ERR: ", &lc->bt_sem_in);
++#endif
++            if (wait_bt_sem_in(lc) != 0)
++                return AVERROR_EXIT;
++        }
++
++#if TRACE_WPP
++        {
++            int n;
++            sem_getvalue(&lc->bt_sem_in, &n);
++            printf("%s[%d]: ts=%d, sem=%d %p\n", __func__, lc->lc_n, lc->ts, n, &lc->bt_sem_in);
++        }
++#endif
++
++        ts_prev = lc->ts;
++
++        // If we have had an error - do no further decode but do continue
++        // moving signals around so the other threads continue to operate
++        // correctly (or at least as correctly as they can with this line missing)
++        //
++        // Errors in WPP/Tile are less fatal than normal as we have a good idea
++        // of how to restart on the next line so there is no need to give up totally
++        if (err != 0)
++        {
++            lc->unit_done = 0;
++            lc->ts += partial_size;
++        }
++        else
++        {
++            worker_pass0_ready(s, lc);
++
++            if ((err = fill_job(s, lc, partial_size)) < 0 ||
++                (lc->ts < ts_eol && !is_last && (lc->ts != ts_prev + partial_size || lc->unit_done)))
++            {
++                if (err == 0) {
++                    av_log(s->avctx, AV_LOG_ERROR, "Unexpected end of tile/wpp section\n");
++                    err = AVERROR_INVALIDDATA;
++                }
++                worker_free(s, lc);
++                lc->ts = ts_prev + partial_size;  // Pretend we did all that
++                lc->unit_done = 0;
++            }
++            else if (is_tile)
++            {
++                worker_submit_job(s, lc);
++            }
++        }
++
++        ++loop_n;
++    } while (lc->ts < ts_eol && !lc->unit_done);
++
++    // If we are on the last line & we didn't get a whole line we must wait for
++    // and sink the sem_posts from the line above / tile to the left.
++    while ((ts_prev += partial_size) < ts_eol)
++    {
++#if TRACE_WPP
++        printf("%s[%d]: EOL Wait: ts=%d %p\n", __func__, lc->lc_n, ts_prev, &lc->bt_sem_in);
++#endif
++        if (wait_bt_sem_in(lc) != 0)
++            return AVERROR_EXIT;
++    }
++
++    lc->bt_line_no += line_inc;
++
++    if (!is_tile && err == 0)  // Tile jobs were already submitted inside the loop
++        worker_submit_job(s, lc);
++
++    if (!is_last) {
++        lc->ts = ts_next;
++
++#if TRACE_WPP
++        printf("%s[%d]: Poke post submit %p\n", __func__, lc->lc_n, lc->bt_psem_out);
++#endif
++        sem_post(lc->bt_psem_out);
++        if (loop_n > 1) {
++#if TRACE_WPP
++            printf("%s[%d]: Poke post submit2 %p\n", __func__, lc->lc_n, lc->bt_psem_out);
++#endif
++            sem_post(lc->bt_psem_out);
++        }
++    }
++    else
++    {
++        movlc(s->HEVClcList[0], lc, s->ps.pps->dependent_slice_segments_enabled_flag);  // * & not EoT
++#if MVF_STASH_WIDTH > 64
++        // Horrid calculations to work out what we want but luckily this should almost never execute
++        // **** Move to movlc
++        if (!s->is_irap)
++        {
++            const unsigned int ctb_flags = s->ps.pps->ctb_ts_flags[lc->ts];
++            if ((ctb_flags & CTB_TS_FLAGS_EOTL) == 0) // If EOTL then we have already stashed mvf
++            {
++                const unsigned int x_ctb = ((s->ps.pps->ctb_addr_ts_to_rs[lc->ts] % s->ps.sps->ctb_width) << s->ps.sps->log2_ctb_size) - 1;
++                unsigned int i;
++                const HEVCRpiMvField *s_mvf = lc->mvf_stash + ((x_ctb >> LOG2_MIN_PU_SIZE) & (MVF_STASH_WIDTH_PU - 1));
++                HEVCRpiMvField *d_mvf = s->HEVClcList[0]->mvf_stash + ((x_ctb >> LOG2_MIN_PU_SIZE) & (MVF_STASH_WIDTH_PU - 1));
++
++                for (i = 0; i != MVF_STASH_HEIGHT_PU; ++i)
++                {
++                    *d_mvf = *s_mvf;
++                    d_mvf += MVF_STASH_WIDTH_PU;
++                    s_mvf += MVF_STASH_WIDTH_PU;
++                }
++
++            }
++        }
++#endif
++        // When all done poke the thread 0 sem_in one final time
++#if TRACE_WPP
++        printf("%s[%d]: Poke final %p\n", __func__, lc->lc_n, &s->HEVClcList[0]->bt_sem_in);
++#endif
++        sem_post(&s->HEVClcList[0]->bt_sem_in);
++    }
++
++#if TRACE_WPP
++    printf("%s[%d]: End. dep=%d\n", __func__, lc->lc_n, s->ps.pps->dependent_slice_segments_enabled_flag);
++#endif
++    return err;
++}
++// Set up the local contexts for WPP: lc[i] starts on line i of the slice segment and steps RPI_BIT_THREADS lines
++static void wpp_setup_lcs(HEVCRpiContext * const s)
++{
++    const unsigned int ts0 = s->ps.pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr];
++    const unsigned int line_width = line_ts_width(s, ts0);
++
++    for (int i = 0; i < RPI_BIT_THREADS && i <= s->sh.num_entry_point_offsets; ++i)
++    {
++        HEVCRpiLocalContext * const lc = s->HEVClcList[i];
++        // Every context shares the same width, stride and last line; only the start differs
++        lc->bt_is_tile = 0;
++        lc->bt_line_no = i;
++        lc->bt_line_inc = RPI_BIT_THREADS;
++        lc->bt_last_line = s->sh.num_entry_point_offsets;
++        lc->bt_line_width = line_width;
++        lc->ts = ts0 + line_width * i;  // First ts of line i
++    }
++}
++
++// Set up the local contexts so that each handles one tile of tile row slice_row (counted from the slice's first row)
++// Can only process a single tile row at once; at most RPI_BIT_THREADS tiles run in parallel
++static void tile_one_row_setup_lcs(HEVCRpiContext * const s, unsigned int slice_row)
++{
++    const HEVCRpiPPS * const pps = s->ps.pps;
++    const unsigned int ts0 = pps->ctb_addr_rs_to_ts[s->sh.slice_segment_addr];
++    const unsigned int tile0 = pps->tile_id[ts0];               // Tile holding the start of the slice segment
++    const unsigned int col0 = tile0 % pps->num_tile_columns;    // Its tile column
++
++    const unsigned int col = (slice_row == 0) ? col0 : 0;       // First row starts mid-row, later rows at column 0
++    unsigned int line = slice_row * pps->num_tile_columns - col0 + col;  // Tile index relative to tile0
++    const unsigned int last_line = FFMIN(
++        line + pps->num_tile_columns - 1 - col, s->sh.num_entry_point_offsets);
++
++    const unsigned int par =
++        FFMIN(RPI_BIT_THREADS, last_line + 1 - line);           // Number of contexts used for this row
++#if TRACE_WPP
++    printf("ts0=%d, ents=%d, row=%d, tiles=%dx%d, col=%d, par=%d, line=%d/%d\n", ts0, s->sh.num_entry_point_offsets, slice_row,
++           pps->num_tile_columns, pps->num_tile_rows, col, par, line, last_line);
++#endif
++    for (unsigned int i = 0; i != par; ++i, ++line)
++    {
++        HEVCRpiLocalContext * const lc = s->HEVClcList[i];
++        const unsigned int tile = tile0 + line;
++
++        lc->ts = pps->tile_pos_ts[tile];
++        lc->bt_line_no = line;
++        lc->bt_is_tile = 1;
++        lc->bt_line_width = line_ts_width(s, lc->ts);
++        lc->bt_last_line = last_line;
++        lc->bt_line_inc = par;
++    }
++}
++
++// Extra bit thread entry point: v is this thread's HEVCRpiLocalContext.  Always returns NULL
++static void * bit_thread(void * v)
++{
++    HEVCRpiLocalContext * const lc = v;
++    HEVCRpiContext *const s = lc->context;
++
++    while (wait_bt_sem_in(lc) == 0)  // One line/tile per wake-up; exits once bt_terminate is seen
++    {
++        int err;
++
++        if ((err = rpi_run_one_line(s, lc, 0)) < 0) {  // Never first tile/wpp
++            if (lc->bt_terminate) {
++                av_log(s->avctx, AV_LOG_ERROR, "%s: Unexpected termination\n", __func__);
++                break;
++            }
++            av_log(s->avctx, AV_LOG_WARNING, "%s: Decode failure: %d\n", __func__, err);  // Keep servicing later lines
++        }
++    }
++
++    return NULL;
++}
++// Allocate/initialise lc[1..] and start the extra bit threads (no-op if already started). Returns 0, or -1 on failure
++static int bit_threads_start(HEVCRpiContext * const s)
++{
++    if (s->bt_started)
++        return 0;
++
++    // lc[0] belongs to the main thread - this sets up lc[1..RPI_BIT_THREADS - 1]
++    for (int i = 1; i < RPI_BIT_THREADS; ++i)
++    {
++        if (s->HEVClcList[i] == NULL && (s->HEVClcList[i] = av_mallocz(sizeof(*s->HEVClcList[0]))) == NULL)
++            return -1;
++        bt_lc_init(s, s->HEVClcList[i], i);
++        job_lc_init(s->HEVClcList[i]);
++    }
++    // Link the sems in a circle
++    for (int i = 0; i < RPI_BIT_THREADS - 1; ++i)
++        s->HEVClcList[i]->bt_psem_out = &s->HEVClcList[i + 1]->bt_sem_in;
++    s->HEVClcList[RPI_BIT_THREADS - 1]->bt_psem_out = &s->HEVClcList[0]->bt_sem_in;
++    // All lcs are initialised by now.  pthread_create() reports failure as a +ve error number, never < 0
++    for (int i = 0; i < RPI_EXTRA_BIT_THREADS; ++i)
++    {
++        if (pthread_create(s->bit_threads + i, NULL, bit_thread, s->HEVClcList[i + 1]) != 0) {
++            while (i-- > 0) {  // Stop & join the threads that did start so that a retry cannot duplicate them
++                s->HEVClcList[i + 1]->bt_terminate = 1;
++                sem_post(&s->HEVClcList[i + 1]->bt_sem_in);
++                pthread_join(s->bit_threads[i], NULL);
++            }
++            return -1;
++        }
++    }
++    s->bt_started = 1;
++    return 0;
++}
++// Stop and join every extra bit thread, then destroy its input sem and job state (no-op if not started). Returns 0
++static int bit_threads_kill(HEVCRpiContext * const s)
++{
++    if (!s->bt_started)
++        return 0;
++    s->bt_started = 0;
++
++    for (int i = 0; i < RPI_EXTRA_BIT_THREADS; ++i)
++    {
++        HEVCRpiLocalContext *const lc = s->HEVClcList[i + 1];  // Thread i runs on lc[i + 1]
++        if (lc == NULL)
++            break;
++
++        lc->bt_terminate = 1;      // Seen by the thread once its wait on bt_sem_in returns
++        sem_post(&lc->bt_sem_in);  // Wake it so that it notices
++        pthread_join(s->bit_threads[i], NULL);
++
++        sem_destroy(&lc->bt_sem_in);
++        job_lc_kill(lc);
++    }
++    return 0;
++}
++#endif
++
++
++// At the end of a tile (EoT) that is not also the end of a line (EoL) the row may be shorter than the
++// number of jobs we can queue, so we have to wait for it to finish otherwise we risk cache/QPU disasters.
++// n is a ts index (ignored if < 0).  Never needed for I slices or when tile_wpp_inter_disable < 2.
++static inline int tile_needs_wait(const HEVCRpiContext * const s, const int n)
++{
++    if (n < 0 || s->sh.slice_type == HEVC_SLICE_I || s->ps.pps->tile_wpp_inter_disable < 2)
++        return 0;
++
++    // Want EOT set with EOL clear
++    return (s->ps.pps->ctb_ts_flags[n] & (CTB_TS_FLAGS_EOT | CTB_TS_FLAGS_EOL)) == CTB_TS_FLAGS_EOT;
++}
++// Decode one slice segment's data.  Returns lc->ts on completion or the error from slice_start()/fill_job(); isFilterThread is not referenced
++static int rpi_decode_entry(AVCodecContext *avctxt, void *isFilterThread)
++{
++    HEVCRpiContext * const s  = avctxt->priv_data;
++    HEVCRpiLocalContext * const lc = s->HEVClc;
++    int err;
++
++    // Start of slice
++    if ((err = slice_start(s, lc)) != 0)
++        return err;
++
++#if RPI_EXTRA_BIT_THREADS > 0
++
++    if (s->sh.offload_tiles)
++    {
++        unsigned int slice_row = 0;
++
++#if TRACE_WPP
++        printf("%s: Do Tiles\n", __func__);
++#endif
++        // Generate & start extra bit threads if they aren't already running
++        bit_threads_start(s);  // NOTE(review): return value ignored - a start failure is not handled here
++
++        do
++        {
++            // Reset lc lines etc.
++            tile_one_row_setup_lcs(s, slice_row);
++
++#if TRACE_WPP
++            printf("%s: Row %d: Do 1st: line=%d/%d/%d\n",
++                   __func__, slice_row, lc->bt_line_no, lc->bt_last_line, s->sh.num_entry_point_offsets);
++#endif
++
++            rpi_run_one_line(s, lc, 1);  // Kicks off the other threads
++#if TRACE_WPP
++            printf("%s: Row %d: Done 1st: line=%d/%d/%d\n",
++                   __func__, slice_row, lc->bt_line_no, lc->bt_last_line, s->sh.num_entry_point_offsets);
++#endif
++
++            while (lc->bt_line_no <= lc->bt_last_line) {
++                rpi_sem_wait(&lc->bt_sem_in);
++                rpi_run_one_line(s, lc, 0);
++            }
++#if TRACE_WPP
++            printf("%s: Done body\n", __func__);
++#endif
++
++            // Wait for everything else to finish
++            rpi_sem_wait(&lc->bt_sem_in);
++
++            ++slice_row;
++        } while (lc->bt_last_line < s->sh.num_entry_point_offsets);
++
++
++#if TRACE_WPP
++        printf("%s: Done wait: ts=%d\n", __func__, lc->ts);
++#endif
++    }
++    else if (s->sh.offload_wpp)
++    {
++#if TRACE_WPP
++        printf("%s: Do WPP\n", __func__);
++#endif
++        // Generate & start extra bit threads if they aren't already running
++        bit_threads_start(s);  // NOTE(review): return value ignored - a start failure is not handled here
++
++        // Reset lc lines etc.
++        wpp_setup_lcs(s);
++
++        rpi_run_one_line(s, lc, 1);  // Kicks off the other threads
++#if TRACE_WPP
++        printf("%s: Done 1st\n", __func__);
++#endif
++
++        while (lc->bt_line_no <= s->sh.num_entry_point_offsets) {
++            rpi_sem_wait(&lc->bt_sem_in);
++            rpi_run_one_line(s, lc, 0);
++        }
++#if TRACE_WPP
++        printf("%s: Done body\n", __func__);
++#endif
++
++        // Wait for everything else to finish
++        rpi_sem_wait(&lc->bt_sem_in);
++
++#if TRACE_WPP
++        printf("%s: Done wait: ts=%d\n", __func__, lc->ts);
++#endif
++    }
++    else
++#endif
++    {
++#if TRACE_WPP
++        printf("%s: Single start: ts=%d\n", __func__, lc->ts);
++#endif
++        // Single bit thread
++        do {
++            // Make sure we have space to prepare the next job
++            worker_pass0_ready(s, lc);
++
++            if ((err = fill_job(s, lc, 0)) < 0)
++                goto fail;
++
++            worker_submit_job(s, lc);
++
++            if (tile_needs_wait(s, lc->ts - 1))
++                worker_wait(s, lc);
++
++        } while (!lc->unit_done);
++
++#if TRACE_WPP
++        printf("%s: Single end: ts=%d\n", __func__, lc->ts);
++#endif
++    }
++
++    // If we have reached the end of the frame
++    // then wait for the worker to finish all its jobs
++    if (lc->ts >= s->ps.sps->ctb_size)
++        worker_wait(s, lc);
++
++#if RPI_TSTATS
++    {
++        HEVCRpiStats *const ts = &s->tstats;
++
++        printf("=== P: xy00:%5d/%5d/%5d/%5d h16gl:%5d/%5d w8gl:%5d/%5d y8m:%d\n    B: xy00:%5d/%5d/%5d/%5d h16gl:%5d/%5d\n",
++               ts->y_pred1_xy, ts->y_pred1_x0, ts->y_pred1_y0, ts->y_pred1_x0y0,
++               ts->y_pred1_hgt16, ts->y_pred1_hle16, ts->y_pred1_wgt8, ts->y_pred1_wle8, ts->y_pred1_y8_merge,
++               ts->y_pred2_xy, ts->y_pred2_x0, ts->y_pred2_y0, ts->y_pred2_x0y0,
++               ts->y_pred2_hgt16, ts->y_pred2_hle16);
++        memset(ts, 0, sizeof(*ts));
++    }
++#endif
++
++    return lc->ts;
++
++fail:
++    // Cleanup
++    av_log(s->avctx, AV_LOG_ERROR, "%s failed: err=%d\n", __func__, err);
++    // Free our job & wait for termination
++    worker_free(s, lc);
++    worker_wait(s, lc);
++    return err;
++}
++
++// Set s->no_backward_pred_flag to 1 iff this is a B slice with temporal MVP and some ref in either list has POC > s->poc
++static void set_no_backward_pred(HEVCRpiContext * const s)
++{
++    const int is_b_tmvp = (s->sh.slice_type == HEVC_SLICE_B && s->sh.slice_temporal_mvp_enabled_flag);
++
++    s->no_backward_pred_flag = 0;
++
++    // Nothing is scanned unless B + temporal MVP; otherwise check both lists
++    for (int lx = 0; is_b_tmvp && lx < 2; lx++) {
++        const RefPicList *const rpl = s->refPicList + lx;
++
++        for (int n = 0; n < rpl->nb_refs; n++) {
++            if (rpl->list[n] > s->poc) {
++                s->no_backward_pred_flag = 1;
++                return;
++            }
++        }
++    }
++}
++
++static int hls_slice_data(HEVCRpiContext * const s, const H2645NAL * const nal)
++{
++    int err;
++    if ((err = gen_entry_points(s, nal)) < 0)
++        return err;
++
++    set_no_backward_pred(s);
++
++    return rpi_decode_entry(s->avctx, NULL);
++}
++// Attach SEI-derived side data to s->ref->frame and apply any alternative transfer characteristic. Returns 0 or AVERROR(ENOMEM)
++static int set_side_data(HEVCRpiContext *s)
++{
++    AVFrame *out = s->ref->frame;
++
++    if (s->sei.frame_packing.present &&
++        s->sei.frame_packing.arrangement_type >= 3 &&
++        s->sei.frame_packing.arrangement_type <= 5 &&
++        s->sei.frame_packing.content_interpretation_type > 0 &&
++        s->sei.frame_packing.content_interpretation_type < 3) {
++        AVStereo3D *stereo = av_stereo3d_create_side_data(out);
++        if (!stereo)
++            return AVERROR(ENOMEM);
++
++        switch (s->sei.frame_packing.arrangement_type) {
++        case 3:
++            if (s->sei.frame_packing.quincunx_subsampling)
++                stereo->type = AV_STEREO3D_SIDEBYSIDE_QUINCUNX;
++            else
++                stereo->type = AV_STEREO3D_SIDEBYSIDE;
++            break;
++        case 4:
++            stereo->type = AV_STEREO3D_TOPBOTTOM;
++            break;
++        case 5:
++            stereo->type = AV_STEREO3D_FRAMESEQUENCE;
++            break;
++        }
++
++        if (s->sei.frame_packing.content_interpretation_type == 2)
++            stereo->flags = AV_STEREO3D_FLAG_INVERT;
++
++        if (s->sei.frame_packing.arrangement_type == 5) {
++            if (s->sei.frame_packing.current_frame_is_frame0_flag)
++                stereo->view = AV_STEREO3D_VIEW_LEFT;
++            else
++                stereo->view = AV_STEREO3D_VIEW_RIGHT;
++        }
++    }
++
++    if (s->sei.display_orientation.present &&
++        (s->sei.display_orientation.anticlockwise_rotation ||
++         s->sei.display_orientation.hflip || s->sei.display_orientation.vflip)) {
++        double angle = s->sei.display_orientation.anticlockwise_rotation * 360 / (double) (1 << 16);  // 1/65536ths of a turn -> degrees
++        AVFrameSideData *rotation = av_frame_new_side_data(out,
++                                                           AV_FRAME_DATA_DISPLAYMATRIX,
++                                                           sizeof(int32_t) * 9);
++        if (!rotation)
++            return AVERROR(ENOMEM);
++
++        av_display_rotation_set((int32_t *)rotation->data, angle);
++        av_display_matrix_flip((int32_t *)rotation->data,
++                               s->sei.display_orientation.hflip,
++                               s->sei.display_orientation.vflip);
++    }
++
++    // Decrement the mastering display flag when IRAP frame has no_rasl_output_flag=1
++    // so the side data persists for the entire coded video sequence.
++    if (s->sei.mastering_display.present > 0 &&
++        IS_IRAP(s) && s->no_rasl_output_flag) {
++        s->sei.mastering_display.present--;
++    }
++    if (s->sei.mastering_display.present) {
++        // HEVC uses a g,b,r ordering, which we convert to a more natural r,g,b
++        const int mapping[3] = {2, 0, 1};
++        const int chroma_den = 50000;
++        const int luma_den = 10000;
++        int i;
++        AVMasteringDisplayMetadata *metadata =
++            av_mastering_display_metadata_create_side_data(out);
++        if (!metadata)
++            return AVERROR(ENOMEM);
++
++        for (i = 0; i < 3; i++) {
++            const int j = mapping[i];
++            metadata->display_primaries[i][0].num = s->sei.mastering_display.display_primaries[j][0];
++            metadata->display_primaries[i][0].den = chroma_den;
++            metadata->display_primaries[i][1].num = s->sei.mastering_display.display_primaries[j][1];
++            metadata->display_primaries[i][1].den = chroma_den;
++        }
++        metadata->white_point[0].num = s->sei.mastering_display.white_point[0];
++        metadata->white_point[0].den = chroma_den;
++        metadata->white_point[1].num = s->sei.mastering_display.white_point[1];
++        metadata->white_point[1].den = chroma_den;
++
++        metadata->max_luminance.num = s->sei.mastering_display.max_luminance;
++        metadata->max_luminance.den = luma_den;
++        metadata->min_luminance.num = s->sei.mastering_display.min_luminance;
++        metadata->min_luminance.den = luma_den;
++        metadata->has_luminance = 1;
++        metadata->has_primaries = 1;
++
++        av_log(s->avctx, AV_LOG_DEBUG, "Mastering Display Metadata:\n");
++        av_log(s->avctx, AV_LOG_DEBUG,
++               "r(%5.4f,%5.4f) g(%5.4f,%5.4f) b(%5.4f %5.4f) wp(%5.4f, %5.4f)\n",
++               av_q2d(metadata->display_primaries[0][0]),
++               av_q2d(metadata->display_primaries[0][1]),
++               av_q2d(metadata->display_primaries[1][0]),
++               av_q2d(metadata->display_primaries[1][1]),
++               av_q2d(metadata->display_primaries[2][0]),
++               av_q2d(metadata->display_primaries[2][1]),
++               av_q2d(metadata->white_point[0]), av_q2d(metadata->white_point[1]));
++        av_log(s->avctx, AV_LOG_DEBUG,
++               "min_luminance=%f, max_luminance=%f\n",
++               av_q2d(metadata->min_luminance), av_q2d(metadata->max_luminance));
++    }
++    // Decrement the content light flag when IRAP frame has no_rasl_output_flag=1
++    // so the side data persists for the entire coded video sequence.
++    if (s->sei.content_light.present > 0 &&
++        IS_IRAP(s) && s->no_rasl_output_flag) {
++        s->sei.content_light.present--;
++    }
++    if (s->sei.content_light.present) {
++        AVContentLightMetadata *metadata =
++            av_content_light_metadata_create_side_data(out);
++        if (!metadata)
++            return AVERROR(ENOMEM);
++        metadata->MaxCLL  = s->sei.content_light.max_content_light_level;
++        metadata->MaxFALL = s->sei.content_light.max_pic_average_light_level;
++
++        av_log(s->avctx, AV_LOG_DEBUG, "Content Light Level Metadata:\n");
++        av_log(s->avctx, AV_LOG_DEBUG, "MaxCLL=%d, MaxFALL=%d\n",
++               metadata->MaxCLL, metadata->MaxFALL);
++    }
++
++    if (s->sei.a53_caption.a53_caption) {
++        AVFrameSideData* sd = av_frame_new_side_data(out,
++                                                     AV_FRAME_DATA_A53_CC,
++                                                     s->sei.a53_caption.a53_caption_size);
++        if (sd)  // Allocation failure just drops the captions; it is not treated as an error
++            memcpy(sd->data, s->sei.a53_caption.a53_caption, s->sei.a53_caption.a53_caption_size);
++        av_freep(&s->sei.a53_caption.a53_caption);
++        s->sei.a53_caption.a53_caption_size = 0;
++        s->avctx->properties |= FF_CODEC_PROPERTY_CLOSED_CAPTIONS;
++    }
++
++    if (s->sei.alternative_transfer.present &&
++        av_color_transfer_name(s->sei.alternative_transfer.preferred_transfer_characteristics) &&
++        s->sei.alternative_transfer.preferred_transfer_characteristics != AVCOL_TRC_UNSPECIFIED) {
++        s->avctx->color_trc = out->color_trc = s->sei.alternative_transfer.preferred_transfer_characteristics;
++    }
++
++    return 0;
++}
++// Per-frame setup: clear per-frame tables, get a new ref frame, build the RPS, add side data, output. Returns 0 or a -ve AVERROR
++static int hevc_frame_start(HEVCRpiContext * const s)
++{
++    int ret;
++
++    memset(s->bs_horizontal, 0, s->bs_size * 2);  // Does V too
++    memset(s->is_pcm,        0, s->ps.sps->pcm_width * s->ps.sps->pcm_height);
++    memset(s->tab_slice_address, -1, s->ps.sps->ctb_size * sizeof(*s->tab_slice_address));
++
++    // Only need to remember intra for CIP
++    if (!s->ps.pps->constrained_intra_pred_flag || s->is_irap)
++        s->is_intra = NULL;
++    else
++    {
++        s->is_intra = s->is_intra_store;
++        memset(s->is_intra, 0, s->ps.sps->pcm_width * s->ps.sps->pcm_height);
++    }
++
++    s->is_decoded        = 0;
++    s->first_nal_type    = s->nal_unit_type;
++
++    s->no_rasl_output_flag = IS_IDR(s) || IS_BLA(s) || (s->nal_unit_type == HEVC_NAL_CRA_NUT && s->last_eos);
++
++    if (s->pkt.nb_nals > s->rpl_tab_size)
++    {
++        // Free & malloc rather than realloc: usually faster as it avoids an unwanted copy
++        av_freep(&s->rpl_tab);
++        s->rpl_tab_size = 0;
++        ret = AVERROR(ENOMEM);  // Must be set before the goto: ret is otherwise still uninitialised here
++        if ((s->rpl_tab = av_malloc(s->pkt.nb_nals * sizeof(*s->rpl_tab))) == NULL)
++            goto fail;
++        s->rpl_tab_size = s->pkt.nb_nals;
++    }
++    memset(s->rpl_tab, 0, s->pkt.nb_nals * sizeof(*s->rpl_tab));
++
++    ret = ff_hevc_rpi_set_new_ref(s, &s->frame, s->poc);
++    if (ret < 0)
++        goto fail;
++
++    // Construct the frame RPS
++    ret = ff_hevc_rpi_frame_rps(s);
++    if (ret < 0) {
++        av_log(s->avctx, AV_LOG_ERROR, "Error constructing the frame RPS.\n");
++        goto fail;
++    }
++
++    s->ref->frame->key_frame = IS_IRAP(s);
++
++    ret = set_side_data(s);
++    if (ret < 0)
++        goto fail;
++
++    s->frame->pict_type = 3 - s->sh.slice_type;  // presumably maps HEVC slice type onto AVPictureType - TODO confirm
++
++    if (!IS_IRAP(s))
++        ff_hevc_rpi_bump_frame(s);
++
++    av_frame_unref(s->output_frame);
++    ret = ff_hevc_rpi_output_frame(s, s->output_frame, 0);
++    if (ret < 0)
++        goto fail;
++
++    ff_thread_finish_setup(s->avctx);
++
++    return 0;
++
++fail:
++    // Drop the frame being set up (if we got that far) so that later slices see no current ref
++    if (s->ref)
++        ff_hevc_rpi_unref_frame(s, s->ref, ~0);
++    s->ref = NULL;
++    return ret;
++}
++
++static inline int is_non_ref_unit_type(const unsigned int nal_unit_type)
++{
++    // From Table 7-1: true exactly for the even types 0, 2, 4, 6, 8, 10, 12, 14
++    return nal_unit_type <= 14 && (nal_unit_type & 1) == 0;
++}
++
++static int decode_nal_unit(HEVCRpiContext *s, const H2645NAL *nal)
++{
++    GetBitContext * const gb    = &s->HEVClc->gb;
++    int ctb_addr_ts, ret;
++
++    *gb              = nal->gb;
++    s->nal_unit_type = nal->type;
++    s->temporal_id   = nal->temporal_id;
++
++    switch (s->nal_unit_type) {
++    case HEVC_NAL_VPS:
++        ret = ff_hevc_rpi_decode_nal_vps(gb, s->avctx, &s->ps);
++        if (ret < 0)
++            goto fail;
++        break;
++    case HEVC_NAL_SPS:
++        ret = ff_hevc_rpi_decode_nal_sps(gb, s->avctx, &s->ps,
++                                     s->apply_defdispwin);
++        if (ret < 0)
++            goto fail;
++        break;
++    case HEVC_NAL_PPS:
++        ret = ff_hevc_rpi_decode_nal_pps(gb, s->avctx, &s->ps);
++        if (ret < 0)
++            goto fail;
++        break;
++    case HEVC_NAL_SEI_PREFIX:
++    case HEVC_NAL_SEI_SUFFIX:
++        ret = ff_hevc_rpi_decode_nal_sei(gb, s->avctx, &s->sei, &s->ps, s->nal_unit_type);
++        if (ret < 0)
++            goto fail;
++        break;
++    case HEVC_NAL_TRAIL_R:
++    case HEVC_NAL_TRAIL_N:
++    case HEVC_NAL_TSA_N:
++    case HEVC_NAL_TSA_R:
++    case HEVC_NAL_STSA_N:
++    case HEVC_NAL_STSA_R:
++    case HEVC_NAL_BLA_W_LP:
++    case HEVC_NAL_BLA_W_RADL:
++    case HEVC_NAL_BLA_N_LP:
++    case HEVC_NAL_IDR_W_RADL:
++    case HEVC_NAL_IDR_N_LP:
++    case HEVC_NAL_CRA_NUT:
++    case HEVC_NAL_RADL_N:
++    case HEVC_NAL_RADL_R:
++    case HEVC_NAL_RASL_N:
++    case HEVC_NAL_RASL_R:
++        ret = hls_slice_header(s);
++        if (ret < 0)
++            return ret;
++
++        // The definition of _N unit types is "non-reference for other frames
++        // with the same temporal_id" so they may/will be ref frames for pics
++        // with a higher temporal_id.
++        s->used_for_ref = s->ps.sps->max_sub_layers > s->temporal_id + 1 ||
++            !is_non_ref_unit_type(s->nal_unit_type);
++        s->offload_recon = s->threads_type != 0 && s->used_for_ref;
++        s->is_irap = IS_IRAP(s);
++
++#if DEBUG_DECODE_N
++        {
++            static int z = 0;
++            if (IS_IDR(s)) {
++                z = 1;
++            }
++            if (z != 0 && z++ > DEBUG_DECODE_N) {
++                s->is_decoded = 0;
++                break;
++            }
++        }
++#endif
++        if (
++            (s->avctx->skip_frame >= AVDISCARD_NONREF && !s->used_for_ref) ||
++            (s->avctx->skip_frame >= AVDISCARD_BIDIR && s->sh.slice_type == HEVC_SLICE_B) ||
++            (s->avctx->skip_frame >= AVDISCARD_NONINTRA && s->sh.slice_type != HEVC_SLICE_I) ||
++            (s->avctx->skip_frame >= AVDISCARD_NONKEY && !IS_IRAP(s)))
++        {
++            s->is_decoded = 0;
++            break;
++        }
++
++        if (s->sh.first_slice_in_pic_flag) {
++            if (s->max_ra == INT_MAX) {
++                if (s->nal_unit_type == HEVC_NAL_CRA_NUT || IS_BLA(s)) {
++                    s->max_ra = s->poc;
++                } else {
++                    if (IS_IDR(s))
++                        s->max_ra = INT_MIN;
++                }
++            }
++
++            if ((s->nal_unit_type == HEVC_NAL_RASL_R || s->nal_unit_type == HEVC_NAL_RASL_N) &&
++                s->poc <= s->max_ra) {
++                s->is_decoded = 0;
++                break;
++            } else {
++                if (s->nal_unit_type == HEVC_NAL_RASL_R && s->poc > s->max_ra)
++                    s->max_ra = INT_MIN;
++            }
++
++            ret = hevc_frame_start(s);
++            if (ret < 0)
++                return ret;
++        } else if (!s->ref) {
++            av_log(s->avctx, AV_LOG_ERROR, "First slice in a frame missing.\n");
++            goto fail;
++        }
++
++        if (s->nal_unit_type != s->first_nal_type) {
++            av_log(s->avctx, AV_LOG_ERROR,
++                   "Non-matching NAL types of the VCL NALUs: %d %d\n",
++                   s->first_nal_type, s->nal_unit_type);
++            return AVERROR_INVALIDDATA;
++        }
++
++        if (!s->sh.dependent_slice_segment_flag &&
++            s->sh.slice_type != HEVC_SLICE_I) {
++            ret = ff_hevc_rpi_slice_rpl(s);
++            if (ret < 0) {
++                av_log(s->avctx, AV_LOG_WARNING,
++                       "Error constructing the reference lists for the current slice.\n");
++                goto fail;
++            }
++        }
++
++        ctb_addr_ts = hls_slice_data(s, nal);
++        if (ctb_addr_ts >= s->ps.sps->ctb_size) {
++            s->is_decoded = 1;
++        }
++
++        if (ctb_addr_ts < 0) {
++            ret = ctb_addr_ts;
++            goto fail;
++        }
++        break;
++    case HEVC_NAL_EOS_NUT:
++    case HEVC_NAL_EOB_NUT:
++        s->seq_decode = (s->seq_decode + 1) & 0xff;
++        s->max_ra     = INT_MAX;
++        break;
++    case HEVC_NAL_AUD:
++    case HEVC_NAL_FD_NUT:
++        break;
++    default:
++        av_log(s->avctx, AV_LOG_INFO,
++               "Skipping NAL unit %d\n", s->nal_unit_type);
++    }
++
++    return 0;
++fail:
++    if (s->avctx->err_recognition & AV_EF_EXPLODE)
++        return ret;
++    return 0;
++}
++// Split the packet buf/length into NAL units and decode each in turn.  Returns < 0 on a split or decode error
++static int decode_nal_units(HEVCRpiContext *s, const uint8_t *buf, int length)
++{
++    int i, ret = 0;
++    int eos_at_start = 1;
++
++    s->ref = NULL;
++    s->last_eos = s->eos;
++    s->eos = 0;
++
++    /* split the input packet into NAL units, so we know the upper bound on the
++     * number of slices in the frame */
++    ret = ff_h2645_packet_split(&s->pkt, buf, length, s->avctx, s->is_nalff,
++                                s->nal_length_size, s->avctx->codec_id, 0, 0);
++    if (ret < 0) {
++        av_log(s->avctx, AV_LOG_ERROR,
++               "Error splitting the input into NAL units.\n");
++        return ret;
++    }
++
++    for (i = 0; i < s->pkt.nb_nals; i++) {
++        if (s->pkt.nals[i].type == HEVC_NAL_EOB_NUT ||
++            s->pkt.nals[i].type == HEVC_NAL_EOS_NUT) {
++            if (eos_at_start) {
++                s->last_eos = 1;  // EOS/EOB ahead of any other NAL in this packet
++            } else {
++                s->eos = 1;       // EOS/EOB after some other NAL in this packet
++            }
++        } else {
++            eos_at_start = 0;
++        }
++    }
++
++    /* decode the NAL units */
++    for (i = 0; i < s->pkt.nb_nals; i++) {
++        ret = decode_nal_unit(s, &s->pkt.nals[i]);
++        if (ret < 0) {
++            av_log(s->avctx, AV_LOG_WARNING,
++                   "Error parsing NAL unit #%d.\n", i);
++            goto fail;
++        }
++    }
++
++fail:  // Also success path
++    if (s->ref != NULL) {
++        if (s->used_for_ref && s->threads_type != 0) {
++            ff_hevc_rpi_progress_signal_all_done(s);
++        }
++        else {
++            // Flush frame to real memory as we expect to be able to pass
++            // it straight on to mmal
++            flush_frame(s, s->frame);
++        }
++    }
++    return ret;
++}
++
++static void print_md5(void *log_ctx, int level, uint8_t md5[16])
++{   // Log the 16 digest bytes as two hex digits each, with no separator or newline
++    const uint8_t *p = md5;
++    for (; p != md5 + 16; p++)
++        av_log(log_ctx, level, "%02"PRIx8, *p);
++}
++
++static int verify_md5(HEVCRpiContext *s, AVFrame *frame)
++{   // MD5 each plane of frame and compare with s->sei.picture_hash.md5[]; 0 if all match
++    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
++    int pixel_shift;
++    int i, j;
++
++    if (!desc)
++        return AVERROR(EINVAL);  // no descriptor for this pixel format
++
++    pixel_shift = desc->comp[0].depth > 8;  // 1 when samples are deeper than 8 bits, else 0
++
++    av_log(s->avctx, AV_LOG_DEBUG, "Verifying checksum for frame with POC %d: ",
++           s->poc);
++
++    /* the checksums are LE, so we have to byteswap for >8bpp formats
++     * on BE arches */
++#if HAVE_BIGENDIAN
++    if (pixel_shift && !s->checksum_buf) {
++        av_fast_malloc(&s->checksum_buf, &s->checksum_buf_size,
++                       FFMAX3(frame->linesize[0], frame->linesize[1],
++                              frame->linesize[2]));
++        if (!s->checksum_buf)
++            return AVERROR(ENOMEM);
++    }
++#endif
++
++    for (i = 0; frame->data[i]; i++) {  // one digest per plane, until the first NULL data pointer
++        int width  = s->avctx->coded_width;
++        int height = s->avctx->coded_height;
++        int w = (i == 1 || i == 2) ? (width  >> desc->log2_chroma_w) : width;
++        int h = (i == 1 || i == 2) ? (height >> desc->log2_chroma_h) : height;
++        uint8_t md5[16];
++
++        av_md5_init(s->md5_ctx);
++        for (j = 0; j < h; j++) {
++            // NOTE(review): the row step is frame_stride1(frame, 1) for every plane index i - confirm
++            const uint8_t *src = frame->data[i] + j * frame_stride1(frame, 1);
++#if HAVE_BIGENDIAN
++            if (pixel_shift) {
++                s->bdsp.bswap16_buf((uint16_t *) s->checksum_buf,
++                                    (const uint16_t *) src, w);
++                src = s->checksum_buf;
++            }
++#endif
++            av_md5_update(s->md5_ctx, src, w << pixel_shift);  // w samples -> bytes per row
++        }
++        av_md5_final(s->md5_ctx, md5);
++
++        if (!memcmp(md5, s->sei.picture_hash.md5[i], 16)) {
++            av_log   (s->avctx, AV_LOG_DEBUG, "plane %d - correct ", i);
++            print_md5(s->avctx, AV_LOG_DEBUG, md5);
++            av_log   (s->avctx, AV_LOG_DEBUG, "; ");
++        } else {
++            av_log   (s->avctx, AV_LOG_ERROR, "mismatching checksum of plane %d - ", i);
++            print_md5(s->avctx, AV_LOG_ERROR, md5);
++            av_log   (s->avctx, AV_LOG_ERROR, " != ");
++            print_md5(s->avctx, AV_LOG_ERROR, s->sei.picture_hash.md5[i]);
++            av_log   (s->avctx, AV_LOG_ERROR, "\n");
++            return AVERROR_INVALIDDATA;  // first mismatching plane ends the check
++        }
++    }
++
++    av_log(s->avctx, AV_LOG_DEBUG, "\n");
++
++    return 0;
++}
++
++static int all_sps_supported(const HEVCRpiContext * const s)
++{   // 1 if every populated SPS slot passes is_sps_supported(), else 0
++    for (unsigned int n = 0; n != FF_ARRAY_ELEMS(s->ps.sps_list); ++n) {
++        // Empty slots are ignored
++        if (s->ps.sps_list[n] == NULL)
++            continue;
++        // First unsupported SPS decides the answer
++        if (!is_sps_supported((const HEVCRpiSPS*)s->ps.sps_list[n]->data))
++            return 0;
++    }
++    return 1;
++}
++
++static int hevc_rpi_decode_extradata(HEVCRpiContext *s, uint8_t *buf, int length, int first)
++{   // Parse extradata into s->ps / s->sei; if `first` is non-zero also export stream params
++    int ret, i;
++
++    ret = ff_hevc_rpi_decode_extradata(buf, length, &s->ps, &s->sei, &s->is_nalff,
++                                   &s->nal_length_size, s->avctx->err_recognition,
++                                   s->apply_defdispwin, s->avctx);
++    if (ret < 0)
++        return ret;  // parser error is passed through unchanged
++
++    /* export stream parameters from the first SPS */
++    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
++        if (first && s->ps.sps_list[i]) {  // with first == 0 this loop has no effect
++            const HEVCRpiSPS *sps = (const HEVCRpiSPS*)s->ps.sps_list[i]->data;
++            export_stream_params(s->avctx, &s->ps, sps);
++            break;  // only the lowest-index populated slot is exported
++        }
++    }
++
++    return 0;
++}
++
++static int hevc_rpi_decode_frame(AVCodecContext *avctx, void *data, int *got_output,
++                             AVPacket *avpkt)
++{   // Decode one packet; returns avpkt->size (0 for an empty packet) or a negative error
++    int ret;
++    int new_extradata_size;
++    uint8_t *new_extradata;
++    HEVCRpiContext *s = avctx->priv_data;
++
++    if (!avpkt->size) {  // empty packet: no decoding, just ask for an output frame
++        ret = ff_hevc_rpi_output_frame(s, data, 1);
++        if (ret < 0)
++            return ret;
++
++        *got_output = ret;  // non-negative result is forwarded as the got_output flag
++        return 0;
++    }
++
++    new_extradata = av_packet_get_side_data(avpkt, AV_PKT_DATA_NEW_EXTRADATA,
++                                            &new_extradata_size);
++    if (new_extradata && new_extradata_size > 0) {
++        ret = hevc_rpi_decode_extradata(s, new_extradata, new_extradata_size, 0);  // first = 0
++        if (ret < 0)
++            return ret;
++    }
++
++    s->ref = NULL;
++    ret    = decode_nal_units(s, avpkt->data, avpkt->size);
++    if (ret < 0)
++        return ret;
++
++    /* verify the SEI checksum */
++    // NOTE(review): s->ref is dereferenced whenever is_decoded is set - confirm it is non-NULL then
++    if (avctx->err_recognition & AV_EF_CRCCHECK && s->is_decoded &&
++        s->sei.picture_hash.is_md5) {
++        ret = verify_md5(s, s->ref->frame);
++        if (ret < 0 && avctx->err_recognition & AV_EF_EXPLODE) {  // mismatch is fatal only with EXPLODE
++            ff_hevc_rpi_unref_frame(s, s->ref, ~0);
++            return ret;
++        }
++    }
++    s->sei.picture_hash.is_md5 = 0;  // cleared after every decoded packet
++
++    if (s->is_decoded) {
++        av_log(avctx, AV_LOG_DEBUG, "Decoded frame with POC %d.\n", s->poc);
++        s->is_decoded = 0;
++    }
++
++    if (s->output_frame->buf[0]) {  // a frame is waiting in s->output_frame: hand it to the caller
++        av_frame_move_ref(data, s->output_frame);
++        *got_output = 1;
++    }
++
++    return avpkt->size;
++}
++
++static int hevc_ref_frame(HEVCRpiContext *s, HEVCRpiFrame *dst, HEVCRpiFrame *src)
++{   // Make dst a new reference to src's frame and col_mvf buffer; 0 or negative AVERROR
++    int ret;
++
++    ret = ff_thread_ref_frame(&dst->tf, &src->tf);
++    if (ret < 0)
++        return ret;
++
++    if (src->col_mvf_buf != NULL)
++    {
++        dst->col_mvf_buf = av_buffer_ref(src->col_mvf_buf);
++        if (!dst->col_mvf_buf)
++            goto fail;  // undo the frame reference taken above
++    }
++    dst->col_mvf = src->col_mvf;  // copied even when src has no col_mvf_buf
++
++    dst->poc        = src->poc;
++    dst->flags      = src->flags;
++    dst->sequence   = src->sequence;
++    return 0;
++
++fail:
++    ff_hevc_rpi_unref_frame(s, dst, ~0);
++    return AVERROR(ENOMEM);
++}
++
++
++static av_cold int hevc_decode_free(AVCodecContext *avctx)
++{   // Release everything owned by the decoder context; always returns 0
++    HEVCRpiContext * const s = avctx->priv_data;
++    int i;
++
++    pic_arrays_free(s);
++
++    av_freep(&s->md5_ctx);
++
++    av_freep(&s->cabac_save);
++
++#if RPI_EXTRA_BIT_THREADS
++    bit_threads_kill(s);
++#endif
++
++    hevc_exit_worker(s);
++    for (i = 0; i != 2; ++i) {
++        ff_hevc_rpi_progress_kill_state(s->progress_states + i);
++    }
++    // NOTE(review): hevc_init_context's fail path can reach here with HEVClc == NULL - confirm job_lc_kill copes
++    job_lc_kill(s->HEVClc);
++
++    av_freep(&s->sao_pixel_buffer_h[0]);  // [1] & [2] allocated with [0]
++    av_freep(&s->sao_pixel_buffer_v[0]);
++    av_frame_free(&s->output_frame);
++
++    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
++        ff_hevc_rpi_unref_frame(s, &s->DPB[i], ~0);
++        av_frame_free(&s->DPB[i].frame);
++    }
++
++    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++)
++        av_buffer_unref(&s->ps.vps_list[i]);
++    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++)
++        av_buffer_unref(&s->ps.sps_list[i]);
++    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++)
++        av_buffer_unref(&s->ps.pps_list[i]);
++    s->ps.sps = NULL;  // active-set pointers cleared once the lists above are unreferenced
++    s->ps.pps = NULL;
++    s->ps.vps = NULL;
++
++    // Free separately from sLists as used that way by RPI WPP
++    for (i = 0; i < MAX_NB_THREADS && s->HEVClcList[i] != NULL; ++i) {  // stops at first NULL entry
++        av_freep(s->HEVClcList + i);
++    }
++    s->HEVClc = NULL;  // Allocated as part of HEVClcList
++
++    ff_h2645_packet_uninit(&s->pkt);
++
++    if (s->qpu_init_ok)  // only undo vpu_qpu_init() if it succeeded
++        vpu_qpu_term();
++    s->qpu_init_ok = 0;
++
++    return 0;
++}
++
++
++static av_cold int hevc_init_context(AVCodecContext *avctx)
++{   // Allocate per-context decoder state; 0 on success, AVERROR(ENOMEM) on any failure
++    HEVCRpiContext *s = avctx->priv_data;
++    int i;
++
++    s->avctx = avctx;
++
++    s->HEVClc = av_mallocz(sizeof(HEVCRpiLocalContext));
++    if (!s->HEVClc)
++        goto fail;
++    s->HEVClcList[0] = s->HEVClc;  // slot 0 of the list aliases the primary local context
++
++    if (vpu_qpu_init() != 0)
++        goto fail;  // NOTE(review): also reported as ENOMEM by the fail path below
++    s->qpu_init_ok = 1;
++
++#if RPI_QPU_EMU_Y || RPI_QPU_EMU_C
++    {
++        static const uint32_t dframe[1] = {0x80808080};
++        s->qpu_dummy_frame_emu = (const uint8_t *)dframe;
++    }
++#endif
++#if !RPI_QPU_EMU_Y || !RPI_QPU_EMU_C
++    s->qpu_dummy_frame_qpu = qpu_dummy();
++#endif
++
++    bt_lc_init(s, s->HEVClc, 0);
++    job_lc_init(s->HEVClc);
++
++    for (i = 0; i != 2; ++i) {
++        ff_hevc_rpi_progress_init_state(s->progress_states + i);
++    }
++
++    if ((s->cabac_save = av_malloc(sizeof(*s->cabac_save))) == NULL)
++        goto fail;
++
++     if ((s->output_frame = av_frame_alloc()) == NULL)
++        goto fail;
++
++    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {
++        s->DPB[i].frame = av_frame_alloc();
++        if (!s->DPB[i].frame)
++            goto fail;
++        s->DPB[i].tf.f = s->DPB[i].frame;  // ThreadFrame wraps the same AVFrame
++        s->DPB[i].dpb_no = i;
++    }
++
++    s->max_ra = INT_MAX;
++
++    if ((s->md5_ctx = av_md5_alloc()) == NULL)
++        goto fail;
++
++    s->context_initialized = 1;
++    s->eos = 0;
++
++    ff_hevc_rpi_reset_sei(&s->sei);
++
++    return 0;
++
++fail:
++    av_log(s->avctx, AV_LOG_ERROR, "%s: Failed\n", __func__);
++    hevc_decode_free(avctx);  // releases whatever was set up before the failure
++    return AVERROR(ENOMEM);
++}
++
++#if HAVE_THREADS
++static int hevc_update_thread_context(AVCodecContext *dst,
++                                      const AVCodecContext *src)
++{   // Copy decoder state from src's context into dst's; returns 0 or a negative AVERROR
++    HEVCRpiContext *s  = dst->priv_data;
++    HEVCRpiContext *s0 = src->priv_data;
++    int i, ret;
++
++    av_assert0(s->context_initialized);
++
++    // dst == src can happen according to the comments and in that case
++    // there is nothing to do here
++    if (dst == src)
++        return 0;
++
++    for (i = 0; i < FF_ARRAY_ELEMS(s->DPB); i++) {  // mirror src's DPB slot by slot
++        ff_hevc_rpi_unref_frame(s, &s->DPB[i], ~0);
++        if (s0->DPB[i].frame->buf[0]) {
++            ret = hevc_ref_frame(s, &s->DPB[i], &s0->DPB[i]);
++            if (ret < 0)
++                return ret;
++        }
++    }
++
++    if (s->ps.sps != s0->ps.sps)
++        s->ps.sps = NULL;  // forces the set_sps() call further down
++    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.vps_list); i++) {
++        av_buffer_unref(&s->ps.vps_list[i]);
++        if (s0->ps.vps_list[i]) {
++            s->ps.vps_list[i] = av_buffer_ref(s0->ps.vps_list[i]);
++            if (!s->ps.vps_list[i])
++                return AVERROR(ENOMEM);
++        }
++    }
++
++    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.sps_list); i++) {
++        av_buffer_unref(&s->ps.sps_list[i]);
++        if (s0->ps.sps_list[i]) {
++            s->ps.sps_list[i] = av_buffer_ref(s0->ps.sps_list[i]);
++            if (!s->ps.sps_list[i])
++                return AVERROR(ENOMEM);
++        }
++    }
++
++    for (i = 0; i < FF_ARRAY_ELEMS(s->ps.pps_list); i++) {
++        av_buffer_unref(&s->ps.pps_list[i]);
++        if (s0->ps.pps_list[i]) {
++            s->ps.pps_list[i] = av_buffer_ref(s0->ps.pps_list[i]);
++            if (!s->ps.pps_list[i])
++                return AVERROR(ENOMEM);
++        }
++    }
++
++    if (s->ps.sps != s0->ps.sps)
++        if ((ret = set_sps(s, s0->ps.sps, src->pix_fmt)) < 0)
++            return ret;
++
++    s->seq_decode = s0->seq_decode;
++    s->seq_output = s0->seq_output;
++    s->pocTid0    = s0->pocTid0;
++    s->max_ra     = s0->max_ra;
++    s->eos        = s0->eos;
++    s->no_rasl_output_flag = s0->no_rasl_output_flag;
++
++    s->is_nalff        = s0->is_nalff;
++    s->nal_length_size = s0->nal_length_size;
++
++    s->threads_type        = s0->threads_type;
++
++    if (s0->eos) {  // src ended a sequence: start a new one here (8-bit wrapping counter)
++        s->seq_decode = (s->seq_decode + 1) & 0xff;
++        s->max_ra = INT_MAX;
++    }
++
++    s->sei.frame_packing        = s0->sei.frame_packing;
++    s->sei.display_orientation  = s0->sei.display_orientation;
++    s->sei.mastering_display    = s0->sei.mastering_display;
++    s->sei.content_light        = s0->sei.content_light;
++    s->sei.alternative_transfer = s0->sei.alternative_transfer;
++
++    // * We do this here as it allows us to easily locate our parents
++    //   global job pool, but there really should be a less nasty way
++    if (s->jbc == NULL) {
++        if ((s->jbc = rpi_job_ctl_new(s0->jbc->jbg)) == NULL)
++            return AVERROR(ENOMEM);  // was av_assert0(): report OOM instead of aborting
++        hevc_init_worker(s);
++    }
++
++    return 0;
++}
++#endif
++
++#include <sys/stat.h>
++static int qpu_ok(void)
++{   // Returns 1 when stat() of /dev/rpivid-intcmem fails, else 0; probed once and cached
++    static int is_pi3 = -1;  // -1 = not probed yet
++    if (is_pi3 == -1)
++    {
++        struct stat sb;  // contents unused; only stat()'s return value matters
++        is_pi3 = (stat("/dev/rpivid-intcmem", &sb) != 0);  // any stat() failure => 1
++    }
++    return is_pi3;
++}
++
++static av_cold int hevc_decode_init(AVCodecContext *avctx)
++{   // Codec init callback: per-context setup, then one-time global setup for non-copy contexts
++    HEVCRpiContext *s = avctx->priv_data;
++    int ret;
++
++    if (!qpu_ok())
++        return AVERROR_DECODER_NOT_FOUND;  // decline this decoder; nothing allocated yet
++
++    if ((ret = hevc_init_context(avctx)) < 0)
++        return ret;  // hevc_init_context already cleaned up after itself
++
++    // If we are a child context then stop now
++    // Everything after this point is either 1st decode setup or global alloc
++    // that must not be repeated
++    // Global info will be copied into children in update_thread_context (we
++    // can't do it here as we have no way of finding the parent context)
++    if (avctx->internal->is_copy)
++        return 0;
++
++    // Job allocation requires VCSM alloc to work so ensure that we have it
++    // initialised by this point
++    {
++        HEVCRpiJobGlobal * const jbg = jbg_new(FFMAX(avctx->thread_count * 3, 5));  // at least 5
++        if (jbg == NULL) {
++            av_log(s->avctx, AV_LOG_ERROR, "%s: Job global init failed\n", __func__);
++            ret = AVERROR(ENOMEM);
++            goto fail;
++        }
++
++        // NOTE(review): jbg is not released here if the next call fails - confirm it is reclaimed
++        if ((s->jbc = rpi_job_ctl_new(jbg)) == NULL) {
++            av_log(s->avctx, AV_LOG_ERROR, "%s: Job ctl init failed\n", __func__);
++            ret = AVERROR(ENOMEM);
++            goto fail;
++        }
++    }
++
++    hevc_init_worker(s);
++
++    s->eos = 1;
++
++    if (avctx->extradata_size > 0 && avctx->extradata) {
++        if ((ret = hevc_rpi_decode_extradata(s, avctx->extradata, avctx->extradata_size, 1)) < 0)
++            goto fail;
++
++        if (!all_sps_supported(s)) {  // an SPS from the extradata is not supported here
++            ret = AVERROR_DECODER_NOT_FOUND;
++            goto fail;
++        }
++    }
++
++    if((avctx->active_thread_type & FF_THREAD_FRAME) && avctx->thread_count > 1)
++        s->threads_type = FF_THREAD_FRAME;
++    else
++        s->threads_type = 0;  // 0 = frame threading not in use
++
++    return 0;
++
++fail:
++    hevc_decode_free(avctx);
++    return ret;
++}
++
++static void hevc_decode_flush(AVCodecContext *avctx)
++{   // Flush callback: flush the DPB, then restore the max_ra/eos values set by codec init
++    HEVCRpiContext *s = avctx->priv_data;
++    ff_hevc_rpi_flush_dpb(s);
++    s->max_ra = INT_MAX;
++    s->eos = 1;
++}
++
++typedef struct  hwaccel_rpi3_qpu_env_s {  // hwaccel private data (hwaccel_priv_data)
++    const AVClass *av_class;
++    AVZcEnvPtr zc;  // set by hwaccel_rpi3_qpu_init, released by hwaccel_rpi3_qpu_free
++} hwaccel_rpi3_qpu_env_t;
++
++static int hwaccel_alloc_frame(AVCodecContext *s, AVFrame *frame)
++{   // Allocate buffers for frame and attach decode data; 0 on success, else the failing call's code
++    hwaccel_rpi3_qpu_env_t * const r3 = s->internal->hwaccel_priv_data;
++    int rv;
++
++    if (av_rpi_zc_in_use(s))
++    {
++        rv = s->get_buffer2(s, frame, 0);  // use the context's own get_buffer2 callback
++    }
++    else
++    {
++        rv = av_rpi_zc_get_buffer(r3->zc, frame);  // fall back to the hwaccel's private zc env
++        if (rv == 0)
++            rv = av_rpi_zc_resolve_frame(frame, ZC_RESOLVE_ALLOC_VALID);  // actually do the alloc
++    }
++
++    if (rv == 0 &&
++        (rv = ff_attach_decode_data(frame)) < 0)
++    {
++        av_frame_unref(frame);  // drop the buffers again if decode data cannot be attached
++    }
++
++    return rv;
++}
++
++static int hwaccel_rpi3_qpu_free(AVCodecContext *avctx)
++{   // hwaccel uninit callback: release the zc env; always returns 0
++    hwaccel_rpi3_qpu_env_t * const r3 = avctx->internal->hwaccel_priv_data;
++    av_rpi_zc_int_env_freep(&r3->zc);  // also called from the init fail path, where zc is NULL
++    return 0;
++}
++
++static int hwaccel_rpi3_qpu_init(AVCodecContext *avctx)
++{   // hwaccel init callback: allocate the zc env; 0 or AVERROR(ENOMEM)
++    hwaccel_rpi3_qpu_env_t * const r3 = avctx->internal->hwaccel_priv_data;
++
++    if ((r3->zc = av_rpi_zc_int_env_alloc(avctx)) == NULL)
++        goto fail;
++
++    return 0;
++
++fail:
++    av_log(avctx, AV_LOG_ERROR, "Rpi3 QPU init failed\n");
++    hwaccel_rpi3_qpu_free(avctx);  // r3->zc is NULL on this path
++    return AVERROR(ENOMEM);
++}
++
++
++#define OFFSET(x) offsetof(HEVCRpiContext, x)
++#define PAR (AV_OPT_FLAG_DECODING_PARAM | AV_OPT_FLAG_VIDEO_PARAM)
++
++
++static const AVOption options[] = {  // user-settable decoder options; NULL-terminated
++    { "apply_defdispwin", "Apply default display window from VUI", OFFSET(apply_defdispwin),
++        AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },
++    { "strict-displaywin", "strictly apply default display window size", OFFSET(apply_defdispwin),
++        AV_OPT_TYPE_BOOL, {.i64 = 0}, 0, 1, PAR },  // NOTE(review): same field as apply_defdispwin
++    { NULL },
++};
++
++static const AVClass hevc_rpi_decoder_class = {
++    .class_name = "HEVC RPI decoder",
++    .item_name  = av_default_item_name,
++    .option     = options,
++    .version    = LIBAVUTIL_VERSION_INT,
++};
++
++static const enum AVPixelFormat hevc_rpi_pix_fmts[] = {
++    AV_PIX_FMT_SAND128,
++    AV_PIX_FMT_SAND64_10,
++    AV_PIX_FMT_NONE
++};
++
++
++static const AVHWAccel hwaccel_rpi3_qpu = {
++    .name           = "Pi3 QPU Hwaccel",
++    .alloc_frame    = hwaccel_alloc_frame,
++    .init           = hwaccel_rpi3_qpu_init,
++    .uninit         = hwaccel_rpi3_qpu_free,
++    .priv_data_size = sizeof(hwaccel_rpi3_qpu_env_t),
++    .caps_internal  = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_MT_SAFE,
++};
++
++static const AVCodecHWConfigInternal hevc_rpi_hw_config_sand128 =
++{
++    .public = {
++        .pix_fmt = AV_PIX_FMT_SAND128,
++        .methods = AV_CODEC_HW_CONFIG_METHOD_AD_HOC,
++        .device_type = AV_HWDEVICE_TYPE_NONE,
++    },
++    .hwaccel = &hwaccel_rpi3_qpu
++};
++static const AVCodecHWConfigInternal hevc_rpi_hw_config_sand64_10 =
++{
++    .public = {
++        .pix_fmt = AV_PIX_FMT_SAND64_10,
++        .methods = AV_CODEC_HW_CONFIG_METHOD_AD_HOC,
++        .device_type = AV_HWDEVICE_TYPE_NONE,
++    },
++    .hwaccel = &hwaccel_rpi3_qpu
++};
++
++
++static const AVCodecHWConfigInternal *hevc_rpi_hw_configs[] = {
++    &hevc_rpi_hw_config_sand128,
++    &hevc_rpi_hw_config_sand64_10,
++    NULL
++};
++
++
++AVCodec ff_hevc_rpi_decoder = {
++    .name                  = "hevc_rpi",
++    .long_name             = NULL_IF_CONFIG_SMALL("HEVC (rpi)"),
++    .type                  = AVMEDIA_TYPE_VIDEO,
++    .id                    = AV_CODEC_ID_HEVC,
++    .priv_data_size        = sizeof(HEVCRpiContext),
++    .priv_class            = &hevc_rpi_decoder_class,
++    .init                  = hevc_decode_init,
++    .close                 = hevc_decode_free,
++    .decode                = hevc_rpi_decode_frame,
++    .flush                 = hevc_decode_flush,
++    .update_thread_context = ONLY_IF_THREADS_ENABLED(hevc_update_thread_context),
++    .capabilities          = AV_CODEC_CAP_DR1 | AV_CODEC_CAP_DELAY |
++                             AV_CODEC_CAP_HARDWARE |
++                             AV_CODEC_CAP_AVOID_PROBING |
++#if 0
++    // Debugging is often easier without threads getting in the way
++                            0,
++#warning H265 threading turned off
++#else
++    // We only have decent optimisation for frame - so only admit to that
++                             AV_CODEC_CAP_FRAME_THREADS,
++#endif
++    .caps_internal         = FF_CODEC_CAP_INIT_THREADSAFE |
++                             FF_CODEC_CAP_EXPORTS_CROPPING |
++                             FF_CODEC_CAP_ALLOCATE_PROGRESS,
++    .pix_fmts              = hevc_rpi_pix_fmts,
++    .profiles              = NULL_IF_CONFIG_SMALL(ff_hevc_profiles),
++    .hw_configs            = hevc_rpi_hw_configs,
++//    .wrapper_name          = "hevc_rpi",
++};
++
+--- /dev/null
++++ b/libavcodec/rpi_hevcdec.h
+@@ -0,0 +1,1091 @@
++/*
++ * HEVC video decoder
++ *
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVCODEC_RPI_HEVCDEC_H
++#define AVCODEC_RPI_HEVCDEC_H
++
++#include "config.h"
++
++#include <stdatomic.h>
++
++#include "libavutil/buffer.h"
++
++#include "avcodec.h"
++#include "bswapdsp.h"
++#include "cabac.h"
++#include "get_bits.h"
++#include "rpi_hevcpred.h"
++#include "h2645_parse.h"
++#include "hevc.h"
++#include "rpi_hevc_mv.h"
++#include "rpi_hevc_ps.h"
++#include "rpi_hevc_sei.h"
++#include "rpi_hevcdsp.h"
++#include "internal.h"
++#include "thread.h"
++#include "videodsp.h"
++
++#if ARCH_ARM
++#include "arm/rpi_hevc_misc_neon.h"
++#endif
++
++#define MAX_NB_THREADS 16
++#define SHIFT_CTB_WPP 2
++
++//TODO: check if this is really the maximum
++#define MAX_TRANSFORM_DEPTH 5
++
++#define MAX_TB_SIZE 32
++#define MAX_QP 51
++#define DEFAULT_INTRA_TC_OFFSET 2
++
++#define HEVC_CONTEXTS 199
++
++#define MRG_MAX_NUM_CANDS     5
++
++#define HEVC_MAX_CTB_SIZE (1 << HEVC_MAX_LOG2_CTB_SIZE)  // 64
++
++// Size of DPB array
++#define HEVC_DPB_ELS            32
++
++#define L0 0
++#define L1 1
++
++#define EPEL_EXTRA_BEFORE 1
++#define EPEL_EXTRA_AFTER  2
++#define EPEL_EXTRA        3
++#define QPEL_EXTRA_BEFORE 3
++#define QPEL_EXTRA_AFTER  4
++#define QPEL_EXTRA        7
++
++#define EDGE_EMU_BUFFER_STRIDE 80
++
++#include <semaphore.h>
++#include "rpi_qpu.h"
++
++// Max jobs per frame thread. Actual usage will be limited by the size
++// of the global job pool
++// ?? Limits
++#define RPI_MAX_JOBS            8
++
++// This is the number of _extra_ bit threads - we will have
++// RPI_EXTRA_BIT_THREADS+1 threads actually doing the processing
++//
++// 0 is legitimate and will disable our WPP processing
++//#define RPI_EXTRA_BIT_THREADS 0
++#define RPI_EXTRA_BIT_THREADS   2
++
++// Number of separate threads/passes in worker
++// 2 and 3 are the currently valid numbers
++// At the moment 3 seems fractionally faster
++//#define RPI_PASSES              2
++#define RPI_PASSES              3
++
++// Print out various usage stats
++#define RPI_TSTATS              0
++
++// Define RPI_COMPRESS_COEFFS to 1 to send coefficients in compressed form
++#define RPI_COMPRESS_COEFFS     1
++
++// Wait for VPU/QPU to finish in worker pass 0
++// If 0 then the wait is in pass 1
++//
++// One might expect the better place to wait would be in pass 1 however
++// testing shows that pass 0 produces overall faster decode.
++// Interestingly it is QPU/VPU limited streams that seem to suffer
++// from pass 1 waits, CPU limited ones tend to show a very mild gain.
++// This define exists so it is easy to test this.
++#define RPI_WORKER_WAIT_PASS_0  1
++
++// Use ARM emulation of QPU pred
++// These are for debug only as the emulation makes only limited
++// effort to be fast
++#define RPI_QPU_EMU_Y           0
++#define RPI_QPU_EMU_C           0
++
++// Max width & height we are prepared to consider
++// Sand frame shape calc becomes confused with large frames
++// Some buffer alloc also depends on this
++#define HEVC_RPI_MAX_WIDTH      2048
++#define HEVC_RPI_MAX_HEIGHT     1088
++
++
++// Min CTB size is 16. Whole expansion is parenthesised so it is safe inside larger expressions
++#define HEVC_RPI_MAX_CTBS (((HEVC_RPI_MAX_WIDTH + 15) / 16) * ((HEVC_RPI_MAX_HEIGHT + 15) / 16))
++
++/**
++ * Value of the luma sample at position (x, y) in the 2D array tab.
++ */
++#define SAMPLE(tab, x, y) ((tab)[(y) * s->sps->width + (x)])
++#define SAMPLE_CTB(tab, x, y) ((tab)[(y) * min_cb_width + (x)])
++
++#define IS_IDR(s) ((s)->nal_unit_type == HEVC_NAL_IDR_W_RADL || (s)->nal_unit_type == HEVC_NAL_IDR_N_LP)
++#define IS_BLA(s) ((s)->nal_unit_type == HEVC_NAL_BLA_W_RADL || (s)->nal_unit_type == HEVC_NAL_BLA_W_LP || \
++                   (s)->nal_unit_type == HEVC_NAL_BLA_N_LP)
++#define IS_IRAP(s) ((s)->nal_unit_type >= 16 && (s)->nal_unit_type <= 23)
++
++enum RPSType {
++    ST_CURR_BEF = 0,
++    ST_CURR_AFT,
++    ST_FOLL,
++    LT_CURR,
++    LT_FOLL,
++    NB_RPS_TYPE,
++};
++
++enum SyntaxElement {
++    SAO_MERGE_FLAG = 0,
++    SAO_TYPE_IDX,
++    SAO_EO_CLASS,
++    SAO_BAND_POSITION,
++    SAO_OFFSET_ABS,
++    SAO_OFFSET_SIGN,
++    END_OF_SLICE_FLAG,
++    SPLIT_CODING_UNIT_FLAG,
++    CU_TRANSQUANT_BYPASS_FLAG,
++    SKIP_FLAG,
++    CU_QP_DELTA,
++    PRED_MODE_FLAG,
++    PART_MODE,
++    PCM_FLAG,
++    PREV_INTRA_LUMA_PRED_FLAG,
++    MPM_IDX,
++    REM_INTRA_LUMA_PRED_MODE,
++    INTRA_CHROMA_PRED_MODE,
++    MERGE_FLAG,
++    MERGE_IDX,
++    INTER_PRED_IDC,
++    REF_IDX_L0,
++    REF_IDX_L1,
++    ABS_MVD_GREATER0_FLAG,
++    ABS_MVD_GREATER1_FLAG,
++    ABS_MVD_MINUS2,
++    MVD_SIGN_FLAG,
++    MVP_LX_FLAG,
++    NO_RESIDUAL_DATA_FLAG,
++    SPLIT_TRANSFORM_FLAG,
++    CBF_LUMA,
++    CBF_CB_CR,
++    TRANSFORM_SKIP_FLAG,
++    EXPLICIT_RDPCM_FLAG,
++    EXPLICIT_RDPCM_DIR_FLAG,
++    LAST_SIGNIFICANT_COEFF_X_PREFIX,
++    LAST_SIGNIFICANT_COEFF_Y_PREFIX,
++    LAST_SIGNIFICANT_COEFF_X_SUFFIX,
++    LAST_SIGNIFICANT_COEFF_Y_SUFFIX,
++    SIGNIFICANT_COEFF_GROUP_FLAG,
++    SIGNIFICANT_COEFF_FLAG,
++    COEFF_ABS_LEVEL_GREATER1_FLAG,
++    COEFF_ABS_LEVEL_GREATER2_FLAG,
++    COEFF_ABS_LEVEL_REMAINING,
++    COEFF_SIGN_FLAG,
++    LOG2_RES_SCALE_ABS,
++    RES_SCALE_SIGN_FLAG,
++    CU_CHROMA_QP_OFFSET_FLAG,
++    CU_CHROMA_QP_OFFSET_IDX,
++};
++
++enum PartMode {
++    PART_2Nx2N = 0,
++    PART_2NxN  = 1,
++    PART_Nx2N  = 2,
++    PART_NxN   = 3,
++    PART_2NxnU = 4,
++    PART_2NxnD = 5,
++    PART_nLx2N = 6,
++    PART_nRx2N = 7,
++};
++
++enum PredMode {
++    MODE_INTER = 0,
++    MODE_INTRA,
++    MODE_SKIP,
++};
++
++enum InterPredIdc {
++    PRED_L0 = 0,
++    PRED_L1,
++    PRED_BI,
++};
++
++enum PredFlag {
++    PF_INTRA = 0,
++    PF_L0,
++    PF_L1,
++    PF_BI,
++};
++
++enum SAOType {
++    SAO_NOT_APPLIED = 0,
++    SAO_BAND,
++    SAO_EDGE,
++    SAO_APPLIED
++};
++
++enum SAOEOClass {
++    SAO_EO_HORIZ = 0,
++    SAO_EO_VERT,
++    SAO_EO_135D,
++    SAO_EO_45D,
++};
++
++enum ScanType {
++    SCAN_DIAG = 0,
++    SCAN_HORIZ,
++    SCAN_VERT,
++};
++
++typedef struct RefPicList {
++    struct HEVCRpiFrame *ref[HEVC_MAX_REFS];
++    int list[HEVC_MAX_REFS];
++    uint8_t isLongTerm[HEVC_MAX_REFS];
++    int nb_refs;
++} RefPicList;
++
++typedef struct RefPicListTab {
++    RefPicList refPicList[2];
++} RefPicListTab;
++
++typedef struct RpiCodingUnit {
++    unsigned int x;             // Passed to deblock
++    unsigned int y;
++    unsigned int x_split;
++    unsigned int y_split;
++
++    enum PredMode pred_mode;    ///< PredMode
++    enum PartMode part_mode;    ///< PartMode
++
++    // Inferred parameters
++    uint8_t intra_split_flag;   ///< IntraSplitFlag
++    uint8_t max_trafo_depth;    ///< MaxTrafoDepth
++    uint8_t cu_transquant_bypass_flag;
++} RpiCodingUnit;
++
++typedef struct RpiPredictionUnit {
++    uint8_t intra_pred_mode[4];
++    uint8_t intra_pred_mode_c[4];
++    uint8_t chroma_mode_c[4];
++    uint8_t merge_flag;
++} RpiPredictionUnit;
++
++typedef struct HEVCRpiTransformUnit {
++    int8_t cu_qp_delta;
++
++    // Inferred parameters;
++    uint8_t intra_pred_mode;
++    uint8_t intra_pred_mode_c;
++    uint8_t chroma_mode_c;
++    uint8_t is_cu_qp_delta_wanted;
++    uint8_t cu_chroma_qp_offset_wanted;
++    const int8_t * qp_divmod6[3];
++} HEVCRpiTransformUnit;
++
++typedef struct DBParams {
++    int8_t beta_offset; // -12 to +12
++    int8_t tc_offset;   // -12 to +12
++} DBParams;
++
++#define HEVC_FRAME_FLAG_OUTPUT    (1 << 0)
++#define HEVC_FRAME_FLAG_SHORT_REF (1 << 1)
++#define HEVC_FRAME_FLAG_LONG_REF  (1 << 2)
++#define HEVC_FRAME_FLAG_BUMPING   (1 << 3)
++
++struct HEVCRpiJob;
++
++typedef struct HEVCRpiFrame {
++    AVFrame *frame;
++    ThreadFrame tf;
++    ColMvField *col_mvf;
++    int poc;
++    struct HEVCRpiFrame *collocated_ref;
++
++    AVBufferRef *col_mvf_buf;
++
++    /**
++     * A sequence counter, so that old frames are output first
++     * after a POC reset
++     */
++    uint16_t sequence;
++
++    /**
++     * A combination of HEVC_FRAME_FLAG_*
++     */
++    uint8_t flags;
++
++    // Entry no in DPB - can be used as a small unique
++    // frame identifier (within the current thread)
++    uint8_t dpb_no;
++} HEVCRpiFrame;
++
++typedef struct HEVCRpiLocalContext {
++    HEVCRpiTransformUnit tu;
++
++    CABACContext cc;
++
++    // Vars that allow us to locate everything from just an lc
++    struct HEVCRpiContext * context;  // ??? make const ???
++    unsigned int lc_n; // lc list el no
++
++    // Job wait links
++    struct HEVCRpiLocalContext * jw_next;
++    struct HEVCRpiLocalContext * jw_prev;
++    struct HEVCRpiLocalContext * ljw_next;
++    struct HEVCRpiLocalContext * ljw_prev;
++    struct HEVCRpiJob * volatile jw_job;
++    sem_t jw_sem;
++
++    // ?? Wrap in structure ??
++    sem_t bt_sem_in;
++    sem_t * bt_psem_out;
++    volatile int bt_terminate;
++    unsigned int ts;
++    unsigned int bt_last_line;  // Last line in this bit_thread chunk
++    unsigned int bt_line_no;
++    unsigned int bt_line_width;
++    unsigned int bt_line_inc;
++
++    struct HEVCRpiJob * jb0;
++    char unit_done;  // Set once we have dealt with this slice
++    char bt_is_tile;
++    char last_progress_good;
++    char cabac_init_req;
++
++    uint8_t cabac_state[HEVC_CONTEXTS];
++    uint8_t stat_coeff[4];
++    GetBitContext gb;
++
++    uint8_t ct_depth;
++    int8_t qp_y;
++    int8_t curr_qp_y;
++    int8_t qPy_pred;
++
++// N.B. Used by asm (neon) - do not change
++#define AVAIL_S_UR  0
++#define AVAIL_S_U   1
++#define AVAIL_S_UL  2
++#define AVAIL_S_L   3
++#define AVAIL_S_DL  4
++
++#define AVAIL_U     (1 << AVAIL_S_U)
++#define AVAIL_L     (1 << AVAIL_S_L)
++#define AVAIL_UL    (1 << AVAIL_S_UL)
++#define AVAIL_UR    (1 << AVAIL_S_UR)
++#define AVAIL_DL    (1 << AVAIL_S_DL)
++
++// Intra filters - same number space as avail
++#define FILTER_LIGHT    0x40
++#define FILTER_STRONG   0x80
++#define FILTER_EITHER   (FILTER_LIGHT | FILTER_STRONG)
++
++    uint8_t ctb_avail;
++    int     end_of_ctb_x;
++    int     end_of_ctb_y;
++
++    RpiCodingUnit cu;
++    RpiPredictionUnit pu;
++
++#define BOUNDARY_LEFT_SLICE     (1 << 0)
++#define BOUNDARY_LEFT_TILE      (1 << 1)
++#define BOUNDARY_UPPER_SLICE    (1 << 2)
++#define BOUNDARY_UPPER_TILE     (1 << 3)
++    /* properties of the boundary of the current CTB for the purposes
++     * of the deblocking filter */
++    unsigned int boundary_flags;
++
++#define IPM_TAB_SIZE (HEVC_MAX_CTB_SIZE >> LOG2_MIN_PU_SIZE)
++    uint8_t ipm_left[IPM_TAB_SIZE];
++    uint8_t ipm_up[IPM_TAB_SIZE];
++
++//#define MVF_STASH_WIDTH       128
++#define MVF_STASH_WIDTH       64
++#define MVF_STASH_HEIGHT      64
++#define MVF_STASH_WIDTH_PU    (MVF_STASH_WIDTH >> LOG2_MIN_PU_SIZE)
++#define MVF_STASH_HEIGHT_PU   (MVF_STASH_HEIGHT >> LOG2_MIN_PU_SIZE)
++    HEVCRpiMvField mvf_ul[1];
++    HEVCRpiMvField mvf_stash[MVF_STASH_WIDTH_PU * MVF_STASH_HEIGHT_PU];
++
++    /* +7 is for subpixel interpolation, *2 for high bit depths */
++//    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2];
++    /* The extended size between the new edge emu buffer is abused by SAO */
++//    DECLARE_ALIGNED(32, uint8_t, edge_emu_buffer2)[(MAX_PB_SIZE + 7) * EDGE_EMU_BUFFER_STRIDE * 2];
++//    DECLARE_ALIGNED(32, int16_t, tmp [MAX_PB_SIZE * MAX_PB_SIZE]);
++
++} HEVCRpiLocalContext;
++
++// Each block can have an intra prediction and an add_residual command, so the limit is
++// noof-cmds(2) * (max-ctu height / min-transform(4)) * planes * (MAX_WIDTH / min-transform(4))
++
++// Sand only has 2 planes (Y/C), hence a plane factor of 2 rather than 3 below
++#define RPI_MAX_PRED_CMDS (2*(HEVC_MAX_CTB_SIZE/4)*2*(HEVC_RPI_MAX_WIDTH/4))
++
++// Command for intra prediction and transform_add of predictions to coefficients
++enum rpi_pred_cmd_e
++{
++    RPI_PRED_ADD_RESIDUAL,
++    RPI_PRED_ADD_RESIDUAL_U, // = RPI_PRED_ADD_RESIDUAL + c_idx (c_idx == 1)
++    RPI_PRED_ADD_RESIDUAL_V, // = RPI_PRED_ADD_RESIDUAL + c_idx (c_idx == 2)
++    RPI_PRED_ADD_RESIDUAL_C, // Merged U+V
++    RPI_PRED_ADD_DC,
++    RPI_PRED_ADD_DC_U,       // Both U & V are effectively C
++    RPI_PRED_ADD_DC_V,
++    RPI_PRED_INTRA,
++    RPI_PRED_INTRA_C,
++    RPI_PRED_I_PCM,
++    RPI_PRED_CMD_MAX         // Count of the commands above
++};
++
++typedef struct HEVCPredCmd {
++    uint8_t type;  // presumably an enum rpi_pred_cmd_e value - confirm against users
++    uint8_t size;  // log2 "size" used by all variants
++    uint8_t avail; // i_pred - but left here as they pack well
++    uint8_t dummy; // NOTE(review): looks like padding ahead of the union - confirm
++    union {        // Per-command payload; the valid member is presumably selected by type
++        struct {  // TRANSFORM_ADD (presumably the RPI_PRED_ADD_RESIDUAL* commands - confirm)
++            uint8_t * dst;
++            const int16_t * buf;
++            uint16_t stride;  // Should be good enough for all pic fmts we use
++            int16_t dc;
++        } ta;
++        struct {  // presumably the RPI_PRED_ADD_DC* commands - TODO confirm
++            uint8_t * dst;
++            uint32_t stride;
++            int dc;
++        } dc;
++        struct {  // INTRA
++            uint16_t x;
++            uint16_t y;
++            enum IntraPredMode mode;
++        } i_pred;
++        struct {  // I_PCM
++            uint16_t x;
++            uint16_t y;
++            const void * src;
++            uint32_t src_len;  // presumably the length of src in bytes - confirm
++        } i_pcm;
++    };
++} HEVCPredCmd;
++
++union qpu_mc_pred_cmd_u;     // Tag must match the pointers held in HEVCRpiInterPredQ
++struct qpu_mc_pred_y_p_s;
++struct qpu_mc_src_s;
++
++typedef struct HEVCRpiInterPredQ  // One inter-prediction command Q (HEVCRpiInterPredEnv.q points at these)
++{
++    union qpu_mc_pred_cmd_u *qpu_mc_base;  // presumably the start of this Q's command buffer - confirm
++    union qpu_mc_pred_cmd_u *qpu_mc_curr;  // presumably the next command slot to fill - confirm
++    struct qpu_mc_src_s *last_l0;
++    struct qpu_mc_src_s *last_l1;
++    unsigned int load;
++    uint32_t code_setup;  // NOTE(review): code_* look like QPU code addresses - confirm
++    uint32_t code_sync;
++    uint32_t code_exit;
++} HEVCRpiInterPredQ;
++
++typedef struct HEVCRpiInterPredEnv  // A set of inter-prediction Qs plus fill bookkeeping
++{
++    HEVCRpiInterPredQ * q;      // The Qs themselves; n gives the count
++    uint8_t n;                  // Number of Qs
++    uint8_t n_grp;              // Number of Q in a group
++    uint8_t curr;               // Current Q number (0..n-1)
++    uint8_t used;               // 0 if nothing in any Q, 1 otherwise
++    uint8_t used_grp;           // 0 if nothing in any Q in the current group
++    unsigned int max_fill;      // NOTE(review): units not visible here - confirm against users
++    unsigned int min_gap;       // NOTE(review): units not visible here - confirm against users
++    GPU_MEM_PTR_T gptr;         // presumably the GPU allocation backing the Qs - confirm
++} HEVCRpiInterPredEnv;
++
++typedef struct HEVCRpiIntraPredEnv {  // List of prediction commands
++    unsigned int n;        // Number of commands
++    HEVCPredCmd * cmds;    // Command array; n counts the entries
++} HEVCRpiIntraPredEnv;
++
++typedef struct HEVCRpiCoeffEnv {  // One coefficient buffer (HEVCRpiCoeffsEnv holds four)
++    unsigned int n;  // presumably the number of int16_t entries used in buf - confirm
++#if RPI_COMPRESS_COEFFS
++    unsigned int packed; // Equal to 1 if coefficients should be being packed
++    unsigned int packed_n; // Value of n when packed was set equal to 0 (i.e. the amount that is sent compressed).  Only valid if packed==0
++#endif
++    int16_t * buf;
++} HEVCRpiCoeffEnv;
++
++typedef struct HEVCRpiCoeffsEnv {  // The four coefficient buffers of a job
++    HEVCRpiCoeffEnv s[4];  // presumably indexed by the buf_no given to rpi_alloc_coeff_buf - confirm
++    GPU_MEM_PTR_T gptr;    // presumably the GPU allocation backing the buffers - confirm
++    void * mptr;
++} HEVCRpiCoeffsEnv;
++
++typedef struct HEVCRpiFrameProgressWait {  // One waiter in a singly linked list
++    int req;  // presumably the progress value being waited for - confirm
++    struct HEVCRpiFrameProgressWait * next;
++    sem_t sem;  // presumably posted once req has been reached - confirm
++} HEVCRpiFrameProgressWait;
++
++typedef struct HEVCRpiFrameProgressState {  // Head/tail of a list of HEVCRpiFrameProgressWait
++    struct HEVCRpiFrameProgressWait * first;
++    struct HEVCRpiFrameProgressWait * last;
++    pthread_mutex_t lock;  // presumably guards first/last - confirm
++} HEVCRpiFrameProgressState;
++
++typedef struct RpiBlk  // Rectangle; used for the HEVCRpiJob bounding box and as filter bounds
++{
++    unsigned int x;  // NOTE(review): units (pels vs CTBs) are not visible here - confirm
++    unsigned int y;
++    unsigned int w;
++    unsigned int h;
++} RpiBlk;
++
++typedef struct HEVCRpiJob {
++    struct HEVCRpiJob * next;  // Free chain
++    struct HEVCRpiJobCtl * jbc_local;
++    const HEVCRpiSPS * sps;       // sps used to set up this job
++
++    int waited;
++    int ctu_ts_first;
++    int ctu_ts_last;
++    RpiBlk bounds;  // Bounding box of job
++
++    struct qpu_mc_pred_y_p_s * last_y8_p;
++    struct qpu_mc_src_s * last_y8_l1;
++    rpi_cache_flush_env_t * rfe;
++
++    HEVCRpiInterPredEnv chroma_ip;      // Inter-prediction Qs for chroma
++    HEVCRpiInterPredEnv luma_ip;        // Inter-prediction Qs for luma
++    int16_t progress_req[HEVC_DPB_ELS]; // index by dpb_no
++    HEVCRpiIntraPredEnv intra;          // Prediction command list for this job
++    HEVCRpiCoeffsEnv coeffs;            // Coefficient buffers for this job
++    HEVCRpiFrameProgressWait progress_wait;
++    sem_t sem;
++    rpi_cache_buf_t flush_buf;
++} HEVCRpiJob;
++
++struct HEVCRpiContext;
++
++typedef void HEVCRpiWorkerFn(const struct HEVCRpiContext * const s, HEVCRpiJob * const jb);  // Type of HEVCRpiPassQueue.worker
++
++typedef struct HEVCRpiPassQueue  // Per-pass worker state (HEVCRpiContext.passq has RPI_PASSES of these)
++{
++//    int pending;
++    volatile int terminate;  // presumably set to ask the pass thread to exit - confirm
++    sem_t sem_in;
++    sem_t * psem_out;  // NOTE(review): looks like it points at the next stage's input sem - confirm
++    unsigned int job_n;
++    struct HEVCRpiContext * context; // Context pointer as we get to pass a single "void * this" to the thread
++    HEVCRpiWorkerFn * worker;  // Function run for each job of this pass
++    pthread_t thread;
++    uint8_t pass_n;  // Pass number - debug
++    uint8_t started;
++} HEVCRpiPassQueue;
++
++
++struct HEVCRpiJobGlobal;
++
++typedef struct HEVCRpiJobCtl  // Job control state (HEVCRpiContext.jbc points at one)
++{
++    sem_t sem_out;
++
++    HEVCRpiJob * volatile jb1;  // The job associated with this frame if unallocated - NULL if allocated
++    struct HEVCRpiJobGlobal * jbg;  // State shared through HEVCRpiJobGlobal
++
++    HEVCRpiLocalContext * lcw_head;  // NOTE(review): head/tail of a list of local contexts - presumably waiters, confirm
++    HEVCRpiLocalContext * lcw_tail;
++
++    pthread_mutex_t in_lock;
++    int offload_in;  // presumably the write index into offloadq - confirm
++
++    HEVCRpiJob *offloadq[RPI_MAX_JOBS];
++} HEVCRpiJobCtl;
++
++
++typedef struct HEVCRpiJobGlobal  // Job state reached through HEVCRpiJobCtl.jbg
++{
++    intptr_t ref_count;
++    pthread_mutex_t lock;               // presumably guards the lists below - confirm
++    HEVCRpiJob * free1;                 // Singly linked list of free jobs
++    HEVCRpiLocalContext * wait_head;       // Double linked list of lcs waiting for a job
++    HEVCRpiLocalContext * wait_good;  // Last good tail
++    HEVCRpiLocalContext * wait_tail;
++
++} HEVCRpiJobGlobal;
++
++#define RPI_BIT_THREADS (RPI_EXTRA_BIT_THREADS + 1)
++
++#if RPI_TSTATS
++typedef struct HEVCRpiStats {  // Counters that only exist when RPI_TSTATS is enabled
++    int y_pred1_y8_merge;
++    int y_pred1_xy;
++    int y_pred1_x0;
++    int y_pred1_y0;
++    int y_pred1_x0y0;
++    int y_pred1_wle8;
++    int y_pred1_wgt8;
++    int y_pred1_hle16;
++    int y_pred1_hgt16;
++    int y_pred2_xy;
++    int y_pred2_x0;
++    int y_pred2_y0;
++    int y_pred2_x0y0;
++    int y_pred2_hle16;
++    int y_pred2_hgt16;
++} HEVCRpiStats;
++#endif
++
++typedef struct HEVCRpiCabacState  // Saved CABAC state (HEVCRpiContext.cabac_save points at these)
++{
++    uint8_t rice[4];  // same size as HEVCRpiLocalContext.stat_coeff - presumably a copy of it, confirm
++    uint8_t state[HEVC_CONTEXTS];  // same size as HEVCRpiLocalContext.cabac_state
++} HEVCRpiCabacState;
++
++#define HEVC_RPI_BS_STRIDE1_PEL_SHIFT   6   // 64 pels
++#define HEVC_RPI_BS_STRIDE1_PELS        (1U << HEVC_RPI_BS_STRIDE1_PEL_SHIFT)
++#define HEVC_RPI_BS_STRIDE1_PEL_MASK    (HEVC_RPI_BS_STRIDE1_PELS - 1)
++#define HEVC_RPI_BS_ELS_PER_BYTE_SHIFT  2   // 4 els per byte
++#define HEVC_RPI_BS_PELS_PER_EL_SHIFT   2   // 4 pels per el
++#define HEVC_RPI_BS_PELS_PER_BYTE_SHIFT (HEVC_RPI_BS_PELS_PER_EL_SHIFT + HEVC_RPI_BS_ELS_PER_BYTE_SHIFT)  // = 4, i.e. 16 pels per byte
++#define HEVC_RPI_BS_STRIDE1_BYTE_SHIFT  (HEVC_RPI_BS_STRIDE1_PEL_SHIFT - HEVC_RPI_BS_PELS_PER_BYTE_SHIFT)  // = 2
++#define HEVC_RPI_BS_STRIDE1_BYTES       (1U << HEVC_RPI_BS_STRIDE1_BYTE_SHIFT)  // = 4 bytes per 64 pels
++#define HEVC_RPI_BS_Y_SHR               3   // 8 vertical pels per row
++#define HEVC_RPI_BS_COL_BYTES_SHR       (HEVC_RPI_BS_Y_SHR - HEVC_RPI_BS_STRIDE1_BYTE_SHIFT)  // = 1
++
++typedef struct HEVCRpiContext {
++    const AVClass *c;  // needed by private avoptions
++    AVCodecContext *avctx;
++
++    uint8_t             threads_type;  // 0 disables the progress wait/signal helpers declared below this struct
++    char qpu_init_ok;
++
++    /** 1 if the independent slice segment header was successfully parsed */
++    uint8_t slice_initialized;
++    char used_for_ref;  // rpi
++    char is_irap;
++    char offload_recon;
++    uint8_t eos;       ///< current packet contains an EOS/EOB NAL
++    uint8_t last_eos;  ///< last packet contains an EOS/EOB NAL
++    uint8_t no_backward_pred_flag;
++    uint8_t is_decoded;
++    uint8_t no_rasl_output_flag;
++
++
++    /**
++     * Sequence counters for decoded and output frames, so that old
++     * frames are output first after a POC reset
++     */
++    uint16_t seq_decode;
++    uint16_t seq_output;
++
++    int                 width;
++    int                 height;
++
++    HEVCRpiJobCtl * jbc;
++    // cabac stash
++    // b0       skip flag
++    // b1+      ct_depth
++    uint8_t * cabac_stash_left;
++    uint8_t * cabac_stash_up;
++
++    // NOTE(review): heading used to say "Function pointers" but none follow - these are the QPU dummy frame fields
++#if RPI_QPU_EMU_Y || RPI_QPU_EMU_C
++    const uint8_t * qpu_dummy_frame_emu;
++#endif
++#if !RPI_QPU_EMU_Y || !RPI_QPU_EMU_C
++    uint32_t qpu_dummy_frame_qpu;  // Not a frame - just a bit of memory
++#endif
++    HEVCRpiQpu qpu;
++
++    HEVCRpiFrameProgressState progress_states[2];  // presumably indexed by progress field (0 recon, 1 mv) - confirm
++
++    HEVCRpiCabacState *cabac_save;
++
++    AVFrame *frame;
++    AVFrame *output_frame;
++    uint8_t *sao_pixel_buffer_h[3];
++    uint8_t *sao_pixel_buffer_v[3];
++
++    unsigned int col_mvf_stride;
++    AVBufferPool *col_mvf_pool;
++
++    RpiSAOParams *sao;
++    DBParams *deblock;
++    enum HEVCNALUnitType nal_unit_type;
++    int temporal_id;  ///< temporal_id_plus1 - 1
++    HEVCRpiFrame *ref;
++    int poc;
++    int pocTid0;
++    int slice_idx; ///< number of the slice being currently decoded
++    int max_ra;
++
++    int8_t *qp_y_tab;
++
++    // Deblocking block strength bitmaps
++    unsigned int bs_stride2;
++    unsigned int bs_size;
++    uint8_t *bs_horizontal;
++    uint8_t *bs_vertical;
++    uint8_t *bsf_stash_up;
++    uint8_t *bsf_stash_left;
++
++#if HEVC_RPI_MAX_CTBS >= 0xffff
++#define TAB_SLICE_ADDR_BROKEN ~(uint32_t)0
++    uint32_t *tab_slice_address;
++#else
++#define TAB_SLICE_ADDR_BROKEN ~(uint16_t)0  // NOTE(review): where int is wider than 16 bits this promotes to int -1, not 0xffff, so an == test against a uint16_t entry never matches - confirm users
++    uint16_t *tab_slice_address;
++#endif
++
++    // Bitfield 1 bit per 8 pels (min pcm size)
++    uint8_t *is_pcm;
++    // Bitfield 1 bit per 8 pels (min cb size)
++    // Only needed for CIP as CIP processing is async to the main thread
++    uint8_t *is_intra;
++
++    // PU
++    HEVCRpiMvField *mvf_up;    // Read by mvf_ptr() for positions above the current CTB
++    HEVCRpiMvField *mvf_left;  // Read by the 64-wide-stash mvf_ptr() for positions left of the current CTB
++
++    const RefPicList **rpl_up;
++    const RefPicList **rpl_left;
++    RefPicList * refPicList;
++
++    // CTB-level flags affecting loop filter operation
++    uint8_t *filter_slice_edges;
++
++    /** used on BE to byteswap the lines for checksumming */
++    uint8_t *checksum_buf;
++    int      checksum_buf_size;
++
++    const uint8_t *data;
++
++    H2645Packet pkt;
++    // type of the first VCL NAL of the current frame
++    enum HEVCNALUnitType first_nal_type;
++
++    uint8_t context_initialized;
++    int is_nalff;           ///< this flag is != 0 if bitstream is encapsulated
++                            ///< as a format defined in 14496-15
++    int apply_defdispwin;
++
++    int nal_length_size;    ///< Number of bytes used for nal length (1, 2 or 4)
++    int nuh_layer_id;
++
++    struct AVMD5 *md5_ctx;
++
++    RefPicListTab * rpl_tab;
++    unsigned int rpl_tab_size;
++
++    uint8_t *is_intra_store;
++
++    RpiSliceHeader sh;
++
++    HEVCRpiParamSets ps;
++
++    HEVCRpiLocalContext    *HEVClc;
++    HEVCRpiLocalContext    *HEVClcList[MAX_NB_THREADS];
++
++    HEVCRpiFrame DPB[HEVC_DPB_ELS];
++
++    /// candidate references for the current frame
++    RefPicList rps[5];
++
++    HEVCRpiPredContext hpc;
++    HEVCDSPContext hevcdsp;
++
++    HEVCSEIContext sei;
++
++    // Put structures that allocate non-trivial storage at the end
++    // These are mostly used indirectly so position in the structure doesn't matter
++    HEVCRpiPassQueue passq[RPI_PASSES];
++#if RPI_EXTRA_BIT_THREADS > 0
++    int bt_started;
++    // This simply contains thread descriptors - task setup is held elsewhere
++    pthread_t bit_threads[RPI_EXTRA_BIT_THREADS];
++#endif
++#if RPI_TSTATS
++    HEVCRpiStats tstats;
++#endif
++} HEVCRpiContext;
++
++/**
++ * Mark all frames in DPB as unused for reference.
++ */
++void ff_hevc_rpi_clear_refs(HEVCRpiContext *s);
++
++/**
++ * Drop all frames currently in DPB.
++ */
++void ff_hevc_rpi_flush_dpb(HEVCRpiContext *s);
++
++/**
++ * Construct the reference picture sets for the current frame.
++ */
++int ff_hevc_rpi_frame_rps(HEVCRpiContext *s);
++
++/**
++ * Construct the reference picture list(s) for the current slice.
++ */
++int ff_hevc_rpi_slice_rpl(HEVCRpiContext *s);
++
++
++/**
++ * Get the number of candidate references for the current frame.
++ */
++int ff_hevc_rpi_frame_nb_refs(HEVCRpiContext *s);
++
++int ff_hevc_rpi_set_new_ref(HEVCRpiContext *s, AVFrame **frame, int poc);
++
++/**
++ * Find next frame in output order and put a reference to it in frame.
++ * @return 1 if a frame was output, 0 otherwise
++ */
++int ff_hevc_rpi_output_frame(HEVCRpiContext *s, AVFrame *frame, int flush);
++
++void ff_hevc_rpi_bump_frame(HEVCRpiContext *s);
++
++void ff_hevc_rpi_unref_frame(HEVCRpiContext *s, HEVCRpiFrame *frame, int flags);
++
++unsigned int ff_hevc_rpi_tb_avail_flags(
++    const HEVCRpiContext * const s, const HEVCRpiLocalContext * const lc,
++    const unsigned int x, const unsigned int y, const unsigned int w, const unsigned int h);  // presumably returns AVAIL_* bits - confirm
++
++void ff_hevc_rpi_luma_mv_merge_mode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int x0, int y0, int nPbW,
++                                int nPbH, int log2_cb_size, int part_idx,
++                                int merge_idx, HEVCRpiMvField * const mv);
++void ff_hevc_rpi_luma_mv_mvp_mode(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc,
++    const unsigned int x0, const unsigned int y0,
++    const unsigned int nPbW, const unsigned int nPbH,
++    const unsigned int avail,
++    HEVCRpiMvField * const mv,
++    const unsigned int mvp_lx_flag, const unsigned int LX);
++void ff_hevc_rpi_set_qPy(const HEVCRpiContext * const s, HEVCRpiLocalContext * const lc, int xBase, int yBase);
++void ff_hevc_rpi_deblocking_boundary_strengths(const HEVCRpiContext * const s, const HEVCRpiLocalContext * const lc,
++                                               const unsigned int x0, const unsigned int y0,
++                                               const unsigned int log2_trafo_size, const int is_coded_block);
++int ff_hevc_rpi_hls_filter_blk(const HEVCRpiContext * const s, const RpiBlk bounds, const int eot);
++
++extern const uint8_t ff_hevc_rpi_qpel_extra_before[4];
++extern const uint8_t ff_hevc_rpi_qpel_extra_after[4];
++extern const uint8_t ff_hevc_rpi_qpel_extra[4];
++
++int16_t * rpi_alloc_coeff_buf(HEVCRpiJob * const jb, const int buf_no, const int n);
++
++// arm/hevc_misc_neon.S
++// Neon coeff zap fn - only declared when HAVE_NEON is set
++#if HAVE_NEON
++extern void rpi_zap_coeff_vals_neon(int16_t * dst, unsigned int l2ts_m2);
++#endif
++
++void ff_hevc_rpi_progress_wait_field(const HEVCRpiContext * const s, HEVCRpiJob * const jb,
++                                     const HEVCRpiFrame * const ref, const int val, const int field);  // field: 0 from the _recon wrapper, 1 from the _mv wrapper
++
++void ff_hevc_rpi_progress_signal_field(HEVCRpiContext * const s, const int val, const int field);  // field numbering as for wait_field
++
++// All of these expect that s->threads_type == FF_THREAD_FRAME
++
++static inline void ff_hevc_rpi_progress_wait_mv(const HEVCRpiContext * const s, HEVCRpiJob * const jb,
++                                     const HEVCRpiFrame * const ref, const int y)
++{
++    const int threaded = (s->threads_type != 0);  // nothing to wait for when threads_type is 0
++    if (threaded) ff_hevc_rpi_progress_wait_field(s, jb, ref, y, 1);  // field 1 is the one the _mv helpers use
++}
++
++static inline void ff_hevc_rpi_progress_signal_mv(HEVCRpiContext * const s, const int y)
++{
++    if (!s->used_for_ref || s->threads_type == 0) return;  // signal only if used_for_ref is set and threads_type is non-zero
++    ff_hevc_rpi_progress_signal_field(s, y, 1);            // field 1 is the one the _mv helpers use
++}
++
++static inline void ff_hevc_rpi_progress_wait_recon(const HEVCRpiContext * const s, HEVCRpiJob * const jb,
++                                     const HEVCRpiFrame * const ref, const int y)
++{
++    ff_hevc_rpi_progress_wait_field(s, jb, ref, y, 0);  // field 0; unlike wait_mv there is no threads_type check here
++}
++
++static inline void ff_hevc_rpi_progress_signal_recon(HEVCRpiContext * const s, const int y)
++{
++    // Nothing to signal unless used_for_ref is set and threads_type is non-zero
++    if (!s->used_for_ref || s->threads_type == 0)
++        return;
++    ff_hevc_rpi_progress_signal_field(s, y, 0);  // field 0 is the one the _recon helpers use
++}
++
++static inline void ff_hevc_rpi_progress_signal_all_done(HEVCRpiContext * const s)
++{
++    int field;  // INT_MAX is signalled on field 0 first, then on field 1
++    for (field = 0; field < 2; field++) ff_hevc_rpi_progress_signal_field(s, INT_MAX, field);
++}
++
++
++// Set all done - signal nothing (used in missing refs)
++// Works for both rpi & non-rpi
++static inline void ff_hevc_rpi_progress_set_all_done(HEVCRpiFrame * const ref)
++{
++    int * progress;
++    // A frame without a progress buffer has nothing to mark
++    if (ref->tf.progress == NULL) return;
++    progress = (int *)ref->tf.progress->data;
++    progress[0] = INT_MAX;
++    progress[1] = INT_MAX;
++}
++
++#define HEVC_RPI_420_ONLY 1
++#define HEVC_RPI_SAND128_ONLY 1
++
++static inline unsigned int ctx_hshift(const HEVCRpiContext * const s, const int cidx)
++{
++#if !HEVC_RPI_420_ONLY
++    return s->ps.sps->hshift[cidx];
++#else
++    return cidx != 0;  // 4:2:0-only build: 0 for component 0, 1 for any other
++#endif
++}
++
++static inline unsigned int ctx_vshift(const HEVCRpiContext * const s, const int cidx)
++{
++#if !HEVC_RPI_420_ONLY
++    return s->ps.sps->vshift[cidx];
++#else
++    return cidx != 0;  // 4:2:0-only build: 0 for component 0, 1 for any other
++#endif
++}
++
++static inline int ctx_cfmt(const HEVCRpiContext * const s)
++{
++#if !HEVC_RPI_420_ONLY
++    return s->ps.sps->chroma_format_idc;
++#else
++    return 1;  // constant used in place of chroma_format_idc in a 4:2:0-only build
++#endif
++}
++
++static inline int frame_stride1(const AVFrame * const frame, const int c_idx)
++{
++#if !HEVC_RPI_SAND128_ONLY
++    return frame->linesize[c_idx];
++#else
++    return 128;  // constant used in place of linesize in a SAND128-only build
++#endif
++}
++
++#if HEVC_RPI_SAND128_ONLY
++// Propagate this decision to later zc includes
++#define RPI_ZC_SAND128_ONLY 1
++#endif
++
++#ifndef ff_hevc_rpi_copy_vert
++// Copy `height` elements down a column, advancing dst/src by their byte strides each step.
++// Element width is 4 bytes for pixel_shift 2, 2 bytes for pixel_shift 1, else 1 byte.
++static inline void ff_hevc_rpi_copy_vert(uint8_t *dst, const uint8_t *src,
++                                         int pixel_shift, int height,
++                                         ptrdiff_t stride_dst, ptrdiff_t stride_src)
++{
++    int rows_left = height;
++
++    if (pixel_shift == 2) {  // 32-bit elements
++        for (; rows_left > 0; rows_left--) {
++            *(uint32_t *)dst = *(uint32_t *)src;
++            dst += stride_dst;
++            src += stride_src;
++        }
++    }
++    else if (pixel_shift == 1) {  // 16-bit elements
++        for (; rows_left > 0; rows_left--) {
++            *(uint16_t *)dst = *(uint16_t *)src;
++            dst += stride_dst;
++            src += stride_src;
++        }
++    }
++    else {  // any other shift copies single bytes
++        for (; rows_left > 0; rows_left--) {
++            *dst = *src;
++            dst += stride_dst;
++            src += stride_src;
++        }
++    }
++}
++#endif
++
++
++#if MVF_STASH_WIDTH == 64
++static inline HEVCRpiMvField* mvf_stash_ptr(const HEVCRpiContext *const s, const HEVCRpiLocalContext * const lc,
++                               const unsigned int x, const unsigned int y)
++{
++    const unsigned int in_ctb = ~(~0U << s->ps.sps->log2_ctb_size);  // keeps only the within-CTB bits of a coordinate
++    return (HEVCRpiMvField*)&lc->mvf_stash[((y & in_ctb) >> LOG2_MIN_PU_SIZE) * MVF_STASH_WIDTH_PU + ((x & in_ctb) >> LOG2_MIN_PU_SIZE)];
++}
++
++static inline HEVCRpiMvField* mvf_ptr(const HEVCRpiContext *const s, const HEVCRpiLocalContext * const lc,
++                               const unsigned int x0, const unsigned int y0,
++                               const unsigned int x, const unsigned int y)
++{
++    const unsigned int ctb_mask = ~0U << s->ps.sps->log2_ctb_size;  // clears the within-CTB bits
++    const int is_left = x < (x0 & ctb_mask);    // x lies left of the CTB containing x0
++    const int is_above = y < (y0 & ctb_mask);   // y lies above the CTB containing y0
++
++    if (is_above)
++        return (HEVCRpiMvField *)(is_left ? lc->mvf_ul : s->mvf_up + (x >> LOG2_MIN_PU_SIZE));
++    if (is_left)
++        return (HEVCRpiMvField *)(s->mvf_left + (y >> LOG2_MIN_PU_SIZE));
++    return (HEVCRpiMvField *)(lc->mvf_stash +
++        ((y & ~ctb_mask) >> LOG2_MIN_PU_SIZE) * MVF_STASH_WIDTH_PU + ((x & ~ctb_mask) >> LOG2_MIN_PU_SIZE));
++}
++
++static inline unsigned int mvf_left_stride(const HEVCRpiContext *const s,
++                               const unsigned int x0,
++                               const unsigned int x)
++{
++    if (x < (x0 & (~0U << s->ps.sps->log2_ctb_size)))
++        return 1;                // x is left of the CTB containing x0
++    return MVF_STASH_WIDTH_PU;   // otherwise step by one stash row
++}
++
++#else
++static inline HEVCRpiMvField* mvf_stash_ptr(const HEVCRpiContext *const s, const HEVCRpiLocalContext * const lc,
++                               const unsigned int x, const unsigned int y)
++{
++    const unsigned int row = (y & ~(~0U << s->ps.sps->log2_ctb_size)) >> LOG2_MIN_PU_SIZE;  // row from the within-CTB bits of y
++    return (HEVCRpiMvField*)&lc->mvf_stash[row * MVF_STASH_WIDTH_PU + ((x >> LOG2_MIN_PU_SIZE) & (MVF_STASH_WIDTH_PU - 1))];  // column wraps at the stash width
++}
++
++static inline HEVCRpiMvField* mvf_ptr(const HEVCRpiContext *const s, const HEVCRpiLocalContext * const lc,
++                               const unsigned int x0, const unsigned int y0,
++                               const unsigned int x, const unsigned int y)
++{
++    const unsigned int ctb_mask = ~0U << s->ps.sps->log2_ctb_size;  // clears the within-CTB bits
++
++    // y at or below the top of the CTB containing y0: the entry lives in the stash
++    if (y >= (y0 & ctb_mask))
++        return mvf_stash_ptr(s, lc, x, y);
++
++    // Otherwise assume up, or up-left when x is also
++    // left of the CTB containing x0
++    if (x < (x0 & ctb_mask))
++        return (HEVCRpiMvField *)lc->mvf_ul;
++    return (HEVCRpiMvField *)(s->mvf_up + (x >> LOG2_MIN_PU_SIZE));
++}
++
++static inline unsigned int mvf_left_stride(const HEVCRpiContext *const s,
++                               const unsigned int x0,
++                               const unsigned int x)
++{
++    return MVF_STASH_WIDTH_PU;  // this variant always uses the stash row stride; s, x0 and x are unused
++}
++#endif
++
++#endif /* AVCODEC_RPI_HEVCDEC_H */
+--- /dev/null
++++ b/libavcodec/rpi_hevcdsp.c
+@@ -0,0 +1,450 @@
++/*
++ * HEVC video decoder
++ *
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ * Copyright (C) 2013 - 2014 Pierre-Edouard Lepere
++ * Copyright (C) 2018 John Cox, Ben Avison for Raspberry Pi (Trading)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "rpi_hevcdsp.h"
++#include "rpi_hevc_mv.h"
++
++static const int8_t transform[32][32] = {  // NOTE(review): presumably the HEVC transform basis consumed by rpi_hevcdsp_template.c - confirm
++    { 64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,
++      64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64 },
++    { 90,  90,  88,  85,  82,  78,  73,  67,  61,  54,  46,  38,  31,  22,  13,   4,
++      -4, -13, -22, -31, -38, -46, -54, -61, -67, -73, -78, -82, -85, -88, -90, -90 },
++    { 90,  87,  80,  70,  57,  43,  25,   9,  -9, -25, -43, -57, -70, -80, -87, -90,
++     -90, -87, -80, -70, -57, -43, -25,  -9,   9,  25,  43,  57,  70,  80,  87,  90 },
++    { 90,  82,  67,  46,  22,  -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13,
++      13,  38,  61,  78,  88,  90,  85,  73,  54,  31,   4, -22, -46, -67, -82, -90 },
++    { 89,  75,  50,  18, -18, -50, -75, -89, -89, -75, -50, -18,  18,  50,  75,  89,
++      89,  75,  50,  18, -18, -50, -75, -89, -89, -75, -50, -18,  18,  50,  75,  89 },
++    { 88,  67,  31, -13, -54, -82, -90, -78, -46, -4,   38,  73,  90,  85,  61,  22,
++     -22, -61, -85, -90, -73, -38,   4,  46,  78,  90,  82,  54,  13, -31, -67, -88 },
++    { 87,  57,   9, -43, -80, -90, -70, -25,  25,  70,  90,  80,  43,  -9, -57, -87,
++     -87, -57,  -9,  43,  80,  90,  70,  25, -25, -70, -90, -80, -43,   9,  57,  87 },
++    { 85,  46, -13, -67, -90, -73, -22,  38,  82,  88,  54,  -4, -61, -90, -78, -31,
++      31,  78,  90,  61,   4, -54, -88, -82, -38,  22,  73,  90,  67,  13, -46, -85 },
++    { 83,  36, -36, -83, -83, -36,  36,  83,  83,  36, -36, -83, -83, -36,  36,  83,
++      83,  36, -36, -83, -83, -36,  36,  83,  83,  36, -36, -83, -83, -36,  36,  83 },
++    { 82,  22, -54, -90, -61,  13,  78,  85,  31, -46, -90, -67,   4,  73,  88,  38,
++     -38, -88, -73,  -4,  67,  90,  46, -31, -85, -78, -13,  61,  90,  54, -22, -82 },
++    { 80,   9, -70, -87, -25,  57,  90,  43, -43, -90, -57,  25,  87,  70,  -9, -80,
++     -80,  -9,  70,  87,  25, -57, -90, -43,  43,  90,  57, -25, -87, -70,   9,  80 },
++    { 78,  -4, -82, -73,  13,  85,  67, -22, -88, -61,  31,  90,  54, -38, -90, -46,
++      46,  90,  38, -54, -90, -31,  61,  88,  22, -67, -85, -13,  73,  82,   4, -78 },
++    { 75, -18, -89, -50,  50,  89,  18, -75, -75,  18,  89,  50, -50, -89, -18,  75,
++      75, -18, -89, -50,  50,  89,  18, -75, -75,  18,  89,  50, -50, -89, -18,  75 },
++    { 73, -31, -90, -22,  78,  67, -38, -90, -13,  82,  61, -46, -88,  -4,  85,  54,
++     -54, -85,   4,  88,  46, -61, -82,  13,  90,  38, -67, -78,  22,  90,  31, -73 },
++    { 70, -43, -87,   9,  90,  25, -80, -57,  57,  80, -25, -90,  -9,  87,  43, -70,
++     -70,  43,  87,  -9, -90, -25,  80,  57, -57, -80,  25,  90,   9, -87, -43,  70 },
++    { 67, -54, -78,  38,  85, -22, -90,   4,  90,  13, -88, -31,  82,  46, -73, -61,
++      61,  73, -46, -82,  31,  88, -13, -90,  -4,  90,  22, -85, -38,  78,  54, -67 },
++    { 64, -64, -64,  64,  64, -64, -64,  64,  64, -64, -64,  64,  64, -64, -64,  64,
++      64, -64, -64,  64,  64, -64, -64,  64,  64, -64, -64,  64,  64, -64, -64,  64 },
++    { 61, -73, -46,  82,  31, -88, -13,  90,  -4, -90,  22,  85, -38, -78,  54,  67,
++     -67, -54,  78,  38, -85, -22,  90,   4, -90,  13,  88, -31, -82,  46,  73, -61 },
++    { 57, -80, -25,  90,  -9, -87,  43,  70, -70, -43,  87,   9, -90,  25,  80, -57,
++     -57,  80,  25, -90,   9,  87, -43, -70,  70,  43, -87,  -9,  90, -25, -80,  57 },
++    { 54, -85,  -4,  88, -46, -61,  82,  13, -90,  38,  67, -78, -22,  90, -31, -73,
++      73,  31, -90,  22,  78, -67, -38,  90, -13, -82,  61,  46, -88,   4,  85, -54 },
++    { 50, -89,  18,  75, -75, -18,  89, -50, -50,  89, -18, -75,  75,  18, -89,  50,
++      50, -89,  18,  75, -75, -18,  89, -50, -50,  89, -18, -75,  75,  18, -89,  50 },
++    { 46, -90,  38,  54, -90,  31,  61, -88,  22,  67, -85,  13,  73, -82,   4,  78,
++     -78,  -4,  82, -73, -13,  85, -67, -22,  88, -61, -31,  90, -54, -38,  90, -46 },
++    { 43, -90,  57,  25, -87,  70,   9, -80,  80,  -9, -70,  87, -25, -57,  90, -43,
++     -43,  90, -57, -25,  87, -70,  -9,  80, -80,   9,  70, -87,  25,  57, -90,  43 },
++    { 38, -88,  73,  -4, -67,  90, -46, -31,  85, -78,  13,  61, -90,  54,  22, -82,
++      82, -22, -54,  90, -61, -13,  78, -85,  31,  46, -90,  67,   4, -73,  88, -38 },
++    { 36, -83,  83, -36, -36,  83, -83,  36,  36, -83,  83, -36, -36,  83, -83,  36,
++      36, -83,  83, -36, -36,  83, -83,  36,  36, -83,  83, -36, -36,  83, -83,  36 },
++    { 31, -78,  90, -61,   4,  54, -88,  82, -38, -22,  73, -90,  67, -13, -46,  85,
++     -85,  46,  13, -67,  90, -73,  22,  38, -82,  88, -54,  -4,  61, -90,  78, -31 },
++    { 25, -70,  90, -80,  43,   9, -57,  87, -87,  57,  -9, -43,  80, -90,  70, -25,
++     -25,  70, -90,  80, -43,  -9,  57, -87,  87, -57,   9,  43, -80,  90, -70,  25 },
++    { 22, -61,  85, -90,  73, -38,  -4,  46, -78,  90, -82,  54, -13, -31,  67, -88,
++      88, -67,  31,  13, -54,  82, -90,  78, -46,   4,  38, -73,  90, -85,  61, -22 },
++    { 18, -50,  75, -89,  89, -75,  50, -18, -18,  50, -75,  89, -89,  75, -50,  18,
++      18, -50,  75, -89,  89, -75,  50, -18, -18,  50, -75,  89, -89,  75, -50,  18 },
++    { 13, -38,  61, -78,  88, -90,  85, -73,  54, -31,   4,  22, -46,  67, -82,  90,
++     -90,  82, -67,  46, -22,  -4,  31, -54,  73, -85,  90, -88,  78, -61,  38, -13 },
++    {  9, -25,  43, -57,  70, -80,  87, -90,  90, -87,  80, -70,  57, -43,  25, -9,
++      -9,  25, -43,  57, -70,  80, -87,  90, -90,  87, -80,  70, -57,  43, -25,   9 },
++    {  4, -13,  22, -31,  38, -46,  54, -61,  67, -73,  78, -82,  85, -88,  90, -90,
++      90, -90,  88, -85,  82, -78,  73, -67,  61, -54,  46, -38,  31, -22,  13,  -4 },
++};
++
++DECLARE_ALIGNED(16, const int8_t, ff_hevc_rpi_epel_filters[7][4]) = {  // 7 rows of 4 taps; every row sums to 64
++    { -2, 58, 10, -2},
++    { -4, 54, 16, -2},
++    { -6, 46, 28, -4},
++    { -4, 36, 36, -4},  // the symmetric middle row; rows after it mirror the rows before it
++    { -4, 28, 46, -6},
++    { -2, 16, 54, -4},
++    { -2, 10, 58, -2},
++};
++
++DECLARE_ALIGNED(16, const int8_t, ff_hevc_rpi_qpel_filters[3][16]) = {  // each row holds the same 8 taps twice; each set of 8 sums to 64
++    { -1,  4,-10, 58, 17, -5,  1,  0, -1,  4,-10, 58, 17, -5,  1,  0},
++    { -1,  4,-11, 40, 40,-11,  4, -1, -1,  4,-11, 40, 40,-11,  4, -1},
++    {  0,  1, -5, 17, 58,-10,  4, -1,  0,  1, -5, 17, 58,-10,  4, -1}  // the first row's taps in reverse order
++};
++
++#define BIT_DEPTH 8
++#include "rpi_hevcdsp_template.c"
++#undef BIT_DEPTH
++
++#define BIT_DEPTH 9
++#include "rpi_hevcdsp_template.c"
++#undef BIT_DEPTH
++
++#define BIT_DEPTH 10
++#include "rpi_hevcdsp_template.c"
++#undef BIT_DEPTH
++
++#define BIT_DEPTH 12
++#include "rpi_hevcdsp_template.c"
++#undef BIT_DEPTH
++
++static uint32_t hevc_deblocking_boundary_strengths(int pus, int dup, const HEVCRpiMvField *curr, const HEVCRpiMvField *neigh,
++                                               const int *curr_rpl0, const int *curr_rpl1, const int *neigh_rpl0, const int *neigh_rpl1,
++                                               int in_inc0, int in_inc1)
++{
++    int shift = 32;   // low bits of bs not yet filled; reduced by 2 for every packed entry
++    uint32_t bs = 0;  // result: one 2-bit strength per entry, first entry ends up in the lowest bits
++    for (; pus > 0; pus--) {
++        int strength, out;
++        int curr_refL0 = curr_rpl0[curr->ref_idx[0]];
++        int curr_refL1 = curr_rpl1[curr->ref_idx[1]];
++        int nr_idx0 = neigh->ref_idx[0];
++        int nr_idx1 = neigh->ref_idx[1];
++        int neigh_refL0, neigh_refL1;
++
++        // Range-check the neighbour indices before they are used as subscripts
++        av_assert0(nr_idx0 >= 0 && nr_idx0 <= 31 && nr_idx1 >= 0 && nr_idx1 <= 31);
++        neigh_refL0 = neigh_rpl0[nr_idx0];
++        neigh_refL1 = neigh_rpl1[nr_idx1];
++#if 1 // This more directly matches the original implementation
++        if (curr->pred_flag == PF_BI &&  neigh->pred_flag == PF_BI) {
++            // same L0 and L1
++            if (curr_refL0 == neigh_refL0 &&
++                curr_refL0 == curr_refL1 &&
++                neigh_refL0 == neigh_refL1) {
++                if ((FFABS(MV_X(neigh->xy[0]) - MV_X(curr->xy[0])) >= 4 || FFABS(MV_Y(neigh->xy[0]) - MV_Y(curr->xy[0])) >= 4 ||
++                     FFABS(MV_X(neigh->xy[1]) - MV_X(curr->xy[1])) >= 4 || FFABS(MV_Y(neigh->xy[1]) - MV_Y(curr->xy[1])) >= 4) &&
++                    (FFABS(MV_X(neigh->xy[1]) - MV_X(curr->xy[0])) >= 4 || FFABS(MV_Y(neigh->xy[1]) - MV_Y(curr->xy[0])) >= 4 ||
++                     FFABS(MV_X(neigh->xy[0]) - MV_X(curr->xy[1])) >= 4 || FFABS(MV_Y(neigh->xy[0]) - MV_Y(curr->xy[1])) >= 4))
++                    strength = 1;
++                else
++                    strength = 0;
++            } else if (neigh_refL0 == curr_refL0 &&
++                       neigh_refL1 == curr_refL1) {
++                if (FFABS(MV_X(neigh->xy[0]) - MV_X(curr->xy[0])) >= 4 || FFABS(MV_Y(neigh->xy[0]) - MV_Y(curr->xy[0])) >= 4 ||
++                    FFABS(MV_X(neigh->xy[1]) - MV_X(curr->xy[1])) >= 4 || FFABS(MV_Y(neigh->xy[1]) - MV_Y(curr->xy[1])) >= 4)
++                    strength = 1;
++                else
++                    strength = 0;
++            } else if (neigh_refL1 == curr_refL0 &&
++                       neigh_refL0 == curr_refL1) {
++                if (FFABS(MV_X(neigh->xy[1]) - MV_X(curr->xy[0])) >= 4 || FFABS(MV_Y(neigh->xy[1]) - MV_Y(curr->xy[0])) >= 4 ||
++                    FFABS(MV_X(neigh->xy[0]) - MV_X(curr->xy[1])) >= 4 || FFABS(MV_Y(neigh->xy[0]) - MV_Y(curr->xy[1])) >= 4)
++                    strength = 1;
++                else
++                    strength = 0;
++            } else {
++                strength = 1;
++            }
++        } else if ((curr->pred_flag != PF_BI) && (neigh->pred_flag != PF_BI)){ // 1 MV
++            MvXY curr_mv0, neigh_mv0;
++
++            if (curr->pred_flag & 1) {
++                curr_mv0   = curr->xy[0];
++            } else {
++                curr_mv0   = curr->xy[1];
++                curr_refL0 = curr_refL1;
++            }
++
++            if (neigh->pred_flag & 1) {
++                neigh_mv0   = neigh->xy[0];
++            } else {
++                neigh_mv0   = neigh->xy[1];
++                neigh_refL0 = neigh_refL1;
++            }
++
++            if (curr_refL0 == neigh_refL0) {
++                if (FFABS(MV_X(curr_mv0) - MV_X(neigh_mv0)) >= 4 || FFABS(MV_Y(curr_mv0) - MV_Y(neigh_mv0)) >= 4)
++                    strength = 1;
++                else
++                    strength = 0;
++            } else
++                strength = 1;
++        } else
++            strength = 1;
++#else // This has exactly the same effect, but is more suitable for vectorisation
++        MvXY curr_mv[2];
++        MvXY neigh_mv[2];
++        memcpy(curr_mv, curr->xy, sizeof curr_mv);
++        memcpy(neigh_mv, neigh->xy, sizeof neigh_mv);
++
++        if (!(curr->pred_flag & 2)) {
++            curr_mv[1] = curr_mv[0];
++            curr_refL1 = curr_refL0;
++        }
++        if (!(neigh->pred_flag & 2)) {
++            neigh_mv[1] = neigh_mv[0];
++            neigh_refL1 = neigh_refL0;
++        }
++        if (!(curr->pred_flag & 1)) {
++            curr_mv[0] = curr_mv[1];
++            curr_refL0 = curr_refL1;
++        }
++        if (!(neigh->pred_flag & 1)) {
++            neigh_mv[0] = neigh_mv[1];
++            neigh_refL0 = neigh_refL1;
++        }
++
++        strength = 1;
++
++        strength &= (neigh_refL0 != curr_refL0) | (neigh_refL1 != curr_refL1) |
++                (FFABS(MV_X(neigh_mv[0]) - MV_X(curr_mv[0])) >= 4) | (FFABS(MV_Y(neigh_mv[0]) - MV_Y(curr_mv[0])) >= 4) |
++                (FFABS(MV_X(neigh_mv[1]) - MV_X(curr_mv[1])) >= 4) | (FFABS(MV_Y(neigh_mv[1]) - MV_Y(curr_mv[1])) >= 4);
++
++        strength &= (neigh_refL1 != curr_refL0) | (neigh_refL0 != curr_refL1) |
++                (FFABS(MV_X(neigh_mv[1]) - MV_X(curr_mv[0])) >= 4) | (FFABS(MV_Y(neigh_mv[1]) - MV_Y(curr_mv[0])) >= 4) |
++                (FFABS(MV_X(neigh_mv[0]) - MV_X(curr_mv[1])) >= 4) | (FFABS(MV_Y(neigh_mv[0]) - MV_Y(curr_mv[1])) >= 4);
++
++        strength |= (((curr->pred_flag + 1) ^ (neigh->pred_flag + 1)) >> 2);
++#endif
++        // in_inc0 / in_inc1 are byte increments; convert them to element steps
++        curr += in_inc0 / sizeof (HEVCRpiMvField);
++        neigh += in_inc1 / sizeof (HEVCRpiMvField);
++        // Append this strength dup times: older entries move down, the new one enters at the top
++        for (out = dup; out > 0; out--) {
++            bs = (bs >> 2) | (strength << 30);
++            shift -= 2;
++        }
++    }
++    // Nothing packed (pus <= 0 or dup <= 0) leaves shift at 32; a 32-bit shift of a uint32_t is undefined
++    return shift >= 32 ? 0 : bs >> shift;
++}
++
++
++// Copy a width x height block of bytes.  Data moves in whole 8- or 16-byte
++// units, so each row is written up to the next multiple of that unit.
++static void cpy_blk(uint8_t *dst, unsigned int stride_dst, const uint8_t *src, unsigned stride_src, unsigned int width, unsigned int height)
++{
++    // 16-byte copies are used only when both pointers and both strides are multiples of 16
++    const int aligned16 = !(((intptr_t)dst | (intptr_t)src | stride_dst | stride_src) & 15);
++    unsigned int row, col;
++
++    for (row = 0; row < height; row++) {
++        if (aligned16) {
++            for (col = 0; col < width; col += 16)
++                AV_COPY128(dst + col, src + col);
++        } else {
++            for (col = 0; col < width; col += 8)
++                AV_COPY64U(dst + col, src + col);
++        }
++        dst += stride_dst;
++        src += stride_src;
++    }
++}
++
++
++
++void ff_hevc_rpi_dsp_init(HEVCDSPContext *hevcdsp, int bit_depth)
++{
++#undef FUNC
++#define FUNC(a, depth) a ## _ ## depth
++    // PEL_FUNC stores the same C function in all 10 first-index slots of one [idx1][idx2] table entry
++#undef PEL_FUNC
++#define PEL_FUNC(dst1, idx1, idx2, a, depth)                                   \
++    for(i = 0 ; i < 10 ; i++)                                                  \
++{                                                                              \
++    hevcdsp->dst1[i][idx1][idx2] = a ## _ ## depth;                            \
++}
++
++#undef EPEL_FUNCS
++#define EPEL_FUNCS(depth)                                                     \
++    PEL_FUNC(put_hevc_epel, 0, 0, put_hevc_pel_pixels, depth);                \
++    PEL_FUNC(put_hevc_epel, 0, 1, put_hevc_epel_h, depth);                    \
++    PEL_FUNC(put_hevc_epel, 1, 0, put_hevc_epel_v, depth);                    \
++    PEL_FUNC(put_hevc_epel, 1, 1, put_hevc_epel_hv, depth)
++
++#undef EPEL_UNI_FUNCS
++#define EPEL_UNI_FUNCS(depth)                                                 \
++    PEL_FUNC(put_hevc_epel_uni, 0, 0, put_hevc_pel_uni_pixels, depth);        \
++    PEL_FUNC(put_hevc_epel_uni, 0, 1, put_hevc_epel_uni_h, depth);            \
++    PEL_FUNC(put_hevc_epel_uni, 1, 0, put_hevc_epel_uni_v, depth);            \
++    PEL_FUNC(put_hevc_epel_uni, 1, 1, put_hevc_epel_uni_hv, depth);           \
++    PEL_FUNC(put_hevc_epel_uni_w, 0, 0, put_hevc_pel_uni_w_pixels, depth);    \
++    PEL_FUNC(put_hevc_epel_uni_w, 0, 1, put_hevc_epel_uni_w_h, depth);        \
++    PEL_FUNC(put_hevc_epel_uni_w, 1, 0, put_hevc_epel_uni_w_v, depth);        \
++    PEL_FUNC(put_hevc_epel_uni_w, 1, 1, put_hevc_epel_uni_w_hv, depth)
++
++#undef EPEL_BI_FUNCS
++#define EPEL_BI_FUNCS(depth)                                                \
++    PEL_FUNC(put_hevc_epel_bi, 0, 0, put_hevc_pel_bi_pixels, depth);        \
++    PEL_FUNC(put_hevc_epel_bi, 0, 1, put_hevc_epel_bi_h, depth);            \
++    PEL_FUNC(put_hevc_epel_bi, 1, 0, put_hevc_epel_bi_v, depth);            \
++    PEL_FUNC(put_hevc_epel_bi, 1, 1, put_hevc_epel_bi_hv, depth);           \
++    PEL_FUNC(put_hevc_epel_bi_w, 0, 0, put_hevc_pel_bi_w_pixels, depth);    \
++    PEL_FUNC(put_hevc_epel_bi_w, 0, 1, put_hevc_epel_bi_w_h, depth);        \
++    PEL_FUNC(put_hevc_epel_bi_w, 1, 0, put_hevc_epel_bi_w_v, depth);        \
++    PEL_FUNC(put_hevc_epel_bi_w, 1, 1, put_hevc_epel_bi_w_hv, depth)
++
++#undef QPEL_FUNCS
++#define QPEL_FUNCS(depth)                                                     \
++    PEL_FUNC(put_hevc_qpel, 0, 0, put_hevc_pel_pixels, depth);                \
++    PEL_FUNC(put_hevc_qpel, 0, 1, put_hevc_qpel_h, depth);                    \
++    PEL_FUNC(put_hevc_qpel, 1, 0, put_hevc_qpel_v, depth);                    \
++    PEL_FUNC(put_hevc_qpel, 1, 1, put_hevc_qpel_hv, depth)
++
++#undef QPEL_UNI_FUNCS
++#define QPEL_UNI_FUNCS(depth)                                                 \
++    PEL_FUNC(put_hevc_qpel_uni, 0, 0, put_hevc_pel_uni_pixels, depth);        \
++    PEL_FUNC(put_hevc_qpel_uni, 0, 1, put_hevc_qpel_uni_h, depth);            \
++    PEL_FUNC(put_hevc_qpel_uni, 1, 0, put_hevc_qpel_uni_v, depth);            \
++    PEL_FUNC(put_hevc_qpel_uni, 1, 1, put_hevc_qpel_uni_hv, depth);           \
++    PEL_FUNC(put_hevc_qpel_uni_w, 0, 0, put_hevc_pel_uni_w_pixels, depth);    \
++    PEL_FUNC(put_hevc_qpel_uni_w, 0, 1, put_hevc_qpel_uni_w_h, depth);        \
++    PEL_FUNC(put_hevc_qpel_uni_w, 1, 0, put_hevc_qpel_uni_w_v, depth);        \
++    PEL_FUNC(put_hevc_qpel_uni_w, 1, 1, put_hevc_qpel_uni_w_hv, depth)
++
++#undef QPEL_BI_FUNCS
++#define QPEL_BI_FUNCS(depth)                                                  \
++    PEL_FUNC(put_hevc_qpel_bi, 0, 0, put_hevc_pel_bi_pixels, depth);          \
++    PEL_FUNC(put_hevc_qpel_bi, 0, 1, put_hevc_qpel_bi_h, depth);              \
++    PEL_FUNC(put_hevc_qpel_bi, 1, 0, put_hevc_qpel_bi_v, depth);              \
++    PEL_FUNC(put_hevc_qpel_bi, 1, 1, put_hevc_qpel_bi_hv, depth);             \
++    PEL_FUNC(put_hevc_qpel_bi_w, 0, 0, put_hevc_pel_bi_w_pixels, depth);      \
++    PEL_FUNC(put_hevc_qpel_bi_w, 0, 1, put_hevc_qpel_bi_w_h, depth);          \
++    PEL_FUNC(put_hevc_qpel_bi_w, 1, 0, put_hevc_qpel_bi_w_v, depth);          \
++    PEL_FUNC(put_hevc_qpel_bi_w, 1, 1, put_hevc_qpel_bi_w_hv, depth)
++
++#define SLICED_ADD_RESIDUAL(depth)\
++    hevcdsp->add_residual_u[0]      = FUNC(add_residual4x4_u, depth);         \
++    hevcdsp->add_residual_u[1]      = FUNC(add_residual8x8_u, depth);         \
++    hevcdsp->add_residual_u[2]      = FUNC(add_residual16x16_u, depth);       \
++    hevcdsp->add_residual_u[3]      = FUNC(add_residual32x32_u, depth);       \
++    hevcdsp->add_residual_v[0]      = FUNC(add_residual4x4_v, depth);         \
++    hevcdsp->add_residual_v[1]      = FUNC(add_residual8x8_v, depth);         \
++    hevcdsp->add_residual_v[2]      = FUNC(add_residual16x16_v, depth);       \
++    hevcdsp->add_residual_v[3]      = FUNC(add_residual32x32_v, depth);       \
++    hevcdsp->add_residual_c[0]      = FUNC(add_residual4x4_c, depth);         \
++    hevcdsp->add_residual_c[1]      = FUNC(add_residual8x8_c, depth);         \
++    hevcdsp->add_residual_c[2]      = FUNC(add_residual16x16_c, depth);       \
++    hevcdsp->add_residual_c[3]      = FUNC(add_residual32x32_c, depth);       \
++    hevcdsp->add_residual_dc_c[0]   = FUNC(add_residual4x4_dc_c, depth);         \
++    hevcdsp->add_residual_dc_c[1]   = FUNC(add_residual8x8_dc_c, depth);         \
++    hevcdsp->add_residual_dc_c[2]   = FUNC(add_residual16x16_dc_c, depth);       \
++    hevcdsp->add_residual_dc_c[3]   = FUNC(add_residual32x32_dc_c, depth);       \
++    hevcdsp->put_pcm_c              = FUNC(put_pcm_c, depth)
++#define SLICED_LOOP_FILTERS(depth)\
++    hevcdsp->hevc_h_loop_filter_luma2 = FUNC(hevc_h_loop_filter_luma2, depth); \
++    hevcdsp->hevc_v_loop_filter_luma2 = FUNC(hevc_v_loop_filter_luma2, depth); \
++    hevcdsp->hevc_h_loop_filter_uv    = FUNC(hevc_h_loop_filter_uv, depth);    \
++    hevcdsp->hevc_v_loop_filter_uv2   = FUNC(hevc_v_loop_filter_uv2, depth)
++#define SLICED_SAO(depth)\
++    for (i = 0; i != SAO_FILTER_N; ++i) {                                     \
++        hevcdsp->sao_band_filter_c[i] = FUNC(sao_band_filter_c, depth);       \
++        hevcdsp->sao_edge_filter_c[i] = FUNC(sao_edge_filter_c, depth);       \
++    }                                                                         \
++    hevcdsp->sao_edge_restore_c[0] = FUNC(sao_edge_restore_c_0, depth);       \
++    hevcdsp->sao_edge_restore_c[1] = FUNC(sao_edge_restore_c_1, depth)
++
++#define HEVC_DSP(depth)                                                     \
++    hevcdsp->put_pcm                = FUNC(put_pcm, depth);                 \
++    hevcdsp->add_residual[0]        = FUNC(add_residual4x4, depth);         \
++    hevcdsp->add_residual[1]        = FUNC(add_residual8x8, depth);         \
++    hevcdsp->add_residual[2]        = FUNC(add_residual16x16, depth);       \
++    hevcdsp->add_residual[3]        = FUNC(add_residual32x32, depth);       \
++    hevcdsp->add_residual_dc[0]     = FUNC(add_residual4x4_dc, depth);         \
++    hevcdsp->add_residual_dc[1]     = FUNC(add_residual8x8_dc, depth);         \
++    hevcdsp->add_residual_dc[2]     = FUNC(add_residual16x16_dc, depth);       \
++    hevcdsp->add_residual_dc[3]     = FUNC(add_residual32x32_dc, depth);       \
++    SLICED_ADD_RESIDUAL(depth);                                             \
++    hevcdsp->dequant                = FUNC(dequant, depth);                 \
++    hevcdsp->transform_rdpcm        = FUNC(transform_rdpcm, depth);         \
++    hevcdsp->transform_4x4_luma     = FUNC(transform_4x4_luma, depth);      \
++    hevcdsp->idct[0]                = FUNC(idct_4x4, depth);                \
++    hevcdsp->idct[1]                = FUNC(idct_8x8, depth);                \
++    hevcdsp->idct[2]                = FUNC(idct_16x16, depth);              \
++    hevcdsp->idct[3]                = FUNC(idct_32x32, depth);              \
++                                                                            \
++    hevcdsp->idct_dc[0]             = FUNC(idct_4x4_dc, depth);             \
++    hevcdsp->idct_dc[1]             = FUNC(idct_8x8_dc, depth);             \
++    hevcdsp->idct_dc[2]             = FUNC(idct_16x16_dc, depth);           \
++    hevcdsp->idct_dc[3]             = FUNC(idct_32x32_dc, depth);           \
++                                                                            \
++    for (i = 0; i != SAO_FILTER_N; ++i) {                                   \
++        hevcdsp->sao_band_filter[i] = FUNC(sao_band_filter, depth);         \
++        hevcdsp->sao_edge_filter[i] = FUNC(sao_edge_filter, depth);         \
++    }                                                                       \
++    hevcdsp->sao_edge_restore[0] = FUNC(sao_edge_restore_0, depth);            \
++    hevcdsp->sao_edge_restore[1] = FUNC(sao_edge_restore_1, depth);            \
++    SLICED_SAO(depth);                                                         \
++                                                                               \
++    QPEL_FUNCS(depth);                                                         \
++    QPEL_UNI_FUNCS(depth);                                                     \
++    QPEL_BI_FUNCS(depth);                                                      \
++    EPEL_FUNCS(depth);                                                         \
++    EPEL_UNI_FUNCS(depth);                                                     \
++    EPEL_BI_FUNCS(depth);                                                      \
++                                                                               \
++    SLICED_LOOP_FILTERS(depth);                                                \
++    hevcdsp->hevc_h_loop_filter_luma     = FUNC(hevc_h_loop_filter_luma, depth);   \
++    hevcdsp->hevc_v_loop_filter_luma     = FUNC(hevc_v_loop_filter_luma, depth);   \
++    hevcdsp->hevc_h_loop_filter_chroma   = FUNC(hevc_h_loop_filter_chroma, depth); \
++    hevcdsp->hevc_v_loop_filter_chroma   = FUNC(hevc_v_loop_filter_chroma, depth); \
++    hevcdsp->hevc_h_loop_filter_luma_c   = FUNC(hevc_h_loop_filter_luma, depth);   \
++    hevcdsp->hevc_v_loop_filter_luma_c   = FUNC(hevc_v_loop_filter_luma, depth);   \
++    hevcdsp->hevc_h_loop_filter_chroma_c = FUNC(hevc_h_loop_filter_chroma, depth); \
++    hevcdsp->hevc_v_loop_filter_chroma_c = FUNC(hevc_v_loop_filter_chroma, depth)
++int i = 0; // index variable used by the loops inside the PEL_FUNC, SLICED_SAO and HEVC_DSP macros
++
++    switch (bit_depth) {
++    case 9:
++        HEVC_DSP(9);
++        break;
++    case 10:
++        HEVC_DSP(10);
++        break;
++    case 12:
++        HEVC_DSP(12);
++        break;
++    default: // every other bit_depth value gets the 8-bit function set
++        HEVC_DSP(8);
++        break;
++    }
++    // These two helpers are plain functions, not instantiated per bit depth
++    hevcdsp->hevc_deblocking_boundary_strengths = hevc_deblocking_boundary_strengths;
++    hevcdsp->cpy_blk = cpy_blk;
++    // NOTE(review): the arch-specific initialisers presumably replace some pointers set above - confirm
++    if (ARCH_PPC)
++        ff_hevc_rpi_dsp_init_ppc(hevcdsp, bit_depth);
++    if (ARCH_X86)
++        ff_hevc_rpi_dsp_init_x86(hevcdsp, bit_depth);
++    if (ARCH_ARM)
++        ff_hevcdsp_rpi_init_arm(hevcdsp, bit_depth);
++    if (ARCH_MIPS)
++        ff_hevc_rpi_dsp_init_mips(hevcdsp, bit_depth);
++}
+--- /dev/null
++++ b/libavcodec/rpi_hevcdsp.h
+@@ -0,0 +1,177 @@
++/*
++ * HEVC video decoder
++ *
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ * Copyright (C) 2013 - 2014 Pierre-Edouard Lepere
++ *
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVCODEC_RPI_HEVCDSP_H
++#define AVCODEC_RPI_HEVCDSP_H
++
++#include "hevc.h"
++#include "get_bits.h"
++
++struct HEVCRpiMvField;
++
++#define MAX_PB_SIZE 64
++
++#define RPI_HEVC_SAO_BUF_STRIDE 160
++
++
++typedef struct RpiSAOParams {
++    uint8_t band_position[3];   ///< sao_band_position, one entry each for Y, U, V
++    uint8_t eo_class[3];        ///< sao_eo_class, indexed Y, U, V (U and V hold the same value)
++    uint8_t type_idx[3];        ///< sao_type_idx, indexed Y, U, V (U and V hold the same value)
++
++    int16_t offset_val[3][5];   ///< SaoOffsetVal, 5 entries for each of Y, U, V
++
++} RpiSAOParams;
++
++
++// This controls how many sao dsp functions there are
++// N=5 has width = 8, 16, 32, 48, 64
++// N=6 adds a function for width=24 (stored in function-array element 5, so
++// existing code that only uses elements 0-4 should still work)
++#define SAO_FILTER_N 6
++
++
++typedef struct HEVCDSPContext {
++    void (*put_pcm)(uint8_t *_dst, ptrdiff_t _stride, int width, int height,
++                    struct GetBitContext *gb, int pcm_bit_depth);
++
++    void (*add_residual[4])(uint8_t *dst, int16_t *res, ptrdiff_t stride);
++    void (*add_residual_dc[4])(uint8_t *dst, ptrdiff_t stride, int dc);
++    void (*add_residual_u[4])(uint8_t *dst, const int16_t *res, ptrdiff_t stride, int dc_v);
++    void (*add_residual_v[4])(uint8_t *dst, const int16_t *res, ptrdiff_t stride, int dc_u);
++
++    void (*add_residual_c[4])(uint8_t *dst, const int16_t *res, ptrdiff_t stride);
++    void (*add_residual_dc_c[4])(uint8_t *dst, ptrdiff_t stride, int32_t dc_uv);
++    void (*put_pcm_c)(uint8_t *_dst, ptrdiff_t _stride, int width, int height,
++                    struct GetBitContext *gb, int pcm_bit_depth);
++
++    void (*dequant)(int16_t *coeffs, int16_t log2_size);
++
++    void (*transform_rdpcm)(int16_t *coeffs, int16_t log2_size, int mode);
++
++    void (*transform_4x4_luma)(int16_t *coeffs);
++
++    void (*idct[4])(int16_t *coeffs, int col_limit);
++
++    void (*idct_dc[4])(int16_t *coeffs);
++
++    void (*sao_band_filter[SAO_FILTER_N])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
++                               int16_t *sao_offset_val, int sao_left_class, int width, int height);
++    void (*sao_band_filter_c[SAO_FILTER_N])(uint8_t *_dst, const uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
++                               const int16_t *sao_offset_val_u, int sao_left_class_u,
++                               const int16_t *sao_offset_val_v, int sao_left_class_v,
++                               int width, int height);
++
++    /* implicit stride_src parameter has value of 2 * MAX_PB_SIZE + AV_INPUT_BUFFER_PADDING_SIZE */
++    void (*sao_edge_filter[SAO_FILTER_N])(uint8_t *_dst /* align 16 */, uint8_t *_src /* align 32 */, ptrdiff_t stride_dst,
++                               int16_t *sao_offset_val, int sao_eo_class, int width, int height);
++    void (*sao_edge_filter_c[SAO_FILTER_N])(uint8_t *_dst /* align 16 */, const uint8_t *_src /* align 32 */, ptrdiff_t stride_dst,
++                               const int16_t *sao_offset_val_u, const int16_t *sao_offset_val_v, int sao_eo_class, int width, int height);
++
++    void (*sao_edge_restore[2])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
++                                struct RpiSAOParams *sao, int *borders, int _width, int _height, int c_idx,
++                                uint8_t *vert_edge, uint8_t *horiz_edge, uint8_t *diag_edge);
++    void (*sao_edge_restore_c[2])(uint8_t *_dst, uint8_t *_src, ptrdiff_t _stride_dst, ptrdiff_t _stride_src,
++                                struct RpiSAOParams *sao, int *borders, int _width, int _height, int c_idx,
++                                uint8_t *vert_edge, uint8_t *horiz_edge, uint8_t *diag_edge);
++
++    void (*put_hevc_qpel[10][2][2])(int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
++                                    int height, intptr_t mx, intptr_t my, int width);
++    void (*put_hevc_qpel_uni[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride,
++                                        int height, intptr_t mx, intptr_t my, int width);
++    void (*put_hevc_qpel_uni_w[10][2][2])(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                          int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
++
++    void (*put_hevc_qpel_bi[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                       int16_t *src2,
++                                       int height, intptr_t mx, intptr_t my, int width);
++    void (*put_hevc_qpel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                         int16_t *src2,
++                                         int height, int denom, int wx0, int wx1,
++                                         int ox0, int ox1, intptr_t mx, intptr_t my, int width);
++    void (*put_hevc_epel[10][2][2])(int16_t *dst, uint8_t *src, ptrdiff_t srcstride,
++                                    int height, intptr_t mx, intptr_t my, int width);
++
++    void (*put_hevc_epel_uni[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                        int height, intptr_t mx, intptr_t my, int width);
++    void (*put_hevc_epel_uni_w[10][2][2])(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                          int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width);
++    void (*put_hevc_epel_bi[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                       int16_t *src2,
++                                       int height, intptr_t mx, intptr_t my, int width);
++    void (*put_hevc_epel_bi_w[10][2][2])(uint8_t *dst, ptrdiff_t dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                         int16_t *src2,
++                                         int height, int denom, int wx0, int ox0, int wx1,
++                                         int ox1, intptr_t mx, intptr_t my, int width);
++
++    void (*hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
++                                    int beta, int32_t *tc,
++                                    uint8_t *no_p, uint8_t *no_q);
++    void (*hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
++                                    int beta, int32_t *tc,
++                                    uint8_t *no_p, uint8_t *no_q);
++    void (*hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
++                                      int32_t *tc, uint8_t *no_p, uint8_t *no_q);
++    void (*hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
++                                      int32_t *tc, uint8_t *no_p, uint8_t *no_q);
++    void (*hevc_h_loop_filter_luma_c)(uint8_t *pix, ptrdiff_t stride,
++                                      int beta, int32_t *tc,
++                                      uint8_t *no_p, uint8_t *no_q);
++    void (*hevc_v_loop_filter_luma_c)(uint8_t *pix, ptrdiff_t stride,
++                                      int beta, int32_t *tc,
++                                      uint8_t *no_p, uint8_t *no_q);
++    void (*hevc_h_loop_filter_chroma_c)(uint8_t *pix, ptrdiff_t stride,
++                                        int32_t *tc, uint8_t *no_p,
++                                        uint8_t *no_q);
++    void (*hevc_v_loop_filter_chroma_c)(uint8_t *pix, ptrdiff_t stride,
++                                        int32_t *tc, uint8_t *no_p,
++                                        uint8_t *no_q);
++    void (*hevc_h_loop_filter_luma2)(uint8_t * _pix_r,
++                                 unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f);
++    void (*hevc_v_loop_filter_luma2)(uint8_t * _pix_r,
++                                 unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f,
++                                 uint8_t * _pix_l);
++    void (*hevc_h_loop_filter_uv)(uint8_t * src, unsigned int stride, uint32_t tc4,
++                                 unsigned int no_f);
++    void (*hevc_v_loop_filter_uv2)(uint8_t * src_r, unsigned int stride, uint32_t tc4,
++                                 uint8_t * src_l,
++                                 unsigned int no_f);
++    // in_inc0 / in_inc1: byte increments applied to curr / neigh after each PU (per the C implementation)
++    uint32_t (*hevc_deblocking_boundary_strengths)(int pus, int dup, const struct HEVCRpiMvField *curr, const struct HEVCRpiMvField *neigh,
++                                               const int *curr_rpl0, const int *curr_rpl1, const int *neigh_rpl0, const int *neigh_rpl1,
++                                               int in_inc0, int in_inc1);
++
++    void (* cpy_blk)(uint8_t * dst, unsigned int dst_stride, const uint8_t * src, unsigned int src_stride, unsigned int width, unsigned int height);
++} HEVCDSPContext;
++
++void ff_hevc_rpi_dsp_init(HEVCDSPContext *hpc, int bit_depth);
++
++extern const int8_t ff_hevc_rpi_epel_filters[7][4];
++extern const int8_t ff_hevc_rpi_qpel_filters[3][16];
++
++void ff_hevc_rpi_dsp_init_ppc(HEVCDSPContext *c, const int bit_depth);
++void ff_hevc_rpi_dsp_init_x86(HEVCDSPContext *c, const int bit_depth);
++void ff_hevcdsp_rpi_init_arm(HEVCDSPContext *c, const int bit_depth);
++void ff_hevc_rpi_dsp_init_mips(HEVCDSPContext *c, const int bit_depth);
++#endif /* AVCODEC_RPI_HEVCDSP_H */
+--- /dev/null
++++ b/libavcodec/rpi_hevcdsp_template.c
+@@ -0,0 +1,2279 @@
++/*
++ * HEVC video decoder
++ *
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ * Copyright (C) 2018 John Cox for Raspberry Pi (Trading)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "get_bits.h"
++#include "rpi_hevcdec.h"
++
++#include "bit_depth_template.c"
++#include "rpi_hevcdsp.h"
++
++#include "rpi_hevc_shader_template.h"
++
++static void FUNC(put_pcm)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
++                          GetBitContext *gb, int pcm_bit_depth)
++{
++    // Each sample read from gb is scaled from pcm_bit_depth up to BIT_DEPTH
++    const int upshift = BIT_DEPTH - pcm_bit_depth;
++    pixel *row = (pixel *)_dst;
++    int col, line;
++
++    stride /= sizeof(pixel);  // stride: bytes -> pixels
++    for (line = 0; line < height; line++, row += stride) {
++        for (col = 0; col < width; col++)
++            row[col] = get_bits(gb, pcm_bit_depth) << upshift;
++    }
++}
++
++static void FUNC(put_pcm_c)(uint8_t *_dst, ptrdiff_t stride, int width, int height,
++                          GetBitContext *gb, int pcm_bit_depth)
++{
++    // Each sample read from gb is scaled from pcm_bit_depth up to BIT_DEPTH
++    const int upshift = BIT_DEPTH - pcm_bit_depth;
++    int plane, col, line;
++
++    stride /= sizeof(pixel);  // stride: bytes -> pixels
++
++    // Two width x height planes are read back to back from the bitstream:
++    // the first fills the even-indexed pixels of every row, the second the
++    // odd-indexed pixels, producing an interleaved destination.
++    for (plane = 0; plane < 2; plane++) {
++        pixel *row = (pixel *)_dst + plane;
++
++        for (line = 0; line < height; line++, row += stride) {
++            for (col = 0; col < width; col++)
++                row[col * 2] = get_bits(gb, pcm_bit_depth) << upshift;
++        }
++    }
++}
++
++static av_always_inline void FUNC(add_residual)(uint8_t *_dst, int16_t *res,
++                                                ptrdiff_t stride, int size)
++{
++    // Add a size x size block of residuals (stored contiguously, row after
++    // row) to the destination pixels, clipping each sum with av_clip_pixel().
++    pixel *row = (pixel *)_dst;
++    int col, line;
++
++    stride /= sizeof(pixel);  // stride: bytes -> pixels
++    for (line = 0; line < size; line++) {
++        for (col = 0; col < size; col++)
++            row[col] = av_clip_pixel(row[col] + res[col]);
++        res += size;
++        row += stride;
++    }
++}
++
++static av_always_inline void FUNC(add_residual_dc)(uint8_t *_dst, ptrdiff_t stride, const int dc, int size)
++{
++    // Add the constant dc to every pixel of a size x size block, clipping
++    // each sum with av_clip_pixel().
++    pixel *row = (pixel *)_dst;
++    int col, line;
++
++    stride /= sizeof(pixel);  // stride: bytes -> pixels
++
++    for (line = 0; line < size; line++, row += stride) {
++        for (col = 0; col < size; col++)
++            row[col] = av_clip_pixel(row[col] + dc);
++    }
++}
++
++
++static av_always_inline void FUNC(add_residual_u)(uint8_t *_dst, const int16_t *res,
++                                                ptrdiff_t stride, const int dc_v, int size)
++{
++    // Rows hold size pixel pairs: the even-indexed pixel of each pair gets the
++    // next residual, the odd-indexed pixel gets the constant dc_v.
++    pixel *row = (pixel *)_dst;
++    int i, line;
++
++    stride /= sizeof(pixel);  // stride: bytes -> pixels
++
++    for (line = 0; line < size; line++, row += stride) {
++        for (i = 0; i < size; i++, res++) {
++            row[2 * i]     = av_clip_pixel(row[2 * i] + *res);
++            row[2 * i + 1] = av_clip_pixel(row[2 * i + 1] + dc_v);
++        }
++    }
++}
++
++static av_always_inline void FUNC(add_residual_v)(uint8_t *_dst, const int16_t *res,
++                                                ptrdiff_t stride, const int dc_u, int size)
++{
++    // Rows hold size pixel pairs: the even-indexed pixel of each pair gets the
++    // constant dc_u, the odd-indexed pixel gets the next residual.
++    pixel *row = (pixel *)_dst;
++    int i, line;
++
++    stride /= sizeof(pixel);  // stride: bytes -> pixels
++
++    for (line = 0; line < size; line++, row += stride) {
++        for (i = 0; i < size; i++, res++) {
++            row[2 * i]     = av_clip_pixel(row[2 * i] + dc_u);
++            row[2 * i + 1] = av_clip_pixel(row[2 * i + 1] + *res);
++        }
++    }
++}
++
++static av_always_inline void FUNC(add_residual_c)(uint8_t *_dst, const int16_t *res,
++                                                ptrdiff_t stride, unsigned int size)
++{
++    // res holds two consecutive size x size residual blocks.  The first is
++    // added to the even-indexed pixels of each row and the second to the
++    // odd-indexed pixels; every sum is clipped with av_clip_pixel().
++    const int16_t *res_even = res;
++    const int16_t *res_odd  = res + size * size;
++    pixel *row = (pixel *)_dst;
++    unsigned int i, line;
++
++    stride /= sizeof(pixel);  // stride: bytes -> pixels
++
++    for (line = 0; line < size; line++) {
++        for (i = 0; i < size; i++) {
++            const unsigned int even = 2 * i;
++            row[even]     = av_clip_pixel(row[even]     + res_even[i]);
++            row[even + 1] = av_clip_pixel(row[even + 1] + res_odd[i]);
++        }
++        res_even += size;
++        res_odd  += size;
++        row      += stride;
++    }
++}
++
++
++static av_always_inline void FUNC(add_residual_dc_c)(uint8_t *_dst, ptrdiff_t stride, const int32_t dc, int size)
++{
++    int x, y;
++    pixel *dst = (pixel *)_dst;
++    const int dc_v = dc >> 16;      // upper 16 bits of the packed value
++    const int dc_u = (int16_t)dc;   // lower 16 bits, sign-extended; avoids left-shifting a signed value
++    // dc packs two 16-bit DC values: dc_u is added to even-indexed pixels, dc_v to odd-indexed ones
++    stride /= sizeof(pixel);
++
++    for (y = 0; y < size; y++) {
++        for (x = 0; x < size * 2; x += 2) {
++            dst[x] = av_clip_pixel(dst[x] + dc_u);
++            dst[x + 1] = av_clip_pixel(dst[x + 1] + dc_v);
++        }
++        dst += stride;
++    }
++}
++
++
++static void FUNC(add_residual4x4)(uint8_t *_dst, int16_t *res,
++                                  ptrdiff_t stride)
++{
++    FUNC(add_residual)(_dst, res, stride, 4);  // fixed-size entry point: 4x4 block
++}
++
++static void FUNC(add_residual8x8)(uint8_t *_dst, int16_t *res,
++                                  ptrdiff_t stride)
++{
++    FUNC(add_residual)(_dst, res, stride, 8);
++}
++
++static void FUNC(add_residual16x16)(uint8_t *_dst, int16_t *res,
++                                    ptrdiff_t stride)
++{
++    FUNC(add_residual)(_dst, res, stride, 16);
++}
++
++static void FUNC(add_residual32x32)(uint8_t *_dst, int16_t *res,
++                                    ptrdiff_t stride)
++{
++    FUNC(add_residual)(_dst, res, stride, 32);
++}
++
++static void FUNC(add_residual4x4_dc)(uint8_t *_dst, ptrdiff_t stride, int dc)
++{   // DC-only variant: forwards to the generic add_residual_dc with size 4.
++    FUNC(add_residual_dc)(_dst, stride, dc, 4);
++}
++
++static void FUNC(add_residual8x8_dc)(uint8_t *_dst, ptrdiff_t stride, int dc)
++{   // DC-only variant: forwards to the generic add_residual_dc with size 8.
++    FUNC(add_residual_dc)(_dst, stride, dc, 8);
++}
++
++static void FUNC(add_residual16x16_dc)(uint8_t *_dst, ptrdiff_t stride, int dc)
++{   // DC-only variant: forwards to the generic add_residual_dc with size 16.
++    FUNC(add_residual_dc)(_dst, stride, dc, 16);
++}
++
++static void FUNC(add_residual32x32_dc)(uint8_t *_dst, ptrdiff_t stride, int dc)
++{   // DC-only variant: forwards to the generic add_residual_dc with size 32.
++    FUNC(add_residual_dc)(_dst, stride, dc, 32);
++}
++
++// -- U -- (plaited)
++
++static void FUNC(add_residual4x4_u)(uint8_t *_dst, const int16_t * res,
++                                  ptrdiff_t stride, int dc_u)
++{   // Forwards to the generic add_residual_u with size 4; dc_u is passed through unchanged.
++    FUNC(add_residual_u)(_dst, res, stride, dc_u, 4);
++}
++
++static void FUNC(add_residual8x8_u)(uint8_t *_dst, const int16_t * res,
++                                  ptrdiff_t stride, int dc_u)
++{   // Forwards to the generic add_residual_u with size 8; dc_u is passed through unchanged.
++    FUNC(add_residual_u)(_dst, res, stride, dc_u, 8);
++}
++
++static void FUNC(add_residual16x16_u)(uint8_t *_dst, const int16_t * res,
++                                    ptrdiff_t stride, int dc_u)
++{   // Forwards to the generic add_residual_u with size 16; dc_u is passed through unchanged.
++    FUNC(add_residual_u)(_dst, res, stride, dc_u, 16);
++}
++
++static void FUNC(add_residual32x32_u)(uint8_t *_dst, const int16_t * res,
++                                    ptrdiff_t stride, int dc_u)
++{   // Placeholder with the common signature; reaching it aborts via av_assert0.
++    // Should never occur for 420, which is all that sand supports
++    av_assert0(0);
++}
++
++// -- V -- (plaited)
++
++static void FUNC(add_residual4x4_v)(uint8_t *_dst, const int16_t * res,
++                                  ptrdiff_t stride, int dc_v)
++{   // Forwards to add_residual_v with size 4. NOTE(review): dc_v is presumably the DC applied to the other (U) component - confirm against add_residual_v.
++    FUNC(add_residual_v)(_dst, res, stride, dc_v, 4);
++}
++
++static void FUNC(add_residual8x8_v)(uint8_t *_dst, const int16_t * res,
++                                  ptrdiff_t stride, int dc_v)
++{   // Forwards to the generic add_residual_v with size 8; dc_v is passed through unchanged.
++    FUNC(add_residual_v)(_dst, res, stride, dc_v, 8);
++}
++
++static void FUNC(add_residual16x16_v)(uint8_t *_dst, const int16_t * res,
++                                    ptrdiff_t stride, int dc_v)
++{   // Forwards to the generic add_residual_v with size 16; dc_v is passed through unchanged.
++    FUNC(add_residual_v)(_dst, res, stride, dc_v, 16);
++}
++
++static void FUNC(add_residual32x32_v)(uint8_t *_dst, const int16_t * res,
++                                    ptrdiff_t stride, int dc_v)
++{   // Placeholder with the common signature; reaching it aborts via av_assert0.
++    // Should never occur for 420, which is all that sand supports
++    av_assert0(0);
++}
++
++// -- C -- (plaited - both U & V)
++
++static void FUNC(add_residual4x4_c)(uint8_t *_dst, const int16_t * res,
++                                  ptrdiff_t stride)
++{   // Forwards to add_residual_c with size 4 (res holds 4*4 U values followed by 4*4 V values).
++    FUNC(add_residual_c)(_dst, res, stride, 4);
++}
++
++static void FUNC(add_residual8x8_c)(uint8_t *_dst, const int16_t * res,
++                                  ptrdiff_t stride)
++{   // Forwards to add_residual_c with size 8 (res holds 8*8 U values followed by 8*8 V values).
++    FUNC(add_residual_c)(_dst, res, stride, 8);
++}
++
++static void FUNC(add_residual16x16_c)(uint8_t *_dst, const int16_t * res,
++                                    ptrdiff_t stride)
++{   // Forwards to add_residual_c with size 16 (res holds 16*16 U values followed by 16*16 V values).
++    FUNC(add_residual_c)(_dst, res, stride, 16);
++}
++
++static void FUNC(add_residual32x32_c)(uint8_t *_dst, const int16_t * res,
++                                    ptrdiff_t stride)
++{   // Placeholder with the common signature; reaching it aborts via av_assert0.
++    // Should never occur for 420, which is all that sand supports
++    av_assert0(0);
++}
++
++static void FUNC(add_residual4x4_dc_c)(uint8_t *_dst, ptrdiff_t stride, int32_t dc)
++{   // Forwards to add_residual_dc_c with size 4; dc carries the V offset in its upper and the U offset in its lower 16 bits.
++    FUNC(add_residual_dc_c)(_dst, stride, dc, 4);
++}
++
++static void FUNC(add_residual8x8_dc_c)(uint8_t *_dst, ptrdiff_t stride, int32_t dc)
++{   // Forwards to add_residual_dc_c with size 8; dc is the same packed V/U pair.
++    FUNC(add_residual_dc_c)(_dst, stride, dc, 8);
++}
++
++static void FUNC(add_residual16x16_dc_c)(uint8_t *_dst, ptrdiff_t stride, int32_t dc)
++{   // Forwards to add_residual_dc_c with size 16; dc is the same packed V/U pair.
++    FUNC(add_residual_dc_c)(_dst, stride, dc, 16);
++}
++
++static void FUNC(add_residual32x32_dc_c)(uint8_t *_dst, ptrdiff_t stride, int32_t dc)
++{   // Placeholder with the common signature; reaching it aborts via av_assert0.
++    // Should never occur for 420, which is all that sand supports
++    av_assert0(0);
++}
++
++
++static void FUNC(transform_rdpcm)(int16_t *_coeffs, int16_t log2_size, int mode)
++{
++    // Accumulate coefficients in place: each entry becomes the running sum
++    // down its column (mode != 0) or along its row (mode == 0).
++    const int n = 1 << log2_size;
++    int16_t *cur = _coeffs;
++    int row, col;
++
++    if (!mode) {
++        for (row = 0; row < n; row++, cur += n)
++            for (col = 1; col < n; col++)
++                cur[col] += cur[col - 1];
++        return;
++    }
++
++    // Column mode: the first row has no row above it, so start at row 1.
++    for (row = 1, cur += n; row < n; row++, cur += n) {
++        for (col = 0; col < n; col++)
++            cur[col] += cur[col - n];
++    }
++}
++
++static void FUNC(dequant)(int16_t *coeffs, int16_t log2_size)
++{   // Rescale a (1 << log2_size)-square block of coefficients in place by 2^-shift, shift = 15 - BIT_DEPTH - log2_size.
++    int shift  = 15 - BIT_DEPTH - log2_size;
++    int x, y;
++    int size = 1 << log2_size;
++
++    if (shift > 0) {
++        int offset = 1 << (shift - 1);   // rounding term for the right shift
++        for (y = 0; y < size; y++) {
++            for (x = 0; x < size; x++) {
++                *coeffs = (*coeffs + offset) >> shift;
++                coeffs++;
++            }
++        }
++    } else {
++        for (y = 0; y < size; y++) {
++            for (x = 0; x < size; x++) {
++                *coeffs = (uint16_t)*coeffs << -shift;   // shift the unsigned bit pattern: left-shifting a negative int is undefined
++                coeffs++;
++            }
++        }
++    }
++}
++
++#define SET(dst, x)   (dst) = (x) /* store the value unchanged */
++#define SCALE(dst, x) (dst) = av_clip_int16(((x) + add) >> shift) /* round, shift and clip; uses 'add' and 'shift' from the expansion site */
++// One 4-point pass over src[0..3 * step], written to dst through the given assign macro (SET or SCALE).
++#define TR_4x4_LUMA(dst, src, step, assign)                             \
++    do {                                                                \
++        int c0 = src[0 * step] + src[2 * step];                         \
++        int c1 = src[2 * step] + src[3 * step];                         \
++        int c2 = src[0 * step] - src[3 * step];                         \
++        int c3 = 74 * src[1 * step];                                    \
++        /* dst[2] goes first (reads src); the rest use only c0..c3 */   \
++        assign(dst[2 * step], 74 * (src[0 * step] -                     \
++                                    src[2 * step] +                     \
++                                    src[3 * step]));                    \
++        assign(dst[0 * step], 29 * c0 + 55 * c1 + c3);                  \
++        assign(dst[1 * step], 55 * c2 - 29 * c1 + c3);                  \
++        assign(dst[3 * step], 55 * c0 + 29 * c2 - c3);                  \
++    } while (0)
++
++static void FUNC(transform_4x4_luma)(int16_t *coeffs)
++{   // In-place two-pass transform of a 4x4 coefficient block using TR_4x4_LUMA.
++    int i;
++    int shift    = 7;
++    int add      = 1 << (shift - 1);
++    int16_t *src = coeffs;
++    // Pass 1: one column per iteration (element step 4), rounded with shift 7.
++    for (i = 0; i < 4; i++) {
++        TR_4x4_LUMA(src, src, 4, SCALE);
++        src++;
++    }
++    // Pass 2: one row per iteration (element step 1), rounded with shift 20 - BIT_DEPTH.
++    shift = 20 - BIT_DEPTH;
++    add   = 1 << (shift - 1);
++    for (i = 0; i < 4; i++) {
++        TR_4x4_LUMA(coeffs, coeffs, 1, SCALE);
++        coeffs += 4;
++    }
++}
++
++#undef TR_4x4_LUMA
++
++#define TR_4(dst, src, dstep, sstep, assign, end)                 \
++    do {                                                          \
++        const int e0 = 64 * src[0 * sstep] + 64 * src[2 * sstep]; \
++        const int e1 = 64 * src[0 * sstep] - 64 * src[2 * sstep]; \
++        const int o0 = 83 * src[1 * sstep] + 36 * src[3 * sstep]; \
++        const int o1 = 36 * src[1 * sstep] - 83 * src[3 * sstep]; \
++        /* 'end' is not used: all four inputs are always read */  \
++        assign(dst[0 * dstep], e0 + o0);                          \
++        assign(dst[1 * dstep], e1 + o1);                          \
++        assign(dst[2 * dstep], e1 - o1);                          \
++        assign(dst[3 * dstep], e0 - o0);                          \
++    } while (0)
++
++#define TR_8(dst, src, dstep, sstep, assign, end)                 \
++    do {                                                          \
++        int i, j;                                                 \
++        int e_8[4];                                               \
++        int o_8[4] = { 0 };                                       \
++        for (i = 0; i < 4; i++)                                   \
++            for (j = 1; j < end; j += 2)                          \
++                o_8[i] += transform[4 * j][i] * src[j * sstep];   \
++        TR_4(e_8, src, 1, 2 * sstep, SET, 4);                     \
++        /* o_8: odd inputs j < end; out i, 7-i = e_8 +/- o_8 */   \
++        for (i = 0; i < 4; i++) {                                 \
++            assign(dst[i * dstep], e_8[i] + o_8[i]);              \
++            assign(dst[(7 - i) * dstep], e_8[i] - o_8[i]);        \
++        }                                                         \
++    } while (0)
++
++#define TR_16(dst, src, dstep, sstep, assign, end)                \
++    do {                                                          \
++        int i, j;                                                 \
++        int e_16[8];                                              \
++        int o_16[8] = { 0 };                                      \
++        for (i = 0; i < 8; i++)                                   \
++            for (j = 1; j < end; j += 2)                          \
++                o_16[i] += transform[2 * j][i] * src[j * sstep];  \
++        TR_8(e_16, src, 1, 2 * sstep, SET, 8);                    \
++    /* o_16: odd inputs j < end; even part always uses end 8 */   \
++        for (i = 0; i < 8; i++) {                                 \
++            assign(dst[i * dstep], e_16[i] + o_16[i]);            \
++            assign(dst[(15 - i) * dstep], e_16[i] - o_16[i]);     \
++        }                                                         \
++    } while (0)
++
++#define TR_32(dst, src, dstep, sstep, assign, end)                \
++    do {                                                          \
++        int i, j;                                                 \
++        int e_32[16];                                             \
++        int o_32[16] = { 0 };                                     \
++        for (i = 0; i < 16; i++)                                  \
++            for (j = 1; j < end; j += 2)                          \
++                o_32[i] += transform[j][i] * src[j * sstep];      \
++        TR_16(e_32, src, 1, 2 * sstep, SET, end / 2);             \
++    /* o_32: odd inputs j < end; even part gets end / 2 */        \
++        for (i = 0; i < 16; i++) {                                \
++            assign(dst[i * dstep], e_32[i] + o_32[i]);            \
++            assign(dst[(31 - i) * dstep], e_32[i] - o_32[i]);     \
++        }                                                         \
++    } while (0)
++
++#define IDCT_VAR4(H)  /* 4x4: limit2 only; TR_4 never uses 'end' */ \
++    int limit2 = FFMIN(col_limit + 4, H)
++#define IDCT_VAR8(H)  /* limit: row pass; limit2: column pass */  \
++    int limit  = FFMIN(col_limit, H);                             \
++    int limit2 = FFMIN(col_limit + 4, H)
++#define IDCT_VAR16(H)   IDCT_VAR8(H) /* same locals as the 8x8 case */
++#define IDCT_VAR32(H)   IDCT_VAR8(H) /* same locals as the 8x8 case */
++
++#define IDCT(H)                                                   \
++static void FUNC(idct_ ## H ## x ## H )(int16_t *coeffs,          \
++                                        int col_limit)            \
++{                                                                 \
++    int i;                                                        \
++    int      shift = 7;                                           \
++    int      add   = 1 << (shift - 1);                            \
++    int16_t *src   = coeffs;                                      \
++    IDCT_VAR ## H(H);                                             \
++    /* columns, shift 7; if limit2 < H it drops 4 at i=4,8,.. */  \
++    for (i = 0; i < H; i++) {                                     \
++        TR_ ## H(src, src, H, H, SCALE, limit2);                  \
++        if (limit2 < H && i%4 == 0 && !!i)                        \
++            limit2 -= 4;                                          \
++        src++;                                                    \
++    }                                                             \
++    /* rows, shift 20 - BIT_DEPTH; 'end' argument is limit */     \
++    shift = 20 - BIT_DEPTH;                                       \
++    add   = 1 << (shift - 1);                                     \
++    for (i = 0; i < H; i++) {                                     \
++        TR_ ## H(coeffs, coeffs, 1, 1, SCALE, limit);             \
++        coeffs += H;                                              \
++    }                                                             \
++}
++
++#define IDCT_DC(H)                                                \
++static void FUNC(idct_ ## H ## x ## H ## _dc)(int16_t *coeffs)    \
++{                                                                 \
++    int i, j;                                                     \
++    int shift = 14 - BIT_DEPTH;                                   \
++    int add   = 1 << (shift - 1);                                 \
++    int coeff = (((coeffs[0] + 1) >> 1) + add) >> shift;          \
++    /* fill all H * H entries with the rounded coeffs[0] value */ \
++    for (j = 0; j < H; j++) {                                     \
++        for (i = 0; i < H; i++) {                                 \
++            coeffs[i + j * H] = coeff;                            \
++        }                                                         \
++    }                                                             \
++}
++
++IDCT( 4)  // instantiate idct_HxH for H = 4, 8, 16 and 32
++IDCT( 8)
++IDCT(16)
++IDCT(32)
++// ... and the matching idct_HxH_dc variants for the same four sizes
++IDCT_DC( 4)
++IDCT_DC( 8)
++IDCT_DC(16)
++IDCT_DC(32)
++
++#undef TR_4
++#undef TR_8
++#undef TR_16
++#undef TR_32
++
++#undef SET
++#undef SCALE
++
++static void FUNC(sao_band_filter)(uint8_t *_dst, uint8_t *_src,
++                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                  int16_t *sao_offset_val, int sao_left_class,
++                                  int width, int height)
++{   // For each pixel, add the offset of its band (sample >> (BIT_DEPTH - 5), 32 bands) and clip; writes width x height pixels to dst.
++    pixel *dst = (pixel *)_dst;
++    pixel *src = (pixel *)_src;
++    int offset_table[32] = { 0 };
++    int k, y, x;
++    int shift  = BIT_DEPTH - 5;
++    // Strides arrive in bytes; convert them to units of pixel.
++    stride_dst /= sizeof(pixel);
++    stride_src /= sizeof(pixel);
++    // Only the four bands starting at sao_left_class (wrapping mod 32) get offsets, taken from sao_offset_val[1..4].
++    for (k = 0; k < 4; k++)
++        offset_table[(k + sao_left_class) & 31] = sao_offset_val[k + 1];
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)   // & 31 keeps the lookup inside offset_table even if a sample exceeds BIT_DEPTH bits
++            dst[x] = av_clip_pixel(src[x] + offset_table[(src[x] >> shift) & 31]);
++        dst += stride_dst;
++        src += stride_src;
++    }
++}
++
++#define CMP(a, b) (((a) > (b)) - ((a) < (b))) /* three-way compare: 1 if a > b, -1 if a < b, else 0 */
++
++static void FUNC(sao_edge_filter)(uint8_t *_dst, uint8_t *_src, ptrdiff_t stride_dst, int16_t *sao_offset_val,
++                                  int eo, int width, int height) {
++    // Compare each pixel with its two neighbours along direction eo and add the offset selected by the two comparison signs.
++    static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };   // maps 2 + diff0 + diff1 (0..4) to an index into sao_offset_val
++    static const int8_t pos[4][2][2] = {                   // per eo: {dx, dy} of neighbour a and neighbour b
++        { { -1,  0 }, {  1, 0 } }, // horizontal
++        { {  0, -1 }, {  0, 1 } }, // vertical
++        { { -1, -1 }, {  1, 1 } }, // 45 degree (up-left / down-right neighbours)
++        { {  1, -1 }, { -1, 1 } }, // 135 degree (up-right / down-left neighbours)
++    };
++    pixel *dst = (pixel *)_dst;
++    pixel *src = (pixel *)_src;
++    int a_stride, b_stride;
++    int x, y;
++    const ptrdiff_t stride_src = RPI_HEVC_SAO_BUF_STRIDE / sizeof(pixel);   // source stride is fixed, not a parameter
++    stride_dst /= sizeof(pixel);
++    // Turn the {dx, dy} pairs into element offsets relative to src[x].
++    a_stride = pos[eo][0][0] + pos[eo][0][1] * stride_src;
++    b_stride = pos[eo][1][0] + pos[eo][1][1] * stride_src;
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++) {
++            int diff0 = CMP(src[x], src[x + a_stride]);
++            int diff1 = CMP(src[x], src[x + b_stride]);
++            int offset_val        = edge_idx[2 + diff0 + diff1];
++            dst[x] = av_clip_pixel(src[x] + sao_offset_val[offset_val]);
++        }
++        src += stride_src;
++        dst += stride_dst;
++    }
++}
++
++
++#if BIT_DEPTH == 10
++// The plaited-chroma (_c) restores need a variant that copies 32-bit units, so while in the bit depth 10 pass temporarily redefine pixel/BIT_DEPTH to generate it
++#undef pixel
++#undef BIT_DEPTH
++#define pixel uint32_t
++#define BIT_DEPTH 32
++// All 16 bit variations are the same
++#define sao_edge_restore_0_10 sao_edge_restore_0_9
++#define sao_edge_restore_1_10 sao_edge_restore_1_9
++#define sao_edge_restore_0_11 sao_edge_restore_0_9
++#define sao_edge_restore_1_11 sao_edge_restore_1_9
++#define sao_edge_restore_0_12 sao_edge_restore_0_9
++#define sao_edge_restore_1_12 sao_edge_restore_1_9
++#define sao_edge_restore_0_13 sao_edge_restore_0_9
++#define sao_edge_restore_1_13 sao_edge_restore_1_9
++#define sao_edge_restore_0_14 sao_edge_restore_0_9
++#define sao_edge_restore_1_14 sao_edge_restore_1_9
++#define sao_edge_restore_0_15 sao_edge_restore_0_9
++#define sao_edge_restore_1_15 sao_edge_restore_1_9
++#define sao_edge_restore_0_16 sao_edge_restore_0_9
++#define sao_edge_restore_1_16 sao_edge_restore_1_9
++#endif
++#if BIT_DEPTH <= 9 || BIT_DEPTH == 32
++static void FUNC(sao_edge_restore_0)(uint8_t *_dst, uint8_t *_src,
++                                    ptrdiff_t stride_dst, ptrdiff_t stride_src, RpiSAOParams *sao,
++                                    int *borders, int _width, int _height,
++                                    int c_idx, uint8_t *vert_edge,
++                                    uint8_t *horiz_edge, uint8_t *diag_edge)
++{   // Copy src back over dst along the block edges flagged in borders[]; vert_edge, horiz_edge and diag_edge are not used here.
++    int x, y;
++    pixel *dst = (pixel *)_dst;
++    pixel *src = (pixel *)_src;
++    int sao_eo_class    = sao->eo_class[c_idx];
++    int init_x = 0, width = _width, height = _height;
++    // Strides arrive in bytes; convert them to units of pixel.
++    stride_dst /= sizeof(pixel);
++    stride_src /= sizeof(pixel);
++    // borders[0] / borders[2] restore the first / last column, borders[1] / borders[3] the first / last row.
++    if (sao_eo_class != SAO_EO_VERT) {
++        if (borders[0]) {
++            for (y = 0; y < height; y++) {
++                dst[y * stride_dst] = src[y * stride_src];
++            }
++            init_x = 1;   // column 0 is done; row copies below start at column 1
++        }
++        if (borders[2]) {
++            int offset     = width - 1;
++            for (x = 0; x < height; x++) {   // x counts rows in this loop
++                dst[x * stride_dst + offset] = src[x * stride_src + offset];
++            }
++            width--;      // last column is done; row copies below stop before it
++        }
++    }
++    if (sao_eo_class != SAO_EO_HORIZ) {
++        if (borders[1]) {
++            for (x = init_x; x < width; x++)
++                dst[x] = src[x];
++        }
++        if (borders[3]) {
++            ptrdiff_t y_stride_dst = stride_dst * (height - 1);
++            ptrdiff_t y_stride_src = stride_src * (height - 1);
++            for (x = init_x; x < width; x++)
++                dst[x + y_stride_dst] = src[x + y_stride_src];
++            height--;     // no effect: height is not read again in this function
++        }
++    }
++}
++
++static void FUNC(sao_edge_restore_1)(uint8_t *_dst, uint8_t *_src,
++                                    ptrdiff_t stride_dst, ptrdiff_t stride_src, RpiSAOParams *sao,
++                                    int *borders, int _width, int _height,
++                                    int c_idx, uint8_t *vert_edge,
++                                    uint8_t *horiz_edge, uint8_t *diag_edge)
++{   // Copy src back over dst along edges flagged by borders[], then along edges/corners flagged by vert_edge, horiz_edge and diag_edge.
++    int x, y;
++    pixel *dst = (pixel *)_dst;
++    pixel *src = (pixel *)_src;
++    int sao_eo_class    = sao->eo_class[c_idx];
++    int init_x = 0, init_y = 0, width = _width, height = _height;
++    // Strides arrive in bytes; convert them to units of pixel.
++    stride_dst /= sizeof(pixel);
++    stride_src /= sizeof(pixel);
++    // Stage 1: borders[0] / [2] restore the first / last column, borders[1] / [3] the first / last row.
++    if (sao_eo_class != SAO_EO_VERT) {
++        if (borders[0]) {
++            for (y = 0; y < height; y++) {
++                dst[y * stride_dst] = src[y * stride_src];
++            }
++            init_x = 1;
++        }
++        if (borders[2]) {
++            int offset     = width - 1;
++            for (x = 0; x < height; x++) {   // x counts rows in this loop
++                dst[x * stride_dst + offset] = src[x * stride_src + offset];
++            }
++            width--;
++        }
++    }
++    if (sao_eo_class != SAO_EO_HORIZ) {
++        if (borders[1]) {
++            for (x = init_x; x < width; x++)
++                dst[x] = src[x];
++            init_y = 1;
++        }
++        if (borders[3]) {
++            ptrdiff_t y_stride_dst = stride_dst * (height - 1);
++            ptrdiff_t y_stride_src = stride_src * (height - 1);
++            for (x = init_x; x < width; x++)
++                dst[x + y_stride_dst] = src[x + y_stride_src];
++            height--;
++        }
++    }
++    // Stage 2 works on the width/height/init_x/init_y left by stage 1 (each may have been reduced by one edge).
++    {
++        int save_upper_left  = !diag_edge[0] && sao_eo_class == SAO_EO_135D && !borders[0] && !borders[1];
++        int save_upper_right = !diag_edge[1] && sao_eo_class == SAO_EO_45D  && !borders[1] && !borders[2];
++        int save_lower_right = !diag_edge[2] && sao_eo_class == SAO_EO_135D && !borders[2] && !borders[3];
++        int save_lower_left  = !diag_edge[3] && sao_eo_class == SAO_EO_45D  && !borders[0] && !borders[3];
++        // A set save_* flag (0 or 1) shortens the edge loops below by one pixel so that corner keeps its filtered value.
++        // Restore pixels that can't be modified
++        if(vert_edge[0] && sao_eo_class != SAO_EO_VERT) {
++            for(y = init_y+save_upper_left; y< height-save_lower_left; y++)
++                dst[y*stride_dst] = src[y*stride_src];
++        }
++        if(vert_edge[1] && sao_eo_class != SAO_EO_VERT) {
++            for(y = init_y+save_upper_right; y< height-save_lower_right; y++)
++                dst[y*stride_dst+width-1] = src[y*stride_src+width-1];
++        }
++
++        if(horiz_edge[0] && sao_eo_class != SAO_EO_HORIZ) {
++            for(x = init_x+save_upper_left; x < width-save_upper_right; x++)
++                dst[x] = src[x];
++        }
++        if(horiz_edge[1] && sao_eo_class != SAO_EO_HORIZ) {
++            for(x = init_x+save_lower_left; x < width-save_lower_right; x++)
++                dst[(height-1)*stride_dst+x] = src[(height-1)*stride_src+x];
++        }
++        if(diag_edge[0] && sao_eo_class == SAO_EO_135D)   // single corner pixels: upper-left
++            dst[0] = src[0];
++        if(diag_edge[1] && sao_eo_class == SAO_EO_45D)    // upper-right
++            dst[width-1] = src[width-1];
++        if(diag_edge[2] && sao_eo_class == SAO_EO_135D)   // lower-right
++            dst[stride_dst*(height-1)+width-1] = src[stride_src*(height-1)+width-1];
++        if(diag_edge[3] && sao_eo_class == SAO_EO_45D)    // lower-left
++            dst[stride_dst*(height-1)] = src[stride_src*(height-1)];
++
++    }
++}
++#endif
++#if BIT_DEPTH == 32
++#undef BIT_DEPTH
++#undef pixel
++#define BIT_DEPTH 10
++#define pixel uint16_t
++#endif
++
++// --- Plaited chroma versions
++
++static void FUNC(sao_band_filter_c)(uint8_t *_dst, const uint8_t *_src,
++                                  ptrdiff_t stride_dst, ptrdiff_t stride_src,
++                                  const int16_t *sao_offset_val_u, int sao_left_class_u,
++                                  const int16_t *sao_offset_val_v, int sao_left_class_v,
++                                  int width, int height)
++{   // Band-offset filter for interleaved chroma: even elements use the U table, odd elements the V table.
++    pixel *dst = (pixel *)_dst;
++    pixel *src = (pixel *)_src;
++    int offset_table_u[32] = { 0 };
++    int offset_table_v[32] = { 0 };
++    int k, y, x;
++    int shift  = BIT_DEPTH - 5;
++    // Strides arrive in bytes; width is given in U/V pairs, so it is doubled to count elements.
++    stride_dst /= sizeof(pixel);
++    stride_src /= sizeof(pixel);
++    width *= 2;
++
++    for (k = 0; k < 4; k++)
++    {
++        offset_table_u[(k + sao_left_class_u) & 31] = sao_offset_val_u[k + 1];
++        offset_table_v[(k + sao_left_class_v) & 31] = sao_offset_val_v[k + 1];
++    }
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x += 2)
++        {
++//            printf("dst=%p, src=%p, x=%d, shift=%d\n", dst, src, x, shift);
++//            printf("offsets=%x,%x\n", src[x + 0], src[x + 1]);
++            // NOTE: the & 31 should not be needed, but broken input is currently generated that
++            // would otherwise index past the tables and crash in the 10-bit case
++            dst[x + 0] = av_clip_pixel(src[x + 0] + offset_table_u[(src[x + 0] >> shift) & 31]);
++            dst[x + 1] = av_clip_pixel(src[x + 1] + offset_table_v[(src[x + 1] >> shift) & 31]);
++        }
++        dst += stride_dst;
++        src += stride_src;
++    }
++}
++
++static void FUNC(sao_edge_filter_c)(uint8_t *_dst, const uint8_t *_src, ptrdiff_t stride_dst,
++                                  const int16_t *sao_offset_val_u, const int16_t *sao_offset_val_v,
++                                  int eo, int width, int height) {
++    // Edge-offset filter for interleaved chroma: even elements use the U offsets, odd elements the V offsets.
++    static const uint8_t edge_idx[] = { 1, 2, 0, 3, 4 };   // maps 2 + diff0 + diff1 (0..4) to an offset index
++    static const int8_t pos[4][2][2] = {                   // per eo: {dx, dy} of neighbour a and neighbour b
++        { { -1,  0 }, {  1, 0 } }, // horizontal
++        { {  0, -1 }, {  0, 1 } }, // vertical
++        { { -1, -1 }, {  1, 1 } }, // 45 degree (up-left / down-right neighbours)
++        { {  1, -1 }, { -1, 1 } }, // 135 degree (up-right / down-left neighbours)
++    };
++    pixel *dst = (pixel *)_dst;
++    pixel *src = (pixel *)_src;
++    int a_stride, b_stride;
++    int x, y;
++    const ptrdiff_t stride_src = RPI_HEVC_SAO_BUF_STRIDE / sizeof(pixel);   // source stride is fixed, not a parameter
++
++    stride_dst /= sizeof(pixel);
++    width *= 2;   // width is given in U/V pairs; count elements instead
++
++    av_assert0(width <= 64);
++    // Horizontal neighbour steps are doubled so a U sample is compared with U and a V sample with V.
++    a_stride = pos[eo][0][0] * 2 + pos[eo][0][1] * stride_src;
++    b_stride = pos[eo][1][0] * 2 + pos[eo][1][1] * stride_src;
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x += 2) {
++            int diff0u = CMP(src[x], src[x + a_stride]);
++            int diff1u = CMP(src[x], src[x + b_stride]);
++            int offset_valu        = edge_idx[2 + diff0u + diff1u];
++            int diff0v = CMP(src[x+1], src[x+1 + a_stride]);
++            int diff1v = CMP(src[x+1], src[x+1 + b_stride]);
++            int offset_valv        = edge_idx[2 + diff0v + diff1v];
++            dst[x] = av_clip_pixel(src[x] + sao_offset_val_u[offset_valu]);
++            dst[x+1] = av_clip_pixel(src[x+1] + sao_offset_val_v[offset_valv]);
++        }
++        src += stride_src;
++        dst += stride_dst;
++    }
++}
++
++// Define the chroma restore aliases only once (guarded by the BIT_DEPTH == 8 case)
++#if BIT_DEPTH == 8
++// Any old 2 byte 'normal' restore will work for these
++#define sao_edge_restore_c_0_8  sao_edge_restore_0_16
++#define sao_edge_restore_c_1_8  sao_edge_restore_1_16
++// We need 32 bit for 9 bit+
++#define sao_edge_restore_c_0_9  sao_edge_restore_0_32
++#define sao_edge_restore_c_1_9  sao_edge_restore_1_32
++#define sao_edge_restore_c_0_10 sao_edge_restore_0_32
++#define sao_edge_restore_c_1_10 sao_edge_restore_1_32
++#define sao_edge_restore_c_0_11 sao_edge_restore_0_32
++#define sao_edge_restore_c_1_11 sao_edge_restore_1_32
++#define sao_edge_restore_c_0_12 sao_edge_restore_0_32
++#define sao_edge_restore_c_1_12 sao_edge_restore_1_32
++#define sao_edge_restore_c_0_13 sao_edge_restore_0_32
++#define sao_edge_restore_c_1_13 sao_edge_restore_1_32
++#define sao_edge_restore_c_0_14 sao_edge_restore_0_32
++#define sao_edge_restore_c_1_14 sao_edge_restore_1_32
++#define sao_edge_restore_c_0_15 sao_edge_restore_0_32
++#define sao_edge_restore_c_1_15 sao_edge_restore_1_32
++#define sao_edge_restore_c_0_16 sao_edge_restore_0_32
++#define sao_edge_restore_c_1_16 sao_edge_restore_1_32
++#endif
++
++#undef CMP
++
++////////////////////////////////////////////////////////////////////////////////
++//
++////////////////////////////////////////////////////////////////////////////////
++static void FUNC(put_hevc_pel_pixels)(int16_t *dst,
++                                      uint8_t *_src, ptrdiff_t _srcstride,
++                                      int height, intptr_t mx, intptr_t my, int width)
++{
++    // Copy a width x height block into the 16-bit buffer, left-shifted by
++    // 14 - BIT_DEPTH; mx and my are not used. Output rows are MAX_PB_SIZE apart.
++    const ptrdiff_t src_pitch = _srcstride / sizeof(pixel);
++    const pixel *in_row       = (const pixel *)_src;
++    int row, col;
++
++    for (row = 0; row < height; row++, in_row += src_pitch, dst += MAX_PB_SIZE) {
++        for (col = 0; col < width; col++)
++            dst[col] = in_row[col] << (14 - BIT_DEPTH);
++    }
++}
++
++static void FUNC(put_hevc_pel_uni_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                          int height, intptr_t mx, intptr_t my, int width)
++{
++    // Straight row-by-row copy of `width` pixels per row for `height` rows;
++    // mx and my are not used. Both strides are given in bytes.
++    const size_t row_bytes    = width * sizeof(pixel);
++    const ptrdiff_t src_pitch = _srcstride / sizeof(pixel);
++    const ptrdiff_t dst_pitch = _dststride / sizeof(pixel);
++    const pixel *in_row       = (const pixel *)_src;
++    pixel *out_row            = (pixel *)_dst;
++    int rows_left;
++
++    for (rows_left = height; rows_left > 0; rows_left--, in_row += src_pitch, out_row += dst_pitch)
++        memcpy(out_row, in_row, row_bytes);
++}
++
++static void FUNC(put_hevc_pel_bi_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                         int16_t *src2,
++                                         int height, intptr_t mx, intptr_t my, int width)
++{   // Combine src (scaled by 14 - BIT_DEPTH bits) with the 16-bit buffer src2, round, shift and clip into dst; mx/my unused.
++    int x, y;
++    pixel *src          = (pixel *)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    // One bit more than the single-source scale because two values are summed.
++    int shift = 14  + 1 - BIT_DEPTH;
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++    // src2 rows are MAX_PB_SIZE elements apart; src and dst use their own strides.
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel(((src[x] << (14 - BIT_DEPTH)) + src2[x] + offset) >> shift);
++        src  += srcstride;
++        dst  += dststride;
++        src2 += MAX_PB_SIZE;
++    }
++}
++
++static void FUNC(put_hevc_pel_uni_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                            int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
++{   // Weighted copy: scale each source pixel, multiply by wx, round and shift by denom + 14 - BIT_DEPTH, add ox, clip; mx/my unused.
++    int x, y;
++    pixel *src          = (pixel *)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    int shift = denom + 14 - BIT_DEPTH;
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++    // ox is scaled up by BIT_DEPTH - 8 bits before use.
++    ox     = ox * (1 << (BIT_DEPTH - 8));
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel((((src[x] << (14 - BIT_DEPTH)) * wx + offset) >> shift) + ox);
++        src += srcstride;
++        dst += dststride;
++    }
++}
++
++static void FUNC(put_hevc_pel_bi_w_pixels)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                           int16_t *src2,
++                                           int height, int denom, int wx0, int wx1,
++                                           int ox0, int ox1, intptr_t mx, intptr_t my, int width)
++{   // Weighted combination of src (weight wx1) and the 16-bit buffer src2 (weight wx0) plus offsets, clipped into dst; mx/my unused.
++    int x, y;
++    pixel *src          = (pixel *)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    // The final right shift is log2Wd + 1 = denom + 15 - BIT_DEPTH.
++    int shift = 14  + 1 - BIT_DEPTH;
++    int log2Wd = denom + shift - 1;
++    // Both offsets are scaled up by BIT_DEPTH - 8 bits before use.
++    ox0     = ox0 * (1 << (BIT_DEPTH - 8));
++    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++) {
++            dst[x] = av_clip_pixel(( (src[x] << (14 - BIT_DEPTH)) * wx1 + src2[x] * wx0 + (ox0 + ox1 + 1) * (1 << log2Wd)) >> (log2Wd + 1));
++        }
++        src  += srcstride;
++        dst  += dststride;
++        src2 += MAX_PB_SIZE;
++    }
++}
++
++////////////////////////////////////////////////////////////////////////////////
++//
++////////////////////////////////////////////////////////////////////////////////
++#define QPEL_FILTER(src, stride) /* 8-tap sum over src[x - 3*stride .. x + 4*stride]; uses 'filter' and 'x' from the caller */ \
++    (filter[0] * src[x - 3 * stride] +                                         \
++     filter[1] * src[x - 2 * stride] +                                         \
++     filter[2] * src[x -     stride] +                                         \
++     filter[3] * src[x             ] +                                         \
++     filter[4] * src[x +     stride] +                                         \
++     filter[5] * src[x + 2 * stride] +                                         \
++     filter[6] * src[x + 3 * stride] +                                         \
++     filter[7] * src[x + 4 * stride])
++
++static void FUNC(put_hevc_qpel_h)(int16_t *dst,
++                                  uint8_t *_src, ptrdiff_t _srcstride,
++                                  int height, intptr_t mx, intptr_t my, int width)
++{ // Horizontal 8-tap pass: filters along x and stores int16 rows of pitch MAX_PB_SIZE; my is unused.
++    int x, y;
++    pixel        *src       = (pixel*)_src;
++    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
++    const int8_t *filter    = ff_hevc_rpi_qpel_filters[mx - 1]; // table indexed by mx - 1, so mx must be >= 1
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); // reads src[x - 3] .. src[x + 4]
++        src += srcstride;
++        dst += MAX_PB_SIZE;
++    }
++}
++
++static void FUNC(put_hevc_qpel_v)(int16_t *dst,
++                                  uint8_t *_src, ptrdiff_t _srcstride,
++                                  int height, intptr_t mx, intptr_t my, int width)
++{ // Vertical 8-tap pass: filters down columns and stores int16 rows of pitch MAX_PB_SIZE; mx is unused.
++    int x, y;
++    pixel        *src       = (pixel*)_src;
++    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
++    const int8_t *filter    = ff_hevc_rpi_qpel_filters[my - 1]; // table indexed by my - 1, so my must be >= 1
++    for (y = 0; y < height; y++)  {
++        for (x = 0; x < width; x++)
++            dst[x] = QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8); // reads 3 rows above through 4 rows below
++        src += srcstride;
++        dst += MAX_PB_SIZE;
++    }
++}
++
++static void FUNC(put_hevc_qpel_hv)(int16_t *dst,
++                                   uint8_t *_src,
++                                   ptrdiff_t _srcstride,
++                                   int height, intptr_t mx,
++                                   intptr_t my, int width)
++{ // Two-pass 8-tap filter: horizontal into tmp_array, then vertical over tmp into int16 dst rows.
++    int x, y;
++    const int8_t *filter;
++    pixel *src = (pixel*)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; // height + QPEL_EXTRA rows, pitch MAX_PB_SIZE
++    int16_t *tmp = tmp_array;
++
++    src   -= QPEL_EXTRA_BEFORE * srcstride; // start above the block so the vertical taps have input rows
++    filter = ff_hevc_rpi_qpel_filters[mx - 1];
++    for (y = 0; y < height + QPEL_EXTRA; y++) {
++        for (x = 0; x < width; x++)
++            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
++        src += srcstride;
++        tmp += MAX_PB_SIZE;
++    }
++
++    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; // rewind to the row matching the first output row
++    filter = ff_hevc_rpi_qpel_filters[my - 1];
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
++        tmp += MAX_PB_SIZE;
++        dst += MAX_PB_SIZE;
++    }
++}
++
++static void FUNC(put_hevc_qpel_uni_h)(uint8_t *_dst,  ptrdiff_t _dststride,
++                                      uint8_t *_src, ptrdiff_t _srcstride,
++                                      int height, intptr_t mx, intptr_t my, int width)
++{ // Horizontal 8-tap filter written straight to pixels: round, shift, clip; my is unused.
++    int x, y;
++    pixel        *src       = (pixel*)_src;
++    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter    = ff_hevc_rpi_qpel_filters[mx - 1];
++    int shift = 14 - BIT_DEPTH;
++
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1); // rounding term: half of 1 << shift
++#else
++    int offset = 0;
++#endif
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
++        src += srcstride;
++        dst += dststride;
++    }
++}
++
++static void FUNC(put_hevc_qpel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                     int16_t *src2,
++                                     int height, intptr_t mx, intptr_t my, int width)
++{ // Horizontal 8-tap filter summed with src2[x], then rounded, shifted and clipped to pixels; my is unused.
++    int x, y;
++    pixel        *src       = (pixel*)_src;
++    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++
++    const int8_t *filter    = ff_hevc_rpi_qpel_filters[mx - 1];
++
++    int shift = 14  + 1 - BIT_DEPTH; // one bit more than the uni variant because two terms are summed
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
++        src  += srcstride;
++        dst  += dststride;
++        src2 += MAX_PB_SIZE; // src2 rows are MAX_PB_SIZE entries apart
++    }
++}
++
++static void FUNC(put_hevc_qpel_uni_v)(uint8_t *_dst,  ptrdiff_t _dststride,
++                                     uint8_t *_src, ptrdiff_t _srcstride,
++                                     int height, intptr_t mx, intptr_t my, int width)
++{ // Vertical 8-tap filter written straight to pixels: round, shift, clip; mx is unused.
++    int x, y;
++    pixel        *src       = (pixel*)_src;
++    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter    = ff_hevc_rpi_qpel_filters[my - 1];
++    int shift = 14 - BIT_DEPTH;
++
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1); // rounding term: half of 1 << shift
++#else
++    int offset = 0;
++#endif
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
++        src += srcstride;
++        dst += dststride;
++    }
++}
++
++
++static void FUNC(put_hevc_qpel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                     int16_t *src2,
++                                     int height, intptr_t mx, intptr_t my, int width)
++{ // Vertical 8-tap filter summed with src2[x], then rounded, shifted and clipped to pixels; mx is unused.
++    int x, y;
++    pixel        *src       = (pixel*)_src;
++    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++
++    const int8_t *filter    = ff_hevc_rpi_qpel_filters[my - 1];
++
++    int shift = 14 + 1 - BIT_DEPTH; // one bit more than the uni variant because two terms are summed
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
++        src  += srcstride;
++        dst  += dststride;
++        src2 += MAX_PB_SIZE; // src2 rows are MAX_PB_SIZE entries apart
++    }
++}
++
++static void FUNC(put_hevc_qpel_uni_hv)(uint8_t *_dst,  ptrdiff_t _dststride,
++                                       uint8_t *_src, ptrdiff_t _srcstride,
++                                       int height, intptr_t mx, intptr_t my, int width)
++{ // Two-pass 8-tap filter (horizontal into tmp_array, then vertical) rounded and clipped to pixels.
++    int x, y;
++    const int8_t *filter;
++    pixel *src = (pixel*)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; // height + QPEL_EXTRA rows, pitch MAX_PB_SIZE
++    int16_t *tmp = tmp_array;
++    int shift =  14 - BIT_DEPTH;
++
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    src   -= QPEL_EXTRA_BEFORE * srcstride; // start above the block so the vertical taps have input rows
++    filter = ff_hevc_rpi_qpel_filters[mx - 1];
++    for (y = 0; y < height + QPEL_EXTRA; y++) {
++        for (x = 0; x < width; x++)
++            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
++        src += srcstride;
++        tmp += MAX_PB_SIZE;
++    }
++
++    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; // rewind to the row matching the first output row
++    filter = ff_hevc_rpi_qpel_filters[my - 1];
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
++        tmp += MAX_PB_SIZE;
++        dst += dststride;
++    }
++}
++
++static void FUNC(put_hevc_qpel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                      int16_t *src2,
++                                      int height, intptr_t mx, intptr_t my, int width)
++{ // Two-pass 8-tap filter (horizontal into tmp_array, then vertical) summed with src2[x] and clipped.
++    int x, y;
++    const int8_t *filter;
++    pixel *src = (pixel*)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; // height + QPEL_EXTRA rows, pitch MAX_PB_SIZE
++    int16_t *tmp = tmp_array;
++    int shift = 14 + 1 - BIT_DEPTH;
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    src   -= QPEL_EXTRA_BEFORE * srcstride; // start above the block so the vertical taps have input rows
++    filter = ff_hevc_rpi_qpel_filters[mx - 1];
++    for (y = 0; y < height + QPEL_EXTRA; y++) {
++        for (x = 0; x < width; x++)
++            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
++        src += srcstride;
++        tmp += MAX_PB_SIZE;
++    }
++
++    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; // rewind to the row matching the first output row
++    filter = ff_hevc_rpi_qpel_filters[my - 1];
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
++        tmp  += MAX_PB_SIZE;
++        dst  += dststride;
++        src2 += MAX_PB_SIZE;
++    }
++}
++
++static void FUNC(put_hevc_qpel_uni_w_h)(uint8_t *_dst,  ptrdiff_t _dststride,
++                                        uint8_t *_src, ptrdiff_t _srcstride,
++                                        int height, int denom, int wx, int ox,
++                                        intptr_t mx, intptr_t my, int width)
++{ // Horizontal 8-tap filter with weight wx and offset ox applied before clipping; my is unused.
++    int x, y;
++    pixel        *src       = (pixel*)_src;
++    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter    = ff_hevc_rpi_qpel_filters[mx - 1];
++    int shift = denom + 14 - BIT_DEPTH; // denom extends the normalising shift
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    ox = ox * (1 << (BIT_DEPTH - 8)); // offset is scaled by 2^(BIT_DEPTH - 8)
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel((((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
++        src += srcstride;
++        dst += dststride;
++    }
++}
++
++static void FUNC(put_hevc_qpel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                       int16_t *src2,
++                                       int height, int denom, int wx0, int wx1,
++                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width)
++{ // Horizontal 8-tap filter blended with src2: filtered * wx1 + src2 * wx0 + offsets; my is unused.
++    int x, y;
++    pixel        *src       = (pixel*)_src;
++    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++
++    const int8_t *filter    = ff_hevc_rpi_qpel_filters[mx - 1];
++
++    int shift = 14  + 1 - BIT_DEPTH;
++    int log2Wd = denom + shift - 1;
++
++    ox0     = ox0 * (1 << (BIT_DEPTH - 8)); // offsets are scaled by 2^(BIT_DEPTH - 8)
++    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++) // ox0 + ox1 + 1 may be negative: scale by multiplying, a negative << is undefined
++            dst[x] = av_clip_pixel(((QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
++                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
++        src  += srcstride;
++        dst  += dststride;
++        src2 += MAX_PB_SIZE;
++    }
++}
++
++static void FUNC(put_hevc_qpel_uni_w_v)(uint8_t *_dst,  ptrdiff_t _dststride,
++                                        uint8_t *_src, ptrdiff_t _srcstride,
++                                        int height, int denom, int wx, int ox,
++                                        intptr_t mx, intptr_t my, int width)
++{ // Vertical 8-tap filter with weight wx and offset ox applied before clipping; mx is unused.
++    int x, y;
++    pixel        *src       = (pixel*)_src;
++    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter    = ff_hevc_rpi_qpel_filters[my - 1];
++    int shift = denom + 14 - BIT_DEPTH; // denom extends the normalising shift
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    ox = ox * (1 << (BIT_DEPTH - 8)); // offset is scaled by 2^(BIT_DEPTH - 8)
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel((((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
++        src += srcstride;
++        dst += dststride;
++    }
++}
++
++static void FUNC(put_hevc_qpel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                       int16_t *src2,
++                                       int height, int denom, int wx0, int wx1,
++                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width)
++{ // Vertical 8-tap filter blended with src2: filtered * wx1 + src2 * wx0 + offsets; mx is unused.
++    int x, y;
++    pixel        *src       = (pixel*)_src;
++    ptrdiff_t     srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++
++    const int8_t *filter    = ff_hevc_rpi_qpel_filters[my - 1];
++
++    int shift = 14 + 1 - BIT_DEPTH;
++    int log2Wd = denom + shift - 1;
++
++    ox0     = ox0 * (1 << (BIT_DEPTH - 8)); // offsets are scaled by 2^(BIT_DEPTH - 8)
++    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++) // ox0 + ox1 + 1 may be negative: scale by multiplying, a negative << is undefined
++            dst[x] = av_clip_pixel(((QPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
++                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
++        src  += srcstride;
++        dst  += dststride;
++        src2 += MAX_PB_SIZE;
++    }
++}
++
++static void FUNC(put_hevc_qpel_uni_w_hv)(uint8_t *_dst,  ptrdiff_t _dststride,
++                                         uint8_t *_src, ptrdiff_t _srcstride,
++                                         int height, int denom, int wx, int ox,
++                                         intptr_t mx, intptr_t my, int width)
++{ // Two-pass 8-tap filter (horizontal into tmp_array, then vertical) with weight wx and offset ox.
++    int x, y;
++    const int8_t *filter;
++    pixel *src = (pixel*)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; // height + QPEL_EXTRA rows, pitch MAX_PB_SIZE
++    int16_t *tmp = tmp_array;
++    int shift = denom + 14 - BIT_DEPTH;
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    src   -= QPEL_EXTRA_BEFORE * srcstride; // start above the block so the vertical taps have input rows
++    filter = ff_hevc_rpi_qpel_filters[mx - 1];
++    for (y = 0; y < height + QPEL_EXTRA; y++) {
++        for (x = 0; x < width; x++)
++            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
++        src += srcstride;
++        tmp += MAX_PB_SIZE;
++    }
++
++    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; // rewind to the row matching the first output row
++    filter = ff_hevc_rpi_qpel_filters[my - 1];
++
++    ox = ox * (1 << (BIT_DEPTH - 8)); // offset is scaled by 2^(BIT_DEPTH - 8)
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel((((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
++        tmp += MAX_PB_SIZE;
++        dst += dststride;
++    }
++}
++
++static void FUNC(put_hevc_qpel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                        int16_t *src2,
++                                        int height, int denom, int wx0, int wx1,
++                                        int ox0, int ox1, intptr_t mx, intptr_t my, int width)
++{ // Two-pass 8-tap filter (horizontal into tmp_array, then vertical) blended with src2 using wx1/wx0.
++    int x, y;
++    const int8_t *filter;
++    pixel *src = (pixel*)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    int16_t tmp_array[(MAX_PB_SIZE + QPEL_EXTRA) * MAX_PB_SIZE]; // height + QPEL_EXTRA rows, pitch MAX_PB_SIZE
++    int16_t *tmp = tmp_array;
++    int shift = 14 + 1 - BIT_DEPTH;
++    int log2Wd = denom + shift - 1;
++
++    src   -= QPEL_EXTRA_BEFORE * srcstride; // start above the block so the vertical taps have input rows
++    filter = ff_hevc_rpi_qpel_filters[mx - 1];
++    for (y = 0; y < height + QPEL_EXTRA; y++) {
++        for (x = 0; x < width; x++)
++            tmp[x] = QPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
++        src += srcstride;
++        tmp += MAX_PB_SIZE;
++    }
++
++    tmp    = tmp_array + QPEL_EXTRA_BEFORE * MAX_PB_SIZE; // rewind to the row matching the first output row
++    filter = ff_hevc_rpi_qpel_filters[my - 1];
++
++    ox0     = ox0 * (1 << (BIT_DEPTH - 8)); // offsets are scaled by 2^(BIT_DEPTH - 8)
++    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++) // ox0 + ox1 + 1 may be negative: scale by multiplying, a negative << is undefined
++            dst[x] = av_clip_pixel(((QPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
++                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
++        tmp  += MAX_PB_SIZE;
++        dst  += dststride;
++        src2 += MAX_PB_SIZE;
++    }
++}
++
++////////////////////////////////////////////////////////////////////////////////
++// EPEL_FILTER: 4-tap sum over src[x - stride .. x + 2*stride]; uses `filter` and `x` from the caller
++////////////////////////////////////////////////////////////////////////////////
++#define EPEL_FILTER(src, stride)                                               \
++    (filter[0] * src[x - stride] +                                             \
++     filter[1] * src[x]          +                                             \
++     filter[2] * src[x + stride] +                                             \
++     filter[3] * src[x + 2 * stride])
++
++static void FUNC(put_hevc_epel_h)(int16_t *dst,
++                                  uint8_t *_src, ptrdiff_t _srcstride,
++                                  int height, intptr_t mx, intptr_t my, int width)
++{ // Horizontal 4-tap pass: filters along x and stores int16 rows of pitch MAX_PB_SIZE; my is unused.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[mx - 1]; // table indexed by mx - 1, so mx must be >= 1
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8); // reads src[x - 1] .. src[x + 2]
++        src += srcstride;
++        dst += MAX_PB_SIZE;
++    }
++}
++
++static void FUNC(put_hevc_epel_v)(int16_t *dst,
++                                  uint8_t *_src, ptrdiff_t _srcstride,
++                                  int height, intptr_t mx, intptr_t my, int width)
++{ // Vertical 4-tap pass: filters down columns and stores int16 rows of pitch MAX_PB_SIZE; mx is unused.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[my - 1]; // table indexed by my - 1, so my must be >= 1
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8); // reads 1 row above through 2 rows below
++        src += srcstride;
++        dst += MAX_PB_SIZE;
++    }
++}
++
++static void FUNC(put_hevc_epel_hv)(int16_t *dst,
++                                   uint8_t *_src, ptrdiff_t _srcstride,
++                                   int height, intptr_t mx, intptr_t my, int width)
++{ // Two-pass 4-tap filter: horizontal into tmp_array, then vertical over tmp into int16 dst rows.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[mx - 1]; // horizontal taps first; reassigned for the vertical pass
++    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; // height + EPEL_EXTRA rows, pitch MAX_PB_SIZE
++    int16_t *tmp = tmp_array;
++
++    src -= EPEL_EXTRA_BEFORE * srcstride; // start above the block so the vertical taps have input rows
++
++    for (y = 0; y < height + EPEL_EXTRA; y++) {
++        for (x = 0; x < width; x++)
++            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
++        src += srcstride;
++        tmp += MAX_PB_SIZE;
++    }
++
++    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; // rewind to the row matching the first output row
++    filter = ff_hevc_rpi_epel_filters[my - 1];
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6;
++        tmp += MAX_PB_SIZE;
++        dst += MAX_PB_SIZE;
++    }
++}
++
++static void FUNC(put_hevc_epel_uni_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                      int height, intptr_t mx, intptr_t my, int width)
++{ // Horizontal 4-tap filter written straight to pixels: round, shift, clip; my is unused.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[mx - 1];
++    int shift = 14 - BIT_DEPTH;
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1); // rounding term: half of 1 << shift
++#else
++    int offset = 0;
++#endif
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + offset) >> shift);
++        src += srcstride;
++        dst += dststride;
++    }
++}
++
++static void FUNC(put_hevc_epel_bi_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                     int16_t *src2,
++                                     int height, intptr_t mx, intptr_t my, int width)
++{ // Horizontal 4-tap filter summed with src2[x], then rounded, shifted and clipped to pixels; my is unused.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[mx - 1];
++    int shift = 14 + 1 - BIT_DEPTH; // one bit more than the uni variant because two terms are summed
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++) {
++            dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
++        }
++        dst  += dststride;
++        src  += srcstride;
++        src2 += MAX_PB_SIZE; // src2 rows are MAX_PB_SIZE entries apart
++    }
++}
++
++static void FUNC(put_hevc_epel_uni_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                      int height, intptr_t mx, intptr_t my, int width)
++{ // Vertical 4-tap filter written straight to pixels: round, shift, clip; mx is unused.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[my - 1];
++    int shift = 14 - BIT_DEPTH;
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1); // rounding term: half of 1 << shift
++#else
++    int offset = 0;
++#endif
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + offset) >> shift);
++        src += srcstride;
++        dst += dststride;
++    }
++}
++
++static void FUNC(put_hevc_epel_bi_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                     int16_t *src2,
++                                     int height, intptr_t mx, intptr_t my, int width)
++{ // Vertical 4-tap filter summed with src2[x], then rounded, shifted and clipped to pixels; mx is unused.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[my - 1];
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    int shift = 14 + 1 - BIT_DEPTH; // one bit more than the uni variant because two terms are summed
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) + src2[x] + offset) >> shift);
++        dst  += dststride;
++        src  += srcstride;
++        src2 += MAX_PB_SIZE; // src2 rows are MAX_PB_SIZE entries apart
++    }
++}
++
++static void FUNC(put_hevc_epel_uni_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                       int height, intptr_t mx, intptr_t my, int width)
++{ // Two-pass 4-tap filter (horizontal into tmp_array, then vertical) rounded and clipped to pixels.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[mx - 1]; // horizontal taps first; reassigned for the vertical pass
++    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; // height + EPEL_EXTRA rows, pitch MAX_PB_SIZE
++    int16_t *tmp = tmp_array;
++    int shift = 14 - BIT_DEPTH;
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    src -= EPEL_EXTRA_BEFORE * srcstride; // start above the block so the vertical taps have input rows
++
++    for (y = 0; y < height + EPEL_EXTRA; y++) {
++        for (x = 0; x < width; x++)
++            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
++        src += srcstride;
++        tmp += MAX_PB_SIZE;
++    }
++
++    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; // rewind to the row matching the first output row
++    filter = ff_hevc_rpi_epel_filters[my - 1];
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + offset) >> shift);
++        tmp += MAX_PB_SIZE;
++        dst += dststride;
++    }
++}
++
++static void FUNC(put_hevc_epel_bi_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                      int16_t *src2,
++                                      int height, intptr_t mx, intptr_t my, int width)
++{ // Two-pass 4-tap filter (horizontal into tmp_array, then vertical) summed with src2[x] and clipped.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[mx - 1]; // horizontal taps first; reassigned for the vertical pass
++    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; // height + EPEL_EXTRA rows, pitch MAX_PB_SIZE
++    int16_t *tmp = tmp_array;
++    int shift = 14 + 1 - BIT_DEPTH;
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    src -= EPEL_EXTRA_BEFORE * srcstride; // start above the block so the vertical taps have input rows
++
++    for (y = 0; y < height + EPEL_EXTRA; y++) {
++        for (x = 0; x < width; x++)
++            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
++        src += srcstride;
++        tmp += MAX_PB_SIZE;
++    }
++
++    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; // rewind to the row matching the first output row
++    filter = ff_hevc_rpi_epel_filters[my - 1];
++
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) + src2[x] + offset) >> shift);
++        tmp  += MAX_PB_SIZE;
++        dst  += dststride;
++        src2 += MAX_PB_SIZE;
++    }
++}
++
++static void FUNC(put_hevc_epel_uni_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                        int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
++{ // Horizontal 4-tap filter with weight wx and offset ox applied before clipping; my is unused.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[mx - 1];
++    int shift = denom + 14 - BIT_DEPTH; // denom extends the normalising shift
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    ox     = ox * (1 << (BIT_DEPTH - 8)); // offset is scaled by 2^(BIT_DEPTH - 8)
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++) {
++            dst[x] = av_clip_pixel((((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
++        }
++        dst += dststride;
++        src += srcstride;
++    }
++}
++
++static void FUNC(put_hevc_epel_bi_w_h)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                       int16_t *src2,
++                                       int height, int denom, int wx0, int wx1,
++                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width)
++{ // Horizontal 4-tap filter blended with src2: filtered * wx1 + src2 * wx0 + offsets; my is unused.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[mx - 1];
++    int shift = 14 + 1 - BIT_DEPTH;
++    int log2Wd = denom + shift - 1;
++
++    ox0     = ox0 * (1 << (BIT_DEPTH - 8)); // offsets are scaled by 2^(BIT_DEPTH - 8)
++    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++) // ox0 + ox1 + 1 may be negative: scale by multiplying, a negative << is undefined
++            dst[x] = av_clip_pixel(((EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
++                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
++        src  += srcstride;
++        dst  += dststride;
++        src2 += MAX_PB_SIZE;
++    }
++}
++
++static void FUNC(put_hevc_epel_uni_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                        int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
++{ // Vertical 4-tap filter with weight wx and offset ox applied before clipping; mx is unused.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[my - 1];
++    int shift = denom + 14 - BIT_DEPTH; // denom extends the normalising shift
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    ox     = ox * (1 << (BIT_DEPTH - 8)); // offset is scaled by 2^(BIT_DEPTH - 8)
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++) {
++            dst[x] = av_clip_pixel((((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx + offset) >> shift) + ox);
++        }
++        dst += dststride;
++        src += srcstride;
++    }
++}
++
++static void FUNC(put_hevc_epel_bi_w_v)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                       int16_t *src2,
++                                       int height, int denom, int wx0, int wx1,
++                                       int ox0, int ox1, intptr_t mx, intptr_t my, int width)
++{ // Vertical 4-tap filter blended with src2: filtered * wx1 + src2 * wx0 + offsets; mx is unused.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride  = _srcstride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[my - 1];
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    int shift = 14 + 1 - BIT_DEPTH;
++    int log2Wd = denom + shift - 1;
++
++    ox0     = ox0 * (1 << (BIT_DEPTH - 8)); // offsets are scaled by 2^(BIT_DEPTH - 8)
++    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++) // ox0 + ox1 + 1 may be negative: scale by multiplying, a negative << is undefined
++            dst[x] = av_clip_pixel(((EPEL_FILTER(src, srcstride) >> (BIT_DEPTH - 8)) * wx1 + src2[x] * wx0 +
++                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
++        src  += srcstride;
++        dst  += dststride;
++        src2 += MAX_PB_SIZE;
++    }
++}
++
++static void FUNC(put_hevc_epel_uni_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                         int height, int denom, int wx, int ox, intptr_t mx, intptr_t my, int width)
++{ // Two-pass 4-tap filter (horizontal into tmp_array, then vertical) with weight wx and offset ox.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[mx - 1]; // horizontal taps first; reassigned for the vertical pass
++    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; // height + EPEL_EXTRA rows, pitch MAX_PB_SIZE
++    int16_t *tmp = tmp_array;
++    int shift = denom + 14 - BIT_DEPTH;
++#if BIT_DEPTH < 14
++    int offset = 1 << (shift - 1);
++#else
++    int offset = 0;
++#endif
++
++    src -= EPEL_EXTRA_BEFORE * srcstride; // start above the block so the vertical taps have input rows
++
++    for (y = 0; y < height + EPEL_EXTRA; y++) {
++        for (x = 0; x < width; x++)
++            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
++        src += srcstride;
++        tmp += MAX_PB_SIZE;
++    }
++
++    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; // rewind to the row matching the first output row
++    filter = ff_hevc_rpi_epel_filters[my - 1];
++
++    ox     = ox * (1 << (BIT_DEPTH - 8)); // offset is scaled by 2^(BIT_DEPTH - 8)
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++)
++            dst[x] = av_clip_pixel((((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx + offset) >> shift) + ox);
++        tmp += MAX_PB_SIZE;
++        dst += dststride;
++    }
++}
++
++static void FUNC(put_hevc_epel_bi_w_hv)(uint8_t *_dst, ptrdiff_t _dststride, uint8_t *_src, ptrdiff_t _srcstride,
++                                        int16_t *src2,
++                                        int height, int denom, int wx0, int wx1,
++                                        int ox0, int ox1, intptr_t mx, intptr_t my, int width)
++{ // Two-pass 4-tap filter (horizontal into tmp_array, then vertical) blended with src2 using wx1/wx0.
++    int x, y;
++    pixel *src = (pixel *)_src;
++    ptrdiff_t srcstride = _srcstride / sizeof(pixel);
++    pixel *dst          = (pixel *)_dst;
++    ptrdiff_t dststride = _dststride / sizeof(pixel);
++    const int8_t *filter = ff_hevc_rpi_epel_filters[mx - 1]; // horizontal taps first; reassigned for the vertical pass
++    int16_t tmp_array[(MAX_PB_SIZE + EPEL_EXTRA) * MAX_PB_SIZE]; // height + EPEL_EXTRA rows, pitch MAX_PB_SIZE
++    int16_t *tmp = tmp_array;
++    int shift = 14 + 1 - BIT_DEPTH;
++    int log2Wd = denom + shift - 1;
++
++    src -= EPEL_EXTRA_BEFORE * srcstride; // start above the block so the vertical taps have input rows
++
++    for (y = 0; y < height + EPEL_EXTRA; y++) {
++        for (x = 0; x < width; x++)
++            tmp[x] = EPEL_FILTER(src, 1) >> (BIT_DEPTH - 8);
++        src += srcstride;
++        tmp += MAX_PB_SIZE;
++    }
++
++    tmp      = tmp_array + EPEL_EXTRA_BEFORE * MAX_PB_SIZE; // rewind to the row matching the first output row
++    filter = ff_hevc_rpi_epel_filters[my - 1];
++
++    ox0     = ox0 * (1 << (BIT_DEPTH - 8)); // offsets are scaled by 2^(BIT_DEPTH - 8)
++    ox1     = ox1 * (1 << (BIT_DEPTH - 8));
++    for (y = 0; y < height; y++) {
++        for (x = 0; x < width; x++) // offset sum is scaled by multiplication, which is defined for negative values
++            dst[x] = av_clip_pixel(((EPEL_FILTER(tmp, MAX_PB_SIZE) >> 6) * wx1 + src2[x] * wx0 +
++                                    ((ox0 + ox1 + 1) * (1 << log2Wd))) >> (log2Wd + 1));
++        tmp  += MAX_PB_SIZE;
++        dst  += dststride;
++        src2 += MAX_PB_SIZE;
++    }
++}
++
++// Line zero of a 4-line segment: pix addresses Q0; P0..P3 are at -1..-4 * xstride, Q1..Q3 at +1..+3 * xstride
++#define P3 pix[-4 * xstride]
++#define P2 pix[-3 * xstride]
++#define P1 pix[-2 * xstride]
++#define P0 pix[-1 * xstride]
++#define Q0 pix[0 * xstride]
++#define Q1 pix[1 * xstride]
++#define Q2 pix[2 * xstride]
++#define Q3 pix[3 * xstride]
++
++// Line three: the same taps 3 * ystride further on. Used only for the deblocking decision
++#define TP3 pix[-4 * xstride + 3 * ystride]
++#define TP2 pix[-3 * xstride + 3 * ystride]
++#define TP1 pix[-2 * xstride + 3 * ystride]
++#define TP0 pix[-1 * xstride + 3 * ystride]
++#define TQ0 pix[0  * xstride + 3 * ystride]
++#define TQ1 pix[1  * xstride + 3 * ystride]
++#define TQ2 pix[2  * xstride + 3 * ystride]
++#define TQ3 pix[3  * xstride + 3 * ystride]
++
++static void FUNC(hevc_loop_filter_luma)(uint8_t *_pix,
++                                        ptrdiff_t _xstride, ptrdiff_t _ystride,
++                                        int beta, int *_tc,
++                                        uint8_t *_no_p, uint8_t *_no_q)
++{   // Luma deblocking of two 4-line segments; xstride steps across the edge (P <-> Q), ystride from line to line.
++    int d, j;
++    pixel *pix        = (pixel *)_pix;
++    ptrdiff_t xstride = _xstride / sizeof(pixel);
++    ptrdiff_t ystride = _ystride / sizeof(pixel);
++    // beta (and tc below) are scaled by 2^(BIT_DEPTH - 8)
++    beta <<= BIT_DEPTH - 8;
++    // One pass per segment: segment j uses _tc[j], _no_p[j] and _no_q[j]
++    for (j = 0; j < 2; j++) {
++        const int dp0  = abs(P2  - 2 * P1  + P0);   // second differences on line 0 ...
++        const int dq0  = abs(Q2  - 2 * Q1  + Q0);
++        const int dp3  = abs(TP2 - 2 * TP1 + TP0);  // ... and on line 3 of the segment
++        const int dq3  = abs(TQ2 - 2 * TQ1 + TQ0);
++        const int d0   = dp0 + dq0;
++        const int d3   = dp3 + dq3;
++        const int tc   = _tc[j]   << (BIT_DEPTH - 8);
++        const int no_p = _no_p[j];  // non-zero: never write the P side
++        const int no_q = _no_q[j];  // non-zero: never write the Q side
++
++        if (d0 + d3 >= beta) {
++            pix += 4 * ystride;  // decision fails: skip this segment's 4 lines unmodified
++            continue;
++        } else {
++            const int beta_3 = beta >> 3;
++            const int beta_2 = beta >> 2;
++            const int tc25   = ((tc * 5 + 1) >> 1);
++            // Strong filtering requires lines 0 and 3 to both pass all three threshold tests
++            if (abs(P3  -  P0) + abs(Q3  -  Q0) < beta_3 && abs(P0  -  Q0) < tc25 &&
++                abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
++                                      (d0 << 1) < beta_2 &&      (d3 << 1) < beta_2) {
++                // strong filtering: P0..P2 / Q0..Q2 are rewritten, each change clipped to +/- 2 * tc
++                const int tc2 = tc << 1;
++                for (d = 0; d < 4; d++) {
++                    const int p3 = P3;  // snapshot all taps before any of them is overwritten
++                    const int p2 = P2;
++                    const int p1 = P1;
++                    const int p0 = P0;
++                    const int q0 = Q0;
++                    const int q1 = Q1;
++                    const int q2 = Q2;
++                    const int q3 = Q3;
++                    if (!no_p) {
++                        P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc2, tc2);
++                        P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc2, tc2);
++                        P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc2, tc2);
++                    }
++                    if (!no_q) {
++                        Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc2, tc2);
++                        Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc2, tc2);
++                        Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc2, tc2);
++                    }
++                    pix += ystride;  // next line of the segment
++                }
++            } else { // normal filtering
++                int nd_p = 1;
++                int nd_q = 1;
++                const int tc_2 = tc >> 1;
++                if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
++                    nd_p = 2;  // low P-side activity: P1 is adjusted as well as P0
++                if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
++                    nd_q = 2;  // low Q-side activity: Q1 is adjusted as well as Q0
++                // Per line: compute delta0 and apply it only when |delta0| < 10 * tc
++                for (d = 0; d < 4; d++) {
++                    const int p2 = P2;
++                    const int p1 = P1;
++                    const int p0 = P0;
++                    const int q0 = Q0;
++                    const int q1 = Q1;
++                    const int q2 = Q2;
++                    int delta0   = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
++                    if (abs(delta0) < 10 * tc) {
++                        delta0 = av_clip(delta0, -tc, tc);
++                        if (!no_p)
++                            P0 = av_clip_pixel(p0 + delta0);
++                        if (!no_q)
++                            Q0 = av_clip_pixel(q0 - delta0);
++                        if (!no_p && nd_p > 1) {
++                            const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
++                            P1 = av_clip_pixel(p1 + deltap1);
++                        }
++                        if (!no_q && nd_q > 1) {
++                            const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
++                            Q1 = av_clip_pixel(q1 + deltaq1);
++                        }
++                    }
++                    pix += ystride;  // next line of the segment
++                }
++            }
++        }
++    }
++}
++
++static void FUNC(hevc_loop_filter_chroma)(uint8_t *_pix, ptrdiff_t _xstride,
++                                          ptrdiff_t _ystride, int *_tc,
++                                          uint8_t *_no_p, uint8_t *_no_q)
++{   // Chroma deblocking of two 4-line segments: only P0 and Q0 change, by a delta clipped to +/- tc.
++    int d, j, no_p, no_q;
++    pixel *pix        = (pixel *)_pix;
++    ptrdiff_t xstride = _xstride / sizeof(pixel);  // step across the edge (P <-> Q), in pixels
++    ptrdiff_t ystride = _ystride / sizeof(pixel);  // step from one filtered line to the next, in pixels
++
++    for (j = 0; j < 2; j++) {
++        const int tc = _tc[j] << (BIT_DEPTH - 8);  // scaled by 2^(BIT_DEPTH - 8)
++        if (tc <= 0) {  // non-positive tc: skip this segment's 4 lines unmodified
++            pix += 4 * ystride;
++            continue;
++        }
++        no_p = _no_p[j];  // non-zero: never write the P side
++        no_q = _no_q[j];  // non-zero: never write the Q side
++
++        for (d = 0; d < 4; d++) {
++            int delta0;
++            const int p1 = P1;
++            const int p0 = P0;
++            const int q0 = Q0;
++            const int q1 = Q1;
++            delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
++            if (!no_p)
++                P0 = av_clip_pixel(p0 + delta0);
++            if (!no_q)
++                Q0 = av_clip_pixel(q0 - delta0);
++            pix += ystride;
++        }
++    }
++}
++
++static void FUNC(hevc_h_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
++                                            int32_t *tc, uint8_t *no_p,
++                                            uint8_t *no_q)
++{   // xstride = stride, ystride = one pixel: P/Q taps are stride bytes apart, filtered lines are adjacent pixels.
++    FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel), tc, no_p, no_q);
++}
++
++static void FUNC(hevc_v_loop_filter_chroma)(uint8_t *pix, ptrdiff_t stride,
++                                            int32_t *tc, uint8_t *no_p,
++                                            uint8_t *no_q)
++{   // xstride = one pixel, ystride = stride: P/Q taps are adjacent pixels, filtered lines are stride bytes apart.
++    FUNC(hevc_loop_filter_chroma)(pix, sizeof(pixel), stride, tc, no_p, no_q);
++}
++
++static void FUNC(hevc_h_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
++                                          int beta, int32_t *tc, uint8_t *no_p,
++                                          uint8_t *no_q)
++{   // xstride = stride, ystride = one pixel: P/Q taps are stride bytes apart, filtered lines are adjacent pixels.
++    FUNC(hevc_loop_filter_luma)(pix, stride, sizeof(pixel),
++                                beta, tc, no_p, no_q);
++}
++
++static void FUNC(hevc_v_loop_filter_luma)(uint8_t *pix, ptrdiff_t stride,
++                                          int beta, int32_t *tc, uint8_t *no_p,
++                                          uint8_t *no_q)
++{   // xstride = one pixel, ystride = stride: P/Q taps are adjacent pixels, filtered lines are stride bytes apart.
++    FUNC(hevc_loop_filter_luma)(pix, sizeof(pixel), stride,
++                                beta, tc, no_p, no_q);
++}
++
++#undef P3
++#undef P2
++#undef P1
++#undef P0
++#undef Q0
++#undef Q1
++#undef Q2
++#undef Q3
++
++#undef TP3
++#undef TP2
++#undef TP1
++#undef TP0
++#undef TQ0
++#undef TQ1
++#undef TQ2
++#undef TQ3
++
++// Line zero, split layout: P3..P0 are pix_l[0..3 * xstride], Q0..Q3 are pix_r[0..3 * xstride]
++#define P3 pix_l[0 * xstride]
++#define P2 pix_l[1 * xstride]
++#define P1 pix_l[2 * xstride]
++#define P0 pix_l[3 * xstride]
++#define Q0 pix_r[0 * xstride]
++#define Q1 pix_r[1 * xstride]
++#define Q2 pix_r[2 * xstride]
++#define Q3 pix_r[3 * xstride]
++
++// Line three: the same taps 3 * ystride further on. Used only for the deblocking decision
++#define TP3 pix_l[0 * xstride + 3 * ystride]
++#define TP2 pix_l[1 * xstride + 3 * ystride]
++#define TP1 pix_l[2 * xstride + 3 * ystride]
++#define TP0 pix_l[3 * xstride + 3 * ystride]
++#define TQ0 pix_r[0 * xstride + 3 * ystride]
++#define TQ1 pix_r[1 * xstride + 3 * ystride]
++#define TQ2 pix_r[2 * xstride + 3 * ystride]
++#define TQ3 pix_r[3 * xstride + 3 * ystride]
++
++// Same filter as hevc_loop_filter_luma with xstride fixed at 1, except that the P and Q
++// samples come from separate pointers and tc / no_p / no_q arrive packed in tc2 / no_f
++static void FUNC(hevc_v_loop_filter_luma2)(uint8_t * _pix_r,
++                                 unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f,
++                                 uint8_t * _pix_l)
++{
++    int d, j;
++    pixel *pix_l        = (pixel *)_pix_l;  // addresses P3 of line 0 (P0 is pix_l[3])
++    pixel *pix_r        = (pixel *)_pix_r;  // addresses Q0 of line 0
++    const ptrdiff_t xstride = 1;
++    const ptrdiff_t ystride = _stride / sizeof(pixel);
++    // beta (and tc below) are scaled by 2^(BIT_DEPTH - 8)
++    beta <<= BIT_DEPTH - 8;
++    // One pass per 4-line segment
++    for (j = 0; j < 2; j++) {
++        const int dp0  = abs(P2  - 2 * P1  + P0);   // second differences on line 0 ...
++        const int dq0  = abs(Q2  - 2 * Q1  + Q0);
++        const int dp3  = abs(TP2 - 2 * TP1 + TP0);  // ... and on line 3 of the segment
++        const int dq3  = abs(TQ2 - 2 * TQ1 + TQ0);
++        const int d0   = dp0 + dq0;
++        const int d3   = dp3 + dq3;
++        const int tc   = ((tc2 >> (j << 4)) & 0xffff) << (BIT_DEPTH - 8);  // segment j's tc is 16-bit field j of tc2
++        const int no_p = no_f & 1;  // bit 0: never write the P side
++        const int no_q = no_f & 2;  // bit 1: never write the Q side
++
++        if (d0 + d3 >= beta) {
++            pix_l += 4 * ystride;  // decision fails: skip this segment's 4 lines unmodified
++            pix_r += 4 * ystride;
++            continue;
++        } else {
++            const int beta_3 = beta >> 3;
++            const int beta_2 = beta >> 2;
++            const int tc25   = ((tc * 5 + 1) >> 1);
++            // Strong filtering requires lines 0 and 3 to both pass all three threshold tests
++            if (abs(P3  -  P0) + abs(Q3  -  Q0) < beta_3 && abs(P0  -  Q0) < tc25 &&
++                abs(TP3 - TP0) + abs(TQ3 - TQ0) < beta_3 && abs(TP0 - TQ0) < tc25 &&
++                                      (d0 << 1) < beta_2 &&      (d3 << 1) < beta_2) {
++                // strong filtering: P0..P2 / Q0..Q2 are rewritten, each change clipped to +/- 2 * tc
++                const int tc_x2 = tc << 1;  // named tc_x2 so it does not shadow the packed tc2 parameter
++                for (d = 0; d < 4; d++) {
++                    const int p3 = P3;  // snapshot all taps before any of them is overwritten
++                    const int p2 = P2;
++                    const int p1 = P1;
++                    const int p0 = P0;
++                    const int q0 = Q0;
++                    const int q1 = Q1;
++                    const int q2 = Q2;
++                    const int q3 = Q3;
++                    if (!no_p) {
++                        P0 = p0 + av_clip(((p2 + 2 * p1 + 2 * p0 + 2 * q0 + q1 + 4) >> 3) - p0, -tc_x2, tc_x2);
++                        P1 = p1 + av_clip(((p2 + p1 + p0 + q0 + 2) >> 2) - p1, -tc_x2, tc_x2);
++                        P2 = p2 + av_clip(((2 * p3 + 3 * p2 + p1 + p0 + q0 + 4) >> 3) - p2, -tc_x2, tc_x2);
++                    }
++                    if (!no_q) {
++                        Q0 = q0 + av_clip(((p1 + 2 * p0 + 2 * q0 + 2 * q1 + q2 + 4) >> 3) - q0, -tc_x2, tc_x2);
++                        Q1 = q1 + av_clip(((p0 + q0 + q1 + q2 + 2) >> 2) - q1, -tc_x2, tc_x2);
++                        Q2 = q2 + av_clip(((2 * q3 + 3 * q2 + q1 + q0 + p0 + 4) >> 3) - q2, -tc_x2, tc_x2);
++                    }
++                    pix_l += ystride;  // next line of the segment, on both pointers
++                    pix_r += ystride;
++                }
++            } else { // normal filtering
++                int nd_p = 1;
++                int nd_q = 1;
++                const int tc_2 = tc >> 1;
++                if (dp0 + dp3 < ((beta + (beta >> 1)) >> 3))
++                    nd_p = 2;  // low P-side activity: P1 is adjusted as well as P0
++                if (dq0 + dq3 < ((beta + (beta >> 1)) >> 3))
++                    nd_q = 2;  // low Q-side activity: Q1 is adjusted as well as Q0
++                // Per line: compute delta0 and apply it only when |delta0| < 10 * tc
++                for (d = 0; d < 4; d++) {
++                    const int p2 = P2;
++                    const int p1 = P1;
++                    const int p0 = P0;
++                    const int q0 = Q0;
++                    const int q1 = Q1;
++                    const int q2 = Q2;
++                    int delta0   = (9 * (q0 - p0) - 3 * (q1 - p1) + 8) >> 4;
++                    if (abs(delta0) < 10 * tc) {
++                        delta0 = av_clip(delta0, -tc, tc);
++                        if (!no_p)
++                            P0 = av_clip_pixel(p0 + delta0);
++                        if (!no_q)
++                            Q0 = av_clip_pixel(q0 - delta0);
++                        if (!no_p && nd_p > 1) {
++                            const int deltap1 = av_clip((((p2 + p0 + 1) >> 1) - p1 + delta0) >> 1, -tc_2, tc_2);
++                            P1 = av_clip_pixel(p1 + deltap1);
++                        }
++                        if (!no_q && nd_q > 1) {
++                            const int deltaq1 = av_clip((((q2 + q0 + 1) >> 1) - q1 - delta0) >> 1, -tc_2, tc_2);
++                            Q1 = av_clip_pixel(q1 + deltaq1);
++                        }
++                    }
++                    pix_l += ystride;  // next line of the segment, on both pointers
++                    pix_r += ystride;
++                }
++            }
++        }
++    }
++}
++
++static void FUNC(hevc_h_loop_filter_luma2)(uint8_t * _pix_r,
++                                 unsigned int _stride, unsigned int beta, unsigned int tc2, unsigned int no_f)
++{
++    // Unpack the packed arguments into the arrays hevc_h_loop_filter_luma expects, then call it
++    int32_t tc[2] = {tc2 & 0xffff, tc2 >> 16};  // low 16 bits -> segment 0, high 16 bits -> segment 1
++    uint8_t no_p[2] = {no_f & 1, no_f & 1};     // bit 0 suppresses P-side writes in both segments
++    uint8_t no_q[2] = {no_f & 2, no_f & 2};     // bit 1 suppresses Q-side writes in both segments
++    FUNC(hevc_h_loop_filter_luma)(_pix_r, _stride, beta, tc, no_p, no_q);
++}
++
++#undef TP3
++#undef TP2
++#undef TP1
++#undef TP0
++#undef TQ0
++#undef TQ1
++#undef TQ2
++#undef TQ3
++
++#undef P3
++#undef P2
++#undef P1
++#undef P0
++#undef Q0
++#undef Q1
++#undef Q2
++#undef Q3
++
++#define P1 pix_l[0 * xstride]  // P taps are read forwards from pix_l ...
++#define P0 pix_l[1 * xstride]
++#define Q0 pix_r[0 * xstride]  // ... and Q taps forwards from pix_r
++#define Q1 pix_r[1 * xstride]
++
++static void FUNC(hevc_loop_filter_uv2)(uint8_t *_pix_l, ptrdiff_t _xstride,
++                                          ptrdiff_t _ystride, const int32_t *_tc,
++                                          const uint8_t *_no_p, const uint8_t *_no_q, uint8_t *_pix_r)
++{   // Same arithmetic as hevc_loop_filter_chroma, but P1/P0 are read via _pix_l and Q0/Q1 via _pix_r.
++    int d, j, no_p, no_q;
++    pixel *pix_l        = (pixel *)_pix_l;
++    pixel *pix_r        = (pixel *)_pix_r;
++    ptrdiff_t xstride = _xstride / sizeof(pixel);  // distance between successive taps, in pixels
++    ptrdiff_t ystride = _ystride / sizeof(pixel);  // distance between filtered lines, in pixels
++
++    for (j = 0; j < 2; j++) {
++        const int tc = _tc[j] << (BIT_DEPTH - 8);  // scaled by 2^(BIT_DEPTH - 8)
++        if (tc <= 0) {  // non-positive tc: skip this segment's 4 lines unmodified
++            pix_l += 4 * ystride;
++            pix_r += 4 * ystride;
++            continue;
++        }
++        no_p = _no_p[j];  // non-zero: never write the P side
++        no_q = _no_q[j];  // non-zero: never write the Q side
++
++        for (d = 0; d < 4; d++) {
++            int delta0;
++            const int p1 = P1;
++            const int p0 = P0;
++            const int q0 = Q0;
++            const int q1 = Q1;
++            delta0 = av_clip((((q0 - p0) * 4) + p1 - q1 + 4) >> 3, -tc, tc);
++            if (!no_p)
++                P0 = av_clip_pixel(p0 + delta0);
++            if (!no_q)
++                Q0 = av_clip_pixel(q0 - delta0);
++            pix_l += ystride;
++            pix_r += ystride;
++        }
++    }
++}
++
++static void FUNC(hevc_h_loop_filter_uv)(uint8_t * pix, unsigned int stride, uint32_t tc4,
++                                 unsigned int no_f)
++{   // Runs hevc_loop_filter_chroma twice, at pix and one sample later; presumably U then V of interleaved chroma.
++    uint8_t no_p[2] = {no_f & 1, no_f & 2};  // bits 0/1: P-side write suppression for segments 0/1
++    uint8_t no_q[2] = {no_f & 4, no_f & 8};  // bits 2/3: Q-side write suppression for segments 0/1
++    int32_t tc[4] = {tc4 & 0xff, (tc4 >> 8) & 0xff, (tc4 >> 16) & 0xff, tc4 >> 24};  // four packed 8-bit tc values
++    FUNC(hevc_loop_filter_chroma)(pix, stride, sizeof(pixel) * 2, tc, no_p, no_q);  // tc[0..1]; lines 2 samples apart
++    FUNC(hevc_loop_filter_chroma)(pix + sizeof(pixel), stride, sizeof(pixel) * 2, tc + 2, no_p, no_q);  // tc[2..3]
++}
++
++static void FUNC(hevc_v_loop_filter_uv2)(uint8_t * src_r, unsigned int stride, uint32_t tc4,
++                                 uint8_t * src_l,
++                                 unsigned int no_f)
++{   // Runs hevc_loop_filter_uv2 twice, at the given pointers and one sample later; presumably U then V.
++    uint8_t no_p[2] = {no_f & 1, no_f & 2};  // bits 0/1: P-side write suppression for segments 0/1
++    uint8_t no_q[2] = {no_f & 4, no_f & 8};  // bits 2/3: Q-side write suppression for segments 0/1
++    int32_t tc[4] = {tc4 & 0xff, (tc4 >> 8) & 0xff, (tc4 >> 16) & 0xff, tc4 >> 24};  // four packed 8-bit tc values
++    FUNC(hevc_loop_filter_uv2)(src_l, sizeof(pixel) * 2, stride, tc, no_p, no_q, src_r);  // tc[0..1]; taps 2 samples apart
++    FUNC(hevc_loop_filter_uv2)(src_l + sizeof(pixel), sizeof(pixel) * 2, stride, tc + 2, no_p, no_q, src_r + sizeof(pixel));
++}
++
++#undef P1
++#undef P0
++#undef Q0
++#undef Q1
++
+--- /dev/null
++++ b/libavcodec/rpi_hevcpred.c
+@@ -0,0 +1,161 @@
++/*
++ * HEVC video Decoder
++ *
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ * Copyright (C) 2018 John Cox for Raspberry Pi (Trading)
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "rpi_hevcdec.h"
++
++#include "rpi_hevcpred.h"
++#if (ARCH_ARM)
++#include "arm/rpi_hevcpred_arm.h"
++#endif
++
++#define PRED_C 0
++#define BIT_DEPTH 8
++#include "rpi_hevcpred_template.c"
++#undef BIT_DEPTH
++
++#define BIT_DEPTH 9
++#include "rpi_hevcpred_template.c"
++#undef BIT_DEPTH
++
++#define BIT_DEPTH 10
++#include "rpi_hevcpred_template.c"
++#undef BIT_DEPTH
++
++#define BIT_DEPTH 12
++#include "rpi_hevcpred_template.c"
++#undef BIT_DEPTH
++#undef PRED_C
++
++#define PRED_C 1
++#define BIT_DEPTH 8
++#include "rpi_hevcpred_template.c"
++#undef BIT_DEPTH
++
++#define BIT_DEPTH 9
++#include "rpi_hevcpred_template.c"
++#undef BIT_DEPTH
++
++#define BIT_DEPTH 10
++#include "rpi_hevcpred_template.c"
++#undef BIT_DEPTH
++
++#define BIT_DEPTH 12
++#include "rpi_hevcpred_template.c"
++#undef BIT_DEPTH
++#undef PRED_C
++
++void ff_hevc_rpi_pred_init(HEVCRpiPredContext *hpc, int bit_depth)
++{   // Fill every member of hpc with the C functions for bit_depth, then call the arch-specific init (if any).
++#undef FUNC
++#define FUNC(a, depth) a ## _ ## depth
++
++#undef FUNCC
++#define FUNCC(a, depth) a ## _ ## depth ## _c
++    // pred_vertical[] and pred_horizontal[] are given the same pred_angular_N functions as pred_angular[]
++#define HEVC_PRED_Y(depth)                                \
++    hpc->intra_pred      = FUNC(intra_pred, depth);     \
++    hpc->intra_filter[0] = FUNC(intra_filter_2, depth); \
++    hpc->intra_filter[1] = FUNC(intra_filter_3, depth); \
++    hpc->intra_filter[2] = FUNC(intra_filter_4, depth); \
++    hpc->intra_filter[3] = FUNC(intra_filter_5, depth); \
++    hpc->pred_planar[0]  = FUNC(pred_planar_0, depth);  \
++    hpc->pred_planar[1]  = FUNC(pred_planar_1, depth);  \
++    hpc->pred_planar[2]  = FUNC(pred_planar_2, depth);  \
++    hpc->pred_planar[3]  = FUNC(pred_planar_3, depth);  \
++    hpc->pred_dc[0]      = FUNC(pred_dc_0, depth);      \
++    hpc->pred_dc[1]      = FUNC(pred_dc_1, depth);      \
++    hpc->pred_dc[2]      = FUNC(pred_dc_2, depth);      \
++    hpc->pred_dc[3]      = FUNC(pred_dc_3, depth);      \
++    hpc->pred_vertical[0] = FUNC(pred_angular_0, depth); \
++    hpc->pred_vertical[1] = FUNC(pred_angular_1, depth); \
++    hpc->pred_vertical[2] = FUNC(pred_angular_2, depth); \
++    hpc->pred_vertical[3] = FUNC(pred_angular_3, depth); \
++    hpc->pred_horizontal[0] = FUNC(pred_angular_0, depth); \
++    hpc->pred_horizontal[1] = FUNC(pred_angular_1, depth); \
++    hpc->pred_horizontal[2] = FUNC(pred_angular_2, depth); \
++    hpc->pred_horizontal[3] = FUNC(pred_angular_3, depth); \
++    hpc->pred_angular[0] = FUNC(pred_angular_0, depth); \
++    hpc->pred_angular[1] = FUNC(pred_angular_1, depth); \
++    hpc->pred_angular[2] = FUNC(pred_angular_2, depth); \
++    hpc->pred_angular[3] = FUNC(pred_angular_3, depth); \
++    hpc->pred_dc0[0]     = FUNC(pred_dc0_0, depth);     \
++    hpc->pred_dc0[1]     = FUNC(pred_dc0_1, depth);     \
++    hpc->pred_dc0[2]     = FUNC(pred_dc0_2, depth);     \
++    hpc->pred_dc0[3]     = FUNC(pred_dc0_3, depth);
++    // Same table for the "_c" members, using the functions whose names end in _<depth>_c
++#define HEVC_PRED_C(depth)                                \
++    hpc->intra_pred_c      = FUNCC(intra_pred, depth);     \
++    hpc->intra_filter_c[0] = FUNCC(intra_filter_2, depth); \
++    hpc->intra_filter_c[1] = FUNCC(intra_filter_3, depth); \
++    hpc->intra_filter_c[2] = FUNCC(intra_filter_4, depth); \
++    hpc->intra_filter_c[3] = FUNCC(intra_filter_5, depth); \
++    hpc->pred_planar_c[0]  = FUNCC(pred_planar_0, depth);  \
++    hpc->pred_planar_c[1]  = FUNCC(pred_planar_1, depth);  \
++    hpc->pred_planar_c[2]  = FUNCC(pred_planar_2, depth);  \
++    hpc->pred_planar_c[3]  = FUNCC(pred_planar_3, depth);  \
++    hpc->pred_dc_c[0]      = FUNCC(pred_dc_0, depth);      \
++    hpc->pred_dc_c[1]      = FUNCC(pred_dc_1, depth);      \
++    hpc->pred_dc_c[2]      = FUNCC(pred_dc_2, depth);      \
++    hpc->pred_dc_c[3]      = FUNCC(pred_dc_3, depth);      \
++    hpc->pred_vertical_c[0] = FUNCC(pred_angular_0, depth); \
++    hpc->pred_vertical_c[1] = FUNCC(pred_angular_1, depth); \
++    hpc->pred_vertical_c[2] = FUNCC(pred_angular_2, depth); \
++    hpc->pred_vertical_c[3] = FUNCC(pred_angular_3, depth); \
++    hpc->pred_horizontal_c[0] = FUNCC(pred_angular_0, depth); \
++    hpc->pred_horizontal_c[1] = FUNCC(pred_angular_1, depth); \
++    hpc->pred_horizontal_c[2] = FUNCC(pred_angular_2, depth); \
++    hpc->pred_horizontal_c[3] = FUNCC(pred_angular_3, depth); \
++    hpc->pred_angular_c[0] = FUNCC(pred_angular_0, depth); \
++    hpc->pred_angular_c[1] = FUNCC(pred_angular_1, depth); \
++    hpc->pred_angular_c[2] = FUNCC(pred_angular_2, depth); \
++    hpc->pred_angular_c[3] = FUNCC(pred_angular_3, depth); \
++    hpc->pred_dc0_c[0]     = FUNCC(pred_dc0_0, depth);     \
++    hpc->pred_dc0_c[1]     = FUNCC(pred_dc0_1, depth);     \
++    hpc->pred_dc0_c[2]     = FUNCC(pred_dc0_2, depth);     \
++    hpc->pred_dc0_c[3]     = FUNCC(pred_dc0_3, depth);
++
++#define HEVC_PRED(depth) \
++    HEVC_PRED_Y(depth); \
++    HEVC_PRED_C(depth);
++
++    switch (bit_depth) {
++    case 9:
++        HEVC_PRED(9);
++        break;
++    case 10:
++        HEVC_PRED(10);
++        break;
++    case 12:
++        HEVC_PRED(12);
++        break;
++    default:  // any other bit_depth (including 8) gets the 8-bit functions
++        HEVC_PRED(8);
++        break;
++    }
++
++#if (ARCH_ARM)
++    ff_hevc_rpi_pred_init_arm(hpc, bit_depth);
++#elif (ARCH_MIPS)  // NOTE(review): no declaration of the _mips init is visible in this file or rpi_hevcpred.h - confirm
++    ff_hevc_rpi_pred_init_mips(hpc, bit_depth);
++#endif
++}
+--- /dev/null
++++ b/libavcodec/rpi_hevcpred.h
+@@ -0,0 +1,123 @@
++/*
++ * HEVC video Decoder
++ *
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#ifndef AVCODEC_RPI_HEVCPRED_H
++#define AVCODEC_RPI_HEVCPRED_H
++
++#include <stddef.h>
++#include <stdint.h>
++#include "config.h"
++
++struct HEVCRpiContext;
++struct HEVCRpiLocalContext;
++
++enum IntraPredMode {  // intra prediction modes: planar (0), DC (1), then angular modes 2..34
++    INTRA_PLANAR = 0,
++    INTRA_DC,
++    INTRA_ANGULAR_2,
++    INTRA_ANGULAR_3,
++    INTRA_ANGULAR_4,
++    INTRA_ANGULAR_5,
++    INTRA_ANGULAR_6,
++    INTRA_ANGULAR_7,
++    INTRA_ANGULAR_8,
++    INTRA_ANGULAR_9,
++    INTRA_ANGULAR_10,
++    INTRA_ANGULAR_11,
++    INTRA_ANGULAR_12,
++    INTRA_ANGULAR_13,
++    INTRA_ANGULAR_14,
++    INTRA_ANGULAR_15,
++    INTRA_ANGULAR_16,
++    INTRA_ANGULAR_17,
++    INTRA_ANGULAR_18,
++    INTRA_ANGULAR_19,
++    INTRA_ANGULAR_20,
++    INTRA_ANGULAR_21,
++    INTRA_ANGULAR_22,
++    INTRA_ANGULAR_23,
++    INTRA_ANGULAR_24,
++    INTRA_ANGULAR_25,
++    INTRA_ANGULAR_26,
++    INTRA_ANGULAR_27,
++    INTRA_ANGULAR_28,
++    INTRA_ANGULAR_29,
++    INTRA_ANGULAR_30,
++    INTRA_ANGULAR_31,
++    INTRA_ANGULAR_32,
++    INTRA_ANGULAR_33,
++    INTRA_ANGULAR_34,  // value 34: each enumerator's value equals the mode number in its name
++};
++#define INTRA_ANGULAR_HORIZONTAL INTRA_ANGULAR_10  // named alias for angular mode 10
++#define INTRA_ANGULAR_VERTICAL   INTRA_ANGULAR_26  // named alias for angular mode 26
++
++typedef void intra_filter_fn_t(  // signature of the HEVCRpiPredContext intra_filter[] / intra_filter_c[] entries
++        uint8_t * const left, uint8_t * const top,  // writable (presumably the outputs - confirm in the template)
++        const unsigned int req, const unsigned int avail,
++        const uint8_t * const src_l, const uint8_t * const src_u, const uint8_t * const src_ur,  // read-only inputs
++        const unsigned int stride,
++        const unsigned int top_right_size, const unsigned int down_left_size);
++
++typedef struct HEVCRpiPredContext {  // function-pointer table filled in by ff_hevc_rpi_pred_init()
++    void (*intra_pred)(const struct HEVCRpiContext * const s,
++                          const enum IntraPredMode mode, const unsigned int x0, const unsigned int y0,
++                          const unsigned int avail, const unsigned int log2_size);
++    // NOTE(review): each [4] array holds functions suffixed _0.._3; index is presumably log2_size - 2 - confirm
++    intra_filter_fn_t *intra_filter[4];  // ff_hevc_rpi_pred_init stores intra_filter_2..5 in [0..3]
++    void (*pred_planar[4])(uint8_t *src, const uint8_t *top,
++                           const uint8_t *left, ptrdiff_t stride);
++    void (*pred_dc[4])(uint8_t *src, const uint8_t *top, const uint8_t *left,
++                    ptrdiff_t stride);
++    void (*pred_angular[4])(uint8_t *src, const uint8_t *top,
++                            const uint8_t *left, ptrdiff_t stride,
++                            int mode);
++    void (*pred_vertical[4])(uint8_t *src, const uint8_t *top,
++                            const uint8_t *left, ptrdiff_t stride,
++                            int mode);
++    void (*pred_horizontal[4])(uint8_t *src, const uint8_t *top,
++                            const uint8_t *left, ptrdiff_t stride,
++                            int mode);
++    void (*pred_dc0[4])(uint8_t *src, ptrdiff_t stride);
++    // "_c" members: same layout, filled with the ..._c functions (the template built with PRED_C set to 1)
++    void (*intra_pred_c)(const struct HEVCRpiContext * const s,
++                          const enum IntraPredMode mode, const unsigned int x0, const unsigned int y0,
++                          const unsigned int avail, const unsigned int log2_size);
++    intra_filter_fn_t *intra_filter_c[4];
++    void (*pred_planar_c[4])(uint8_t *src, const uint8_t *top,
++                           const uint8_t *left, ptrdiff_t stride);
++    void (*pred_dc_c[4])(uint8_t *src, const uint8_t *top, const uint8_t *left,
++                    ptrdiff_t stride);
++    void (*pred_angular_c[4])(uint8_t *src, const uint8_t *top,
++                            const uint8_t *left, ptrdiff_t stride,
++                            int mode);
++    void (*pred_vertical_c[4])(uint8_t *src, const uint8_t *top,
++                            const uint8_t *left, ptrdiff_t stride,
++                            int mode);
++    void (*pred_horizontal_c[4])(uint8_t *src, const uint8_t *top,
++                            const uint8_t *left, ptrdiff_t stride,
++                            int mode);
++    void (*pred_dc0_c[4])(uint8_t *src, ptrdiff_t stride);
++} HEVCRpiPredContext;
++
++void ff_hevc_rpi_pred_init(HEVCRpiPredContext *hpc, int bit_depth);
++
++#endif /* AVCODEC_RPI_HEVCPRED_H */
+--- /dev/null
++++ b/libavcodec/rpi_hevcpred_template.c
+@@ -0,0 +1,1407 @@
++/*
++ * HEVC video decoder
++ *
++ * Copyright (C) 2012 - 2013 Guillaume Martres
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++#include "config.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/rpi_sand_fns.h"
++#include "bit_depth_template.c"
++
++#include "rpi_hevcdec.h"
++#include "rpi_hevcpred.h"
++
++#define DUMP_PRED 0
++
++#define POS(x, y) src[(x) + stride * (y)]
++
++// INCLUDED_ONCE defined at EOF
++#ifndef INCLUDED_ONCE
++typedef uint8_t (* c8_dst_ptr_t)[2];          // pointer to pairs of 8-bit samples, writable
++typedef const uint8_t (* c8_src_ptr_t)[2];    // pointer to pairs of 8-bit samples, read-only
++typedef uint16_t (* c16_dst_ptr_t)[2];        // pointer to pairs of 16-bit samples, writable
++typedef const uint16_t (* c16_src_ptr_t)[2];  // pointer to pairs of 16-bit samples, read-only
++
++// *** On ARM make these NEON registers
++typedef struct pixel4_16 {  // four 16-bit lanes
++    uint16_t x[4];
++} pixel4_16;
++typedef struct pixel4_32 {  // four 32-bit lanes
++    uint32_t x[4];
++} pixel4_32;
++static inline pixel4_16 PIXEL_SPLAT_X4_16(const uint16_t x)
++{   // Return a pixel4_16 with all four lanes set to x.
++    pixel4_16 t = {{x, x, x, x}};
++    return t;
++}
++static inline pixel4_32 PIXEL_SPLAT_X4_32(const uint32_t x)
++{   // Return a pixel4_32 with all four lanes set to x.
++    pixel4_32 t = {{x, x, x, x}};
++    return t;
++}
++#endif
++
++#if PRED_C
++// For chroma we double pixel size so we copy pairs
++#undef pixel
++#undef pixel2
++#undef pixel4
++#undef dctcoef
++#undef INIT_CLIP
++#undef no_rnd_avg_pixel4
++#undef rnd_avg_pixel4
++#undef AV_RN2P
++#undef AV_RN4P
++#undef AV_RN4PA
++#undef AV_WN2P
++#undef AV_WN4P
++#undef AV_WN4PA
++#undef CLIP
++#undef FUNC
++#undef FUNCC
++#undef av_clip_pixel
++#undef PIXEL_SPLAT_X4
++
++#if BIT_DEPTH == 8  // a chroma "pixel" is a 16-bit unit holding a pair of 8-bit samples
++#define pixel uint16_t
++#define pixel4 pixel4_16
++#define PIXEL_SPLAT_X4 PIXEL_SPLAT_X4_16
++#define cpel uint8_t
++#define c_src_ptr_t  c8_src_ptr_t
++#define c_dst_ptr_t  c8_dst_ptr_t
++#else  // a chroma "pixel" is a 32-bit unit holding a pair of 16-bit samples
++#define pixel uint32_t
++#define pixel4 pixel4_32
++#define PIXEL_SPLAT_X4 PIXEL_SPLAT_X4_32
++#define cpel uint16_t
++#define c_src_ptr_t c16_src_ptr_t
++#define c_dst_ptr_t c16_dst_ptr_t
++#endif
++#define AV_RN4P(p) (*(pixel4*)(p))
++#define AV_WN4P(p,x) (*(pixel4*)(p) = (x))
++#define FUNC(a) FUNC2(a, BIT_DEPTH, _c)
++#endif
++
++
++// Get PW prior to horrid PRED_C trickery
++#if BIT_DEPTH == 8
++#define PW 1
++#else
++#define PW 2
++#endif
++
++
++#if DUMP_PRED && !defined(INCLUDED_ONCE)
++static void dump_pred_uv(const uint8_t * data, const unsigned int stride, const unsigned int size)
++{   // Debug aid, compiled only when DUMP_PRED is non-zero: print a size x size grid of bytes to stdout.
++    for (unsigned int y = 0; y != size; y++, data += stride * 2) {  // successive rows are stride * 2 bytes apart
++        for (unsigned int x = 0; x != size; x++) {
++            printf("%4d", data[x * 2]);  // every second byte of the row
++        }
++        printf("\n");
++    }
++    printf("\n");
++}
++#endif
++
++#ifndef INCLUDED_ONCE
++static inline void extend_8(void * ptr, const unsigned int v, unsigned int n)  // Store v into n bytes at ptr, four bytes per store
++{
++    if ((n >>= 2) != 0) {  // n becomes a count of 32-bit stores: n < 4 writes nothing and any remainder is dropped
++        uint32_t v4 = v | (v << 8);  // Replicate v into the low two bytes (assumes v < 256 - TODO confirm against callers)
++        uint32_t * p = (uint32_t *)ptr;  // NOTE(review): stores go through uint32_t, so ptr presumably needs 4-byte alignment
++        v4 = v4 | (v4 << 16);  // ...and then into all four bytes
++        do {
++            *p++ = v4;
++        } while (--n != 0);
++    }
++}
++// Store v into n 16-bit elements at ptr, four elements (two 32-bit stores) per iteration; n < 4 writes nothing
++static inline void extend_16(void * ptr, const unsigned int v, unsigned int n)
++{
++    if ((n >>= 2) != 0) {  // n becomes the iteration count; any remainder below 4 elements is dropped
++        uint32_t v2 = v | (v << 16);  // Two copies of v per word (assumes v < 65536 - TODO confirm against callers)
++        uint32_t * p = (uint32_t *)ptr;  // NOTE(review): stores go through uint32_t, so ptr presumably needs 4-byte alignment
++        do {
++            *p++ = v2;
++            *p++ = v2;
++        } while (--n != 0);
++    }
++}
++// Store v into n 32-bit elements at ptr, four elements per iteration; n < 4 writes nothing
++static inline void extend_32(void * ptr, const unsigned int v, unsigned int n)
++{
++    if ((n >>= 2) != 0) {  // n becomes the iteration count; any remainder below 4 elements is dropped
++        uint32_t * p = (uint32_t *)ptr;
++        do {
++            *p++ = v;
++            *p++ = v;
++            *p++ = v;
++            *p++ = v;
++        } while (--n != 0);
++    }
++}
++
++// Left-side availability mask for CIP, one bit per 4-pel unit.  Beware that this inverts the avail ordering:
++// bits [0, size/4) are DL, [size/4, size/2) are L and bit size/2 is UL.  For CIP it seems easier this way round
++static unsigned int cip_avail_l(const uint8_t * is_intra, const int i_stride, const unsigned int i_mask,
++                                const unsigned int log2_intra_bits, const unsigned int avail, unsigned int size,
++                              unsigned int s0, unsigned int odd_s)
++{
++    const unsigned int n = 1 << log2_intra_bits;  // Number of mask bits governed by each is_intra test
++    unsigned int fa = 0;  // Result mask
++    unsigned int i;
++
++    size >>= 2;   // Now in 4-pel units
++    s0 >>= 2;     // Likewise for the usable DL length
++
++    if ((avail & AVAIL_DL) != 0)
++        fa |= ((1 << s0) - 1) << (size - s0);  // Only the s0 DL units adjacent to L are set
++    if ((avail & AVAIL_L) != 0)
++        fa |= ((1 << size) - 1) << size;
++    if ((avail & AVAIL_UL) != 0)
++        fa |= 1 << (size << 1);
++
++    if (odd_s) {  // Test bit 0 on its own so that the grouped loop below starts at bit 1
++        if ((fa & 1) != 0 && (*is_intra & i_mask) == 0)
++            fa &= ~1;
++        is_intra += i_stride;
++    }
++
++    for (i = odd_s; (fa >> i) != 0; i += n, is_intra += i_stride) {  // Stops once no set bits remain at or above bit i
++        const unsigned int m = ((1 << n) - 1) << i;  // The n bits covered by the current is_intra byte
++        if ((fa & m) != 0 && (*is_intra & i_mask) == 0)  // i_mask bit clear: drop the whole group
++            fa &= ~m;
++    }
++
++    return fa;
++}
++// Top-side availability mask for CIP, one bit per 4-pel unit: bits [0, size/4) are U followed by s1/4 bits of UR
++static unsigned int cip_avail_u(const uint8_t * is_intra, unsigned int i_shift,
++                                const unsigned int log2_intra_bits, const unsigned int avail, unsigned int size,
++                                unsigned int s1, unsigned int odd_s)
++{
++    if ((avail & (AVAIL_U | AVAIL_UR)) == 0)  // Nothing above is available, so is_intra is not read at all
++    {
++        return 0;
++    }
++    else
++    {
++        const unsigned int n = 1 << log2_intra_bits;  // Number of mask bits governed by each is_intra flag bit
++        unsigned int fa = 0;  // Result mask
++        unsigned int i;
++        unsigned int im = ((is_intra[1] << 8) | (is_intra[0])) >> i_shift;  // Flag bits from two consecutive bytes, first flag of interest moved to bit 0
++
++        size >>= 2;   // Now in 4-pel units
++        s1 >>= 2;     // Likewise for the usable UR length
++
++        if ((avail & AVAIL_U) != 0)
++            fa |= ((1 << size) - 1);
++        if ((avail & AVAIL_UR) != 0)
++            fa |= ((1 << s1) - 1) << size;
++
++        if (odd_s) {  // Test bit 0 on its own so that the grouped loop below starts at bit 1
++            fa &= im | ~1;  // Clears bit 0 of fa when bit 0 of im is clear
++            im >>= 1;
++        }
++
++        for (i = odd_s; (fa >> i) != 0; i += n, im >>= 1) {  // Stops once no set bits remain at or above bit i
++            const unsigned int m = ((1 << n) - 1) << i;  // The n bits covered by the current flag bit
++            if ((im & 1) == 0)
++                fa &= ~m;
++        }
++        return fa;
++    }
++}
++
++
++// Returns the index of the lowest set bit of x; x must be non-zero
++static inline unsigned int rmbd(unsigned int x)
++{
++#if 1
++    return __builtin_ctz(x);  // Result is undefined for x == 0
++#else
++    unsigned int n = 0;  // Fallback (currently compiled out): binary search for the lowest set bit
++    if ((x & 0xffff) == 0) {
++        x >>= 16;
++        n += 16;
++    }
++    if ((x & 0xff) == 0) {
++        x >>= 8;
++        n += 8;
++    }
++    if ((x & 0xf) == 0) {
++        x >>= 4;
++        n += 4;
++    }
++    if ((x & 0x3) == 0) {
++        x >>= 2;
++        n += 2;
++    }
++
++    return (x & 1) == 0 ? n + 1 : n;
++#endif
++}
++#endif
++
++// CIP reference fill: writes left[2*size-1 .. 0], left[-1] (UL) and top[0 .. 2*size-1], copying 4-pel units flagged in avail_l / avail_u and padding the rest
++static void FUNC(cip_fill)(pixel * const left, pixel * const top,
++    const unsigned int avail_l, const unsigned int avail_u,
++    const pixel * const src_l, const pixel * const src_u, const pixel * const src_ur,
++    const unsigned int stride,
++    const unsigned int size)
++{
++    pixel a;  // Most recent available sample; used to pad unavailable units
++    unsigned int i;
++
++    // 1st find DL value: if the unit at bit 0 of avail_l is unavailable, seed a from the first available unit
++    if ((avail_l & 1) == 0) {
++        if (avail_l != 0)
++            a = src_l[((int)size * 2 - 1 - (int)rmbd(avail_l)*4) * (int)stride];  // First sample (in fill order) of the lowest-numbered available left unit
++        else
++        {
++            // (avail_l | avail_u) != 0 so this must be good
++            const unsigned int n = rmbd(avail_u)*4;  // Pel offset of the first available top unit
++            a = (n >= size) ? src_ur[n - size] : src_u[n];  // Offsets at or beyond size fall in UR
++        }
++    }  // Otherwise a is assigned by the first iteration of the L loop before it is read
++
++    // L: walk src_l from row 2*size-1 up to row 0, filling left[] from its last element down to left[0]
++    {
++        pixel * d = left + size * 2 - 1;
++        const pixel * s = src_l + (size * 2 - 1) * stride;
++        unsigned int x = avail_l;
++        for (i = 0; i < size * 2; i += 4, x >>= 1)
++        {
++            if ((x & 1) != 0) {
++                // Avail: copy four samples and remember the last one for later padding
++                *d-- = *s;
++                s -= stride;
++                *d-- = *s;
++                s -= stride;
++                *d-- = *s;
++                s -= stride;
++                *d-- = a = *s;
++                s -= stride;
++            }
++            else
++            {
++                *d-- = a;  // Not available: pad with the last good sample
++                *d-- = a;
++                *d-- = a;
++                *d-- = a;
++                s -= stride * 4;
++            }
++        }
++        // UL: d now points at left[-1] and s at the row above src_l
++        *d = a = (x & 1) != 0 ? *s : a;
++    }
++
++    // U: top[0 .. size-1] from src_u, carrying a over from the left / UL pass
++    {
++        pixel * d = top;
++        const pixel * s = src_u;
++        unsigned int x = avail_u;
++
++        for (i = 0; i < size; i += 4, x >>= 1)
++        {
++            if ((x & 1) != 0) {
++                // Avail: copy four samples and remember the last one for later padding
++                *d++ = *s++;
++                *d++ = *s++;
++                *d++ = *s++;
++                *d++ = a = *s++;
++            }
++            else
++            {
++                *d++ = a;  // Not available: pad with the last good sample
++                *d++ = a;
++                *d++ = a;
++                *d++ = a;
++                s += 4;
++            }
++        }
++
++        // UR: top[size .. 2*size-1] from src_ur; x carries on through the remaining avail_u bits
++        s = src_ur;
++        for (i = 0; i < size; i += 4, x >>= 1)
++        {
++            if ((x & 1) != 0) {
++                // Avail: copy four samples and remember the last one for later padding
++                *d++ = *s++;
++                *d++ = *s++;
++                *d++ = *s++;
++                *d++ = a = *s++;
++            }
++            else
++            {
++                *d++ = a;  // Not available: pad with the last good sample
++                *d++ = a;
++                *d++ = a;
++                *d++ = a;
++                s += 4;
++            }
++        }
++    }
++}
++
++// EXTEND picks the extend_N helper by PRED_C and PW: luma PW 1 -> 8, luma PW 2 or chroma PW 1 -> 16, chroma PW 2 -> 32
++#if !PRED_C && PW == 1
++#define EXTEND(ptr, val, len) extend_8(ptr, val, len)
++#elif (!PRED_C && PW == 2) || (PRED_C && PW == 1)
++#define EXTEND(ptr, val, len) extend_16(ptr, val, len)
++#else
++#define EXTEND(ptr, val, len) extend_32(ptr, val, len)
++#endif
++
++// Reqs:
++//
++// Planar:  DL[0], L, ul, U, UR[0]
++// DC:         dl, L, ul, U, ur
++// A2-9:       DL, L, ul, u, ur
++// A10:        dl, L, ul, u, ur
++// A11-17      dl, L, UL, U, ur
++// A18-25      dl, L, Ul, U, ur
++// A26         dl, l, ul, U, ur
++// A27-34      dl, l, ul, U, UR
++
++#ifndef INCLUDED_ONCE
++// NOTE(review): names suggest NEON variants for size 8 @ 8-bit and sizes 4 / 8 @ 16-bit, presumably defined in assembly - confirm
++intra_filter_fn_t ff_hevc_rpi_intra_filter_8_neon_8;
++intra_filter_fn_t ff_hevc_rpi_intra_filter_4_neon_16;
++intra_filter_fn_t ff_hevc_rpi_intra_filter_8_neon_16;
++// Neighbour sets requested for each of the 35 intra modes in the PRED_C build (indexed by mode; no FILTER_* flags)
++static const uint8_t req_avail_c[35] =
++{
++    AVAIL_DL | AVAIL_L | 0         |  AVAIL_U | AVAIL_UR,  // Planar (DL[0] & UR[0] only needed)
++               AVAIL_L | 0         |  AVAIL_U,             // DC
++    AVAIL_DL | AVAIL_L,                                    // 2
++    AVAIL_DL | AVAIL_L,                                    // 3
++    AVAIL_DL | AVAIL_L,                                    // 4
++    AVAIL_DL | AVAIL_L,                                    // 5
++    AVAIL_DL | AVAIL_L,                                    // 6
++    AVAIL_DL | AVAIL_L,                                    // 7
++    AVAIL_DL | AVAIL_L,                                    // 8
++    AVAIL_DL | AVAIL_L,                                    // 9
++               AVAIL_L,                                    // 10 (H)
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 11
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 12
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 13
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 14
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 15
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 16
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 17
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 18
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 19
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 20
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 21
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 22
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 23
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 24
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 25
++                                    AVAIL_U,               // 26 (V)
++                                    AVAIL_U | AVAIL_UR,    // 27
++                                    AVAIL_U | AVAIL_UR,    // 28
++                                    AVAIL_U | AVAIL_UR,    // 29
++                                    AVAIL_U | AVAIL_UR,    // 30
++                                    AVAIL_U | AVAIL_UR,    // 31
++                                    AVAIL_U | AVAIL_UR,    // 32
++                                    AVAIL_U | AVAIL_UR,    // 33
++                                    AVAIL_U | AVAIL_UR     // 34
++};
++// Requests for the !PRED_C build, indexed [log2_size - 2][mode]: neighbour sets plus the FILTER_* flags for that size and mode
++static const uint8_t req_avail[4][35] = {
++{  // 2
++    AVAIL_DL | AVAIL_L | 0         |  AVAIL_U | AVAIL_UR,  // Planar (DL[0] & UR[0] only needed)
++               AVAIL_L | 0         |  AVAIL_U,             // DC
++    AVAIL_DL | AVAIL_L,                                    // 2
++    AVAIL_DL | AVAIL_L,                                    // 3
++    AVAIL_DL | AVAIL_L,                                    // 4
++    AVAIL_DL | AVAIL_L,                                    // 5
++    AVAIL_DL | AVAIL_L,                                    // 6
++    AVAIL_DL | AVAIL_L,                                    // 7
++    AVAIL_DL | AVAIL_L,                                    // 8
++    AVAIL_DL | AVAIL_L,                                    // 9
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 10 (H)
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 11
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 12
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 13
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 14
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 15
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 16
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 17
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 18
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 19
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 20
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 21
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 22
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 23
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 24
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 25
++               AVAIL_L | AVAIL_UL | AVAIL_U,               // 26 (V)
++                                    AVAIL_U | AVAIL_UR,    // 27
++                                    AVAIL_U | AVAIL_UR,    // 28
++                                    AVAIL_U | AVAIL_UR,    // 29
++                                    AVAIL_U | AVAIL_UR,    // 30
++                                    AVAIL_U | AVAIL_UR,    // 31
++                                    AVAIL_U | AVAIL_UR,    // 32
++                                    AVAIL_U | AVAIL_UR,    // 33
++                                    AVAIL_U | AVAIL_UR     // 34
++},
++{  // 3
++    AVAIL_DL | AVAIL_L | 0        | AVAIL_U | AVAIL_UR | FILTER_LIGHT,  // Planar (DL[0] & UR[0] only needed)
++               AVAIL_L | 0        | AVAIL_U,                            // DC
++    AVAIL_DL | AVAIL_L                                 | FILTER_LIGHT,  // 2
++    AVAIL_DL | AVAIL_L                                 | 0,             // 3
++    AVAIL_DL | AVAIL_L                                 | 0,             // 4
++    AVAIL_DL | AVAIL_L                                 | 0,             // 5
++    AVAIL_DL | AVAIL_L                                 | 0,             // 6
++    AVAIL_DL | AVAIL_L                                 | 0,             // 7
++    AVAIL_DL | AVAIL_L                                 | 0,             // 8
++    AVAIL_DL | AVAIL_L                                 | 0,             // 9
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 10 (H)
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 11
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 12
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 13
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 14
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 15
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 16
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 17
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 18
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 19
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 20
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 21
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 22
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 23
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 24
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 25
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 26 (V)
++                                    AVAIL_U | AVAIL_UR | 0,             // 27
++                                    AVAIL_U | AVAIL_UR | 0,             // 28
++                                    AVAIL_U | AVAIL_UR | 0,             // 29
++                                    AVAIL_U | AVAIL_UR | 0,             // 30
++                                    AVAIL_U | AVAIL_UR | 0,             // 31
++                                    AVAIL_U | AVAIL_UR | 0,             // 32
++                                    AVAIL_U | AVAIL_UR | 0,             // 33
++                                    AVAIL_U | AVAIL_UR | FILTER_LIGHT   // 34
++},
++{  // 4
++    AVAIL_DL | AVAIL_L | 0        | AVAIL_U | AVAIL_UR | FILTER_LIGHT,  // Planar (DL[0] & UR[0] only needed)
++               AVAIL_L | 0        | AVAIL_U,                            // DC
++    AVAIL_DL | AVAIL_L                                 | FILTER_LIGHT,  // 2
++    AVAIL_DL | AVAIL_L                                 | FILTER_LIGHT,  // 3
++    AVAIL_DL | AVAIL_L                                 | FILTER_LIGHT,  // 4
++    AVAIL_DL | AVAIL_L                                 | FILTER_LIGHT,  // 5
++    AVAIL_DL | AVAIL_L                                 | FILTER_LIGHT,  // 6
++    AVAIL_DL | AVAIL_L                                 | FILTER_LIGHT,  // 7
++    AVAIL_DL | AVAIL_L                                 | FILTER_LIGHT,  // 8
++    AVAIL_DL | AVAIL_L                                 | 0,             // 9
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 10 (H)
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 11
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 12
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 13
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 14
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 15
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 16
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 17
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 18
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 19
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 20
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 21
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 22
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 23
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_LIGHT,  // 24
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 25
++               AVAIL_L | AVAIL_UL | AVAIL_U            | 0,             // 26 (V)
++                                    AVAIL_U | AVAIL_UR | 0,             // 27
++                                    AVAIL_U | AVAIL_UR | FILTER_LIGHT,  // 28
++                                    AVAIL_U | AVAIL_UR | FILTER_LIGHT,  // 29
++                                    AVAIL_U | AVAIL_UR | FILTER_LIGHT,  // 30
++                                    AVAIL_U | AVAIL_UR | FILTER_LIGHT,  // 31
++                                    AVAIL_U | AVAIL_UR | FILTER_LIGHT,  // 32
++                                    AVAIL_U | AVAIL_UR | FILTER_LIGHT,  // 33
++                                    AVAIL_U | AVAIL_UR | FILTER_LIGHT   // 34
++},
++{  // 5
++    AVAIL_DL | AVAIL_L | 0        | AVAIL_U | AVAIL_UR | FILTER_EITHER, // Planar (DL[0] & UR[0] only needed)
++               AVAIL_L | 0        | AVAIL_U,                            // DC
++    AVAIL_DL | AVAIL_L                                 | FILTER_EITHER, // 2
++    AVAIL_DL | AVAIL_L                                 | FILTER_EITHER, // 3
++    AVAIL_DL | AVAIL_L                                 | FILTER_EITHER, // 4
++    AVAIL_DL | AVAIL_L                                 | FILTER_EITHER, // 5
++    AVAIL_DL | AVAIL_L                                 | FILTER_EITHER, // 6
++    AVAIL_DL | AVAIL_L                                 | FILTER_EITHER, // 7
++    AVAIL_DL | AVAIL_L                                 | FILTER_EITHER, // 8
++    AVAIL_DL | AVAIL_L                                 | FILTER_EITHER, // 9
++               AVAIL_L                                 | 0,             // 10 (H)
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 11
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 12
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 13
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 14
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 15
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 16
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 17
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 18
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 19
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 20
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 21
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 22
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 23
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 24
++               AVAIL_L | AVAIL_UL | AVAIL_U            | FILTER_EITHER, // 25
++                                    AVAIL_U            | 0,             // 26 (V)
++                                    AVAIL_U | AVAIL_UR | FILTER_EITHER, // 27
++                                    AVAIL_U | AVAIL_UR | FILTER_EITHER, // 28
++                                    AVAIL_U | AVAIL_UR | FILTER_EITHER, // 29
++                                    AVAIL_U | AVAIL_UR | FILTER_EITHER, // 30
++                                    AVAIL_U | AVAIL_UR | FILTER_EITHER, // 31
++                                    AVAIL_U | AVAIL_UR | FILTER_EITHER, // 32
++                                    AVAIL_U | AVAIL_UR | FILTER_EITHER, // 33
++                                    AVAIL_U | AVAIL_UR | FILTER_EITHER  // 34
++}
++};
++
++
++#endif
++// [1 2 1] / 4 smoothing of b with its neighbours a and c, with +2 rounding before the shift
++#define filter_light1 FUNC(filter_light1)
++static inline pixel filter_light1(pixel a, pixel b, pixel c)
++{
++    return (a + b*2 + c + 2) >> 2;
++}
++// Light-filter n samples read from src at step sstride into dst[0 .. n-1]; p1 is the sample before src[0], pn the one after the last
++#define filter_light FUNC(filter_light)
++static inline void filter_light(pixel * dst, pixel p1, const pixel * src, const pixel pn, const int sstride, const unsigned int n)
++{
++    pixel p0;
++    pixel p2 = *src;
++    // Allow for final pel - it is just clearer to have the call take the actual number of output pels
++    unsigned int n_minus_1 = n - 1;  // n must be >= 2: the do-while below always runs once and counts down to zero
++
++    do
++    {
++        src += sstride;
++        p0 = p1;  // Slide the three-sample window along by one
++        p1 = p2;
++        p2 = *src;
++        *dst++ = filter_light1(p0, p1, p2);
++    } while (--n_minus_1 != 0);
++    *dst = filter_light1(p1, p2, pn);  // Final pel uses the caller-supplied following sample
++}
++// Linear ramp from p0 towards p1: dst[i] = (64 * p0 + 32 + (i + 1) * (p1 - p0)) >> 6 for i in [0, n); n must be non-zero
++#define filter_strong FUNC(filter_strong)
++static inline void filter_strong(pixel * dst, const unsigned int p0, const unsigned int p1, unsigned int n)
++{
++    unsigned int a = 64 * p0 + 32;  // Accumulator in 1/64 units, pre-biased for rounding
++    const int v = p1 - p0;  // Per-sample step; may be negative
++
++    do
++    {
++        *dst++ = (a += v) >> 6;
++    } while (--n != 0);
++}
++// Build left[] / top[] for one block: copy requested neighbours that are available, pad the rest from the nearest available sample, and smooth if req asks
++#define intra_filter FUNC(intra_filter)
++static av_always_inline void intra_filter(
++    pixel * const left, pixel * const top,
++    const unsigned int req, const unsigned int avail,
++    const pixel * const src_l, const pixel * const src_u, const pixel * const src_ur,
++    const unsigned int stride,
++    const unsigned int top_right_size, const unsigned int down_left_size,
++    const unsigned int log2_size)
++{
++    const unsigned int strong_threshold = 1 << (BIT_DEPTH - 5);
++    const unsigned int size = 1 << log2_size;
++
++    // a_ is the first pel in a section working round dl -> ur
++    // b_ is the last
++    // Beware that top & left work out from UL so usage of a_ & b_ may
++    // swap between them.  It is a bad naming scheme but I have found no
++    // better
++    const pixel * a_dl = src_l + (down_left_size + size - 1) * stride;
++    const pixel * b_dl = src_l + size * stride;
++    const pixel * a_l  = src_l + (size - 1) * stride;
++    const pixel * b_l  = src_l;
++    const pixel * ab_ul = src_l - stride;
++    const pixel * a_u = src_u;
++    const pixel * b_u = src_u + size - 1;
++    const pixel * a_ur = src_ur;
++    const pixel * b_ur = src_ur + top_right_size - 1;
++
++    const unsigned int want = req & ~avail;  // Requested but not available: must be synthesised
++    const unsigned int have = req & avail;   // Requested and available: read from the frame
++    unsigned int i;
++
++    if ((avail & AVAIL_DL) == 0)  // DL missing: substitute from the first available of L, UL, U, UR (later assignments override earlier)
++    {
++        a_dl = a_ur;
++        if ((avail & AVAIL_U) != 0)
++            a_dl = a_u;
++        if ((avail & AVAIL_UL) != 0)
++            a_dl = ab_ul;
++        if ((avail & AVAIL_L) != 0)
++            a_dl = a_l;
++        b_dl = a_dl;
++    }
++
++    if ((avail & AVAIL_L) == 0)  // Each later missing section collapses onto the last pel of the section before it
++    {
++        a_l = b_dl;
++        b_l = b_dl;
++    }
++    if ((avail & AVAIL_UL) == 0)
++    {
++        ab_ul = b_l;
++    }
++    if ((avail & AVAIL_U) == 0)
++    {
++        a_u = ab_ul;
++        b_u = ab_ul;
++    }
++    if ((avail & AVAIL_UR) == 0)
++    {
++        a_ur = b_u;
++        b_ur = b_u;
++    }
++
++    if ((req & FILTER_LIGHT) == 0 || PRED_C || log2_size == 2)  // PRED_C, log2_size compiler opt hints
++    {
++        if ((req & AVAIL_UL) != 0)  // Unfiltered path: plain copy / extend
++            left[-1] = *ab_ul;
++
++        if ((want & AVAIL_L) != 0)
++            EXTEND(left, *a_l, size);
++        if ((want & AVAIL_DL) != 0)
++            EXTEND(left + size, *a_dl, size);
++        if ((want & AVAIL_U) != 0)
++            EXTEND(top, *a_u, size);
++        if ((want & AVAIL_UR) != 0)
++            EXTEND(top + size, *a_ur, size);
++
++        if ((have & AVAIL_U) != 0)
++            // Always good - even with sand
++            memcpy(top, a_u, size * sizeof(pixel));
++        if ((have & AVAIL_UR) != 0)
++        {
++            memcpy(top + size, a_ur, top_right_size * sizeof(pixel));
++            EXTEND(top + size + top_right_size, *b_ur,
++                   size - top_right_size);  // Pad any part of UR beyond top_right_size with its last pel
++        }
++        if ((have & AVAIL_L) != 0)
++        {
++            for (i = 0; i < size; i++)
++                left[i] = b_l[stride * i];  // Gather the left column, one pel per row
++        }
++        if ((have & AVAIL_DL) != 0)
++        {
++            for (i = 0; i < down_left_size; i++)
++                left[i + size] = b_dl[stride * i];
++            EXTEND(left + size + down_left_size, *a_dl,
++                   size - down_left_size);  // Pad any part of DL beyond down_left_size with its last pel
++        }
++    }
++    else if ((req & FILTER_STRONG) != 0 && log2_size == 5 && // log2_size compiler opt hint
++            FFABS((int)(*a_dl - *a_l * 2 + *ab_ul)) < strong_threshold &&
++            FFABS((int)(*ab_ul - *b_u * 2 + *b_ur)) < strong_threshold)
++    {
++        if ((req & (AVAIL_U | AVAIL_UR)) != 0)  // Strong path: replace each edge with a linear ramp between its end pels
++            filter_strong(top, *ab_ul, *b_ur, size * 2);
++        left[-1] = *ab_ul;
++        if ((req & (AVAIL_L | AVAIL_DL)) != 0)
++            filter_strong(left, *ab_ul, *a_dl, size*2);
++    }
++    else
++    {
++        // Same code for both have & want for UL
++        if ((req & AVAIL_UL) != 0)
++        {
++            left[-1] = filter_light1(*b_l, *ab_ul, *a_u);
++        }
++
++        if ((want & AVAIL_L) != 0)
++        {
++            EXTEND(left, *a_l, size);
++            left[0] = (*a_l * 3 + *ab_ul + 2) >> 2;  // Only the pel next to UL differs from the flat fill
++        }
++        if ((want & AVAIL_DL) != 0)
++        {
++            // If we want DL then it cannot be avail so a_dl = a_l so no edge rounding
++            EXTEND(left + size, *a_l, size);
++        }
++        if ((want & AVAIL_U) != 0)
++        {
++            EXTEND(top, *a_u, size);
++            top[size - 1] = (*a_u * 3 + *a_ur + 2) >> 2;  // Only the pel next to UR differs from the flat fill
++        }
++        if ((want & AVAIL_UR) != 0)
++        {
++            // If we want UR then it cannot be avail so a_ur = b_u so no edge rounding
++            EXTEND(top + size, *a_ur, size);
++        }
++
++        if ((have & AVAIL_U) != 0)
++        {
++            filter_light(top, *ab_ul, a_u, *a_ur, 1, size);
++        }
++        if ((have & AVAIL_UR) != 0) {
++            filter_light(top + size, *b_u, a_ur, *b_ur, 1, top_right_size);
++            top[size*2 - 1] = *b_ur;  // The very last top pel is stored unfiltered
++            EXTEND(top + size + top_right_size, *b_ur, size - top_right_size);
++        }
++        if ((have & AVAIL_L) != 0)
++        {
++            filter_light(left, *ab_ul, b_l, *b_dl, stride, size);
++        }
++        if ((have & AVAIL_DL) != 0)
++        {
++            filter_light(left + size, *a_l, b_dl, *a_dl, stride, down_left_size);
++            left[size*2 - 1] = *a_dl;  // The very last left pel is stored unfiltered
++            EXTEND(left + size + down_left_size, *a_dl, size - down_left_size);
++        }
++    }
++}
++// One byte-pointer entry point per log2_size: forwards to intra_filter with log2_size constant and the stride divided by sizeof(pixel)
++#define INTRA_FILTER(log2_size) \
++static void FUNC(intra_filter_ ## log2_size)( \
++     uint8_t * const left, uint8_t * const top, \
++     const unsigned int req, const unsigned int avail, \
++     const uint8_t * const src_l, const uint8_t * const src_u, const uint8_t * const src_ur, \
++     const unsigned int stride, \
++     const unsigned int top_right_size, const unsigned int down_left_size) \
++{ \
++    intra_filter((pixel *)left, (pixel *)top, req, avail, \
++        (const pixel *)src_l, (const pixel *)src_u, (const pixel *)src_ur, stride / sizeof(pixel), top_right_size, down_left_size, log2_size); \
++}
++
++INTRA_FILTER(2)
++INTRA_FILTER(3)
++INTRA_FILTER(4)
++INTRA_FILTER(5)
++
++#undef intra_filter
++#undef INTRA_FILTER
++
++static void FUNC(intra_pred)(const HEVCRpiContext * const s,
++                                              const enum IntraPredMode mode, const unsigned int x0, const unsigned int y0, const unsigned int avail,
++                                              const unsigned int log2_size)
++{
++    // c_idx will alaways be 1 for _c versions and 0 for y
++    const unsigned int c_idx = PRED_C;
++    const unsigned int hshift = ctx_hshift(s, c_idx);
++    const unsigned int vshift = ctx_vshift(s, c_idx);
++    const unsigned int size = (1 << log2_size);
++    const unsigned int x = x0 >> hshift;
++    const unsigned int y = y0 >> vshift;
++
++    const ptrdiff_t stride = frame_stride1(s->frame, c_idx) / sizeof(pixel);
++    pixel *const src = c_idx == 0 ?
++        (pixel *)av_rpi_sand_frame_pos_y(s->frame, x, y) :
++        (pixel *)av_rpi_sand_frame_pos_c(s->frame, x, y);
++
++    // Align so we can do multiple loads in the asm
++    // Padded to 16 byte boundary so as not to confuse anything
++    DECLARE_ALIGNED(16, pixel, top[2 * MAX_TB_SIZE]);
++    DECLARE_ALIGNED(16, pixel, left_array[2 * MAX_TB_SIZE + 16 / sizeof(pixel)]);
++
++    pixel  * const left  = left_array  + 16 / sizeof(pixel);
++    const pixel * top_pred = top;
++
++    const pixel * src_l = src - 1;
++    const pixel * src_u = src - stride;
++    const pixel * src_ur = src_u + size;
++#if !PRED_C
++    const unsigned int req = req_avail[log2_size - 2][mode] & ~s->ps.sps->intra_filters_disable;
++#else
++    const unsigned int req = req_avail_c[mode];
++#endif
++
++    // If we have nothing to pred from then fill with grey
++    // This isn't a common case but dealing with it here means we don't have to
++    // test for it later
++    if (avail == 0)
++    {
++dc_only:
++#if !PRED_C
++        s->hpc.pred_dc0[log2_size - 2]((uint8_t *)src, stride);
++#else
++        s->hpc.pred_dc0_c[log2_size - 2]((uint8_t *)src, stride);
++#endif
++        return;
++    }
++
++    {
++        // N.B. stride is in pixels (not bytes) or in the case of chroma pixel-pairs
++        const AVFrame * const frame = s->frame;
++        const unsigned int mask = stride - 1; // For chroma pixel=uint16 so stride_c is stride_y / 2
++        const unsigned int stripe_adj = (av_rpi_sand_frame_stride2(frame) - 1) * stride;
++        if ((x & mask) == 0)
++            src_l -= stripe_adj;
++        if (((x + size) & mask) == 0)
++            src_ur += stripe_adj;
++    }
++
++    // Can deal with I-slices in 'normal' code even if CIP
++    // This also means that we don't need to generate (elsewhere) is_intra
++    // for IRAP frames
++    if (s->ps.pps->constrained_intra_pred_flag == 1 &&
++        s->sh.slice_type != HEVC_SLICE_I)
++    {
++        // * If we ever actually care about CIP performance then we should
++        //   special case out size 4 stuff (can be done by 'normal') and
++        //   have 8-pel avail masks
++        unsigned int avail_l = cip_avail_l(s->is_intra + ((y + size * 2 - 1) >> (3 - vshift)) * s->ps.sps->pcm_width + ((x - 1) >> (6 - hshift)),
++                                           -(int)(s->ps.sps->pcm_width),
++                                           1 << (((x - 1) >> (3 - hshift)) & 7),
++                                           1 - hshift,
++                                           avail,
++                                           size,
++                                           FFMIN(size, ((s->ps.sps->height - y0) >> vshift) - size),
++                                           vshift != 0 ? 0 : (y >> 2) & 1);
++
++        unsigned int avail_u = cip_avail_u(s->is_intra + ((y - 1) >> (3 - vshift)) * s->ps.sps->pcm_width + (x >> (6 - hshift)),
++                                           (x >> (3 - hshift)) & 7,
++                                           1 - hshift,
++                                           avail,
++                                           size,
++                                           FFMIN(size, ((s->ps.sps->width - x0) >> hshift) - size),
++                                           hshift != 0 ? 0 : (x >> 2) & 1);
++
++        // Anything left?
++        if ((avail_l | avail_u) == 0)
++            goto dc_only;
++
++        FUNC(cip_fill)(left, top, avail_l, avail_u, src_l, src_u, src_ur, stride, size);
++
++#if !PRED_C
++        if ((req & FILTER_LIGHT) != 0)
++        {
++            const unsigned threshold = 1 << (BIT_DEPTH - 5);
++            if ((req & FILTER_STRONG) != 0 &&
++                (int)(FFABS(left[-1]  + top[63] - 2 * top[31]))  < threshold &&
++                (int)(FFABS(left[-1] + left[63] - 2 * left[31])) < threshold)
++            {
++                filter_strong(top, left[-1], top[63], 64);
++                filter_strong(left, left[-1], left[63], 64);
++            } else
++            {
++                // LHS writes UL too so copy for top
++                const pixel p_ul = left[-1];
++                filter_light(left - 1, top[0], left - 1, left[2*size - 1], 1, 2*size);
++                filter_light(top, p_ul, top, top[2*size - 1], 1, 2*size - 1);
++            }
++        }
++#endif
++    }
++    else
++    {
++        const unsigned int ur_size = FFMIN(size, ((s->ps.sps->width - x0) >> hshift) - size);
++        if ((req & ~((AVAIL_UR | AVAIL_U) & avail)) == 0 &&
++            ((req & AVAIL_UR) == 0 || src_u + 2*size == src_ur + ur_size))
++        {
++            top_pred = src_u;
++        }
++        else
++        {
++#if !PRED_C
++            s->hpc.intra_filter[log2_size - 2]
++#else
++            s->hpc.intra_filter_c[log2_size - 2]
++#endif
++                ((uint8_t *)left, (uint8_t *)top, req, avail,
++                 (const uint8_t *)src_l, (const uint8_t *)src_u, (const uint8_t *)src_ur, stride * sizeof(pixel),
++                              ur_size,
++                              FFMIN(size, ((s->ps.sps->height - y0) >> vshift) - size));
++        }
++    }
++
++
++#if !PRED_C
++    switch (mode) {
++    case INTRA_PLANAR:
++        s->hpc.pred_planar[log2_size - 2]((uint8_t *)src, (uint8_t *)top_pred,
++                                          (uint8_t *)left, stride);
++        break;
++    case INTRA_DC:
++        s->hpc.pred_dc[log2_size - 2]((uint8_t *)src, (uint8_t *)top_pred,
++                       (uint8_t *)left, stride);
++        break;
++    case INTRA_ANGULAR_HORIZONTAL:
++        s->hpc.pred_horizontal[log2_size - 2]((uint8_t *)src, (uint8_t *)top_pred,
++                                           (uint8_t *)left, stride,
++                                           mode);
++        break;
++    case INTRA_ANGULAR_VERTICAL:
++        s->hpc.pred_vertical[log2_size - 2]((uint8_t *)src, (uint8_t *)top_pred,
++                                           (uint8_t *)left, stride,
++                                           mode);
++        break;
++    default:
++        s->hpc.pred_angular[log2_size - 2]((uint8_t *)src, (uint8_t *)top_pred,
++                                           (uint8_t *)left, stride,
++                                           mode);
++        break;
++    }
++#else
++    switch (mode) {
++    case INTRA_PLANAR:
++        s->hpc.pred_planar_c[log2_size - 2]((uint8_t *)src, (uint8_t *)top_pred,
++                                          (uint8_t *)left, stride);
++        break;
++    case INTRA_DC:
++        s->hpc.pred_dc_c[log2_size - 2]((uint8_t *)src, (uint8_t *)top_pred,
++                       (uint8_t *)left, stride);
++        break;
++    case INTRA_ANGULAR_HORIZONTAL:
++        s->hpc.pred_horizontal_c[log2_size - 2]((uint8_t *)src, (uint8_t *)top_pred,
++                                           (uint8_t *)left, stride,
++                                           mode);
++        break;
++    case INTRA_ANGULAR_VERTICAL:
++        s->hpc.pred_vertical_c[log2_size - 2]((uint8_t *)src, (uint8_t *)top_pred,
++                                           (uint8_t *)left, stride,
++                                           mode);
++        break;
++    default:
++        s->hpc.pred_angular_c[log2_size - 2]((uint8_t *)src, (uint8_t *)top_pred,
++                                           (uint8_t *)left, stride,
++                                           mode);
++        break;
++    }
++
++#if DUMP_PRED
++    printf("U pred @ %d, %d: mode=%d\n", x, y, mode);
++    dump_pred_uv((uint8_t *)src, stride, 1 << log2_size);
++    printf("V pred @ %d, %d: mode=%d\n", x, y, mode);
++    dump_pred_uv((uint8_t *)src + 1, stride, 1 << log2_size);
++#endif
++#endif
++}
++
++#if !PRED_C
++static av_always_inline void FUNC(pred_planar)(uint8_t *_src, const uint8_t *_top,
++                                  const uint8_t *_left, ptrdiff_t stride,
++                                  int trafo_size)  // trafo_size is log2 of the block edge: size = 1 << trafo_size
++{
++    int x, y;
++    pixel *src        = (pixel *)_src;  // presumably addressed through POS() together with stride - macro is defined outside this view
++    const pixel *top  = (const pixel *)_top;
++    const pixel *left = (const pixel *)_left;
++    int size = 1 << trafo_size;
++    for (y = 0; y < size; y++)
++        for (x = 0; x < size; x++)
++            POS(x, y) = ((size - 1 - x) * left[y] + (x + 1) * top[size]  +  // the four weights always add up to 2 * size ...
++                         (size - 1 - y) * top[x]  + (y + 1) * left[size] + size) >> (trafo_size + 1);  // ... so "+ size" rounds and the shift divides by 2 * size
++}
++#else
++static av_always_inline void FUNC(pred_planar)(uint8_t * _src, const uint8_t * _top,
++                                  const uint8_t * _left, ptrdiff_t stride,
++                                  int trafo_size)  // trafo_size is log2 of the block edge: size = 1 << trafo_size
++{
++    int x, y;
++    int size = 1 << trafo_size;
++    c_dst_ptr_t src = (c_dst_ptr_t)_src;
++    const c_src_ptr_t top = (c_src_ptr_t)_top;
++    const c_src_ptr_t left = (c_src_ptr_t)_left;
++    // Every element has two components ([0] and [1]); each gets the same planar blend independently
++    for (y = 0; y < size; y++, src += stride)  // stride is counted in two-component elements here
++    {
++        for (x = 0; x < size; x++)
++        {
++            src[x][0] = ((size - 1 - x) * left[y][0] + (x + 1) * top[size][0]  +  // weights add up to 2 * size ...
++                         (size - 1 - y) * top[x][0]  + (y + 1) * left[size][0] + size) >> (trafo_size + 1);  // ... "+ size" rounds before dividing by 2 * size
++            src[x][1] = ((size - 1 - x) * left[y][1] + (x + 1) * top[size][1]  +
++                         (size - 1 - y) * top[x][1]  + (y + 1) * left[size][1] + size) >> (trafo_size + 1);
++        }
++    }
++}
++#endif
++
++#define PRED_PLANAR(size)\
++static void FUNC(pred_planar_ ## size)(uint8_t *src, const uint8_t *top,        \
++                                       const uint8_t *left, ptrdiff_t stride)   \
++{                                                                               \
++    FUNC(pred_planar)(src, top, left, stride, size + 2);                        \
++}
++
++PRED_PLANAR(0)
++PRED_PLANAR(1)
++PRED_PLANAR(2)
++PRED_PLANAR(3)
++
++#undef PRED_PLANAR
++
++#if !PRED_C
++static void FUNC(pred_dc)(uint8_t *_src, const uint8_t *_top,
++                          const uint8_t *_left,
++                          ptrdiff_t stride, int log2_size)
++{
++    int i, j, x, y;
++    int size          = (1 << log2_size);
++    pixel *src        = (pixel *)_src;
++    const pixel *top  = (const pixel *)_top;
++    const pixel *left = (const pixel *)_left;
++    int dc            = size;  // seeded with size so the shift below rounds to nearest
++    pixel4 a;
++    for (i = 0; i < size; i++)
++        dc += left[i] + top[i];
++    // 2 * size neighbour samples were summed, hence the shift by log2_size + 1
++    dc >>= log2_size + 1;
++
++    a = PIXEL_SPLAT_X4(dc);
++    // Fill the whole block with the DC value; j advances by 4 per AV_WN4P store
++    for (i = 0; i < size; i++)
++        for (j = 0; j < size; j+=4)
++            AV_WN4P(&POS(j, i), a);
++
++//    if (c_idx == 0 && size < 32)
++// As we now have separate fns for y & c - no need to test that
++    if (size < 32)
++    {
++        POS(0, 0) = (left[0] + 2 * dc + top[0] + 2) >> 2;  // corner: blends dc with both neighbours
++        for (x = 1; x < size; x++)
++            POS(x, 0) = (top[x] + 3 * dc + 2) >> 2;  // rest of the first row: blends dc with top[x]
++        for (y = 1; y < size; y++)
++            POS(0, y) = (left[y] + 3 * dc + 2) >> 2;  // rest of the first column: blends dc with left[y]
++    }
++}
++#else
++static void FUNC(pred_dc)(uint8_t *_src, const uint8_t *_top,
++                          const uint8_t *_left,
++                          ptrdiff_t stride, int log2_size)
++{
++    unsigned int i, j;
++    const unsigned int size = (1 << log2_size);
++    c_dst_ptr_t src = (c_dst_ptr_t)_src;
++    const c_src_ptr_t top = (c_src_ptr_t)_top;
++    const c_src_ptr_t left = (c_src_ptr_t)_left;
++    unsigned int dc0 = size;  // both sums are seeded with size so the shift below rounds to nearest
++    unsigned int dc1 = size;
++    // Accumulate the two components ([0] and [1]) of the left and top neighbours separately
++    for (i = 0; i < size; i++)
++    {
++        dc0 += left[i][0] + top[i][0];
++        dc1 += left[i][1] + top[i][1];
++    }
++    // 2 * size samples went into each sum, hence the shift by log2_size + 1
++    dc0 >>= log2_size + 1;
++    dc1 >>= log2_size + 1;
++    // Flat fill: unlike some DC predictors there is no edge blending in this variant
++    for (i = 0; i < size; i++, src += stride)  // stride is counted in two-component elements here
++    {
++        for (j = 0; j < size; ++j)
++        {
++            src[j][0] = dc0;
++            src[j][1] = dc1;
++
++        }
++    }
++}
++#endif
++
++#define PRED_DC(size)\
++static void FUNC(pred_dc_ ## size)(uint8_t *src, const uint8_t *top,        \
++                                       const uint8_t *left, ptrdiff_t stride)   \
++{                                                                               \
++    FUNC(pred_dc)(src, top, left, stride, size + 2);                        \
++}
++
++PRED_DC(0)
++PRED_DC(1)
++PRED_DC(2)
++PRED_DC(3)
++
++#undef PRED_DC
++
++
++
++
++#if !PRED_C
++static void FUNC(pred_dc0)(uint8_t *_src, ptrdiff_t stride, int log2_size)
++{
++    int i, j;
++    int size          = (1 << log2_size);
++    pixel *src        = (pixel *)_src;
++    pixel4 a = PIXEL_SPLAT_X4(1 << (BIT_DEPTH - 1));  // mid-range value for the bit depth, e.g. 128 when BIT_DEPTH is 8
++    // Fill the block with that constant without reading any neighbours; j advances by 4 per AV_WN4P store
++    for (i = 0; i < size; i++)
++        for (j = 0; j < size; j+=4)
++            AV_WN4P(&POS(j, i), a);
++}
++#else
++static void FUNC(pred_dc0)(uint8_t *_src, ptrdiff_t stride, int log2_size)
++{
++    unsigned int i, j;
++    const unsigned int size = (1 << log2_size);
++    c_dst_ptr_t src = (c_dst_ptr_t)_src;
++    const pixel a = (1 << (BIT_DEPTH - 1));  // mid-range value for the bit depth, e.g. 128 when BIT_DEPTH is 8
++    // Write that constant to both components of every element; no neighbours are read
++    for (i = 0; i < size; i++, src += stride)  // stride is counted in two-component elements here
++    {
++        for (j = 0; j < size; ++j)
++        {
++            src[j][0] = a;
++            src[j][1] = a;
++        }
++    }
++}
++#endif
++
++#define PRED_DC0(size)\
++static void FUNC(pred_dc0_ ## size)(uint8_t *src, ptrdiff_t stride)   \
++{                                                                               \
++    FUNC(pred_dc0)(src, stride, size + 2);                        \
++}
++
++PRED_DC0(0)
++PRED_DC0(1)
++PRED_DC0(2)
++PRED_DC0(3)
++
++#undef PRED_DC0
++
++
++
++
++#ifndef ANGLE_CONSTS
++#define ANGLE_CONSTS
++static const int intra_pred_angle[] = {
++     32,  26,  21,  17, 13,  9,  5, 2, 0, -2, -5, -9, -13, -17, -21, -26, -32,
++    -26, -21, -17, -13, -9, -5, -2, 0, 2,  5,  9, 13,  17,  21,  26,  32
++};
++static const int inv_angle[] = {
++    -4096, -1638, -910, -630, -482, -390, -315, -256, -315, -390, -482,
++    -630, -910, -1638, -4096
++};
++#endif
++
++#if !PRED_C
++static av_always_inline void FUNC(pred_angular)(uint8_t *_src,
++                                                const uint8_t *_top,
++                                                const uint8_t *_left,
++                                                ptrdiff_t stride,
++                                                int mode, int size)
++{
++    int x, y;
++    pixel *src        = (pixel *)_src;
++    const pixel *top  = (const pixel *)_top;
++    const pixel *left = (const pixel *)_left;
++
++    int angle = intra_pred_angle[mode - 2];
++    pixel ref_array[3 * MAX_TB_SIZE + 4];
++    pixel *ref_tmp = ref_array + size;
++    const pixel *ref;
++    int last = (size * angle) >> 5;
++
++    if (mode >= 18) {
++        ref = top - 1;
++
++        if (angle < 0)
++        {
++            memcpy(ref_tmp + 1, top, size * PW);
++            ref_tmp[0] = left[-1];
++
++            for (x = last; x <= -1; x++)
++                ref_tmp[x] = left[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
++            ref = ref_tmp;
++        }
++
++        for (y = 0; y < size; y++) {
++            int idx  = ((y + 1) * angle) >> 5;
++            int fact = ((y + 1) * angle) & 31;
++            if (fact) {
++                for (x = 0; x < size; x += 4) {
++                    POS(x    , y) = ((32 - fact) * ref[x + idx + 1] +
++                                           fact  * ref[x + idx + 2] + 16) >> 5;
++                    POS(x + 1, y) = ((32 - fact) * ref[x + 1 + idx + 1] +
++                                           fact  * ref[x + 1 + idx + 2] + 16) >> 5;
++                    POS(x + 2, y) = ((32 - fact) * ref[x + 2 + idx + 1] +
++                                           fact  * ref[x + 2 + idx + 2] + 16) >> 5;
++                    POS(x + 3, y) = ((32 - fact) * ref[x + 3 + idx + 1] +
++                                           fact  * ref[x + 3 + idx + 2] + 16) >> 5;
++                }
++            } else {
++                for (x = 0; x < size; x += 4)
++                    AV_WN4P(&POS(x, y), AV_RN4P(&ref[x + idx + 1]));
++            }
++        }
++        if (mode == 26 && size < 32) {
++            for (y = 0; y < size; y++)
++                POS(0, y) = av_clip_pixel(top[0] + ((left[y] - left[-1]) >> 1));
++        }
++
++    } else {
++        ref = left - 1;
++        if (angle < 0 && last < -1) {
++            for (x = 0; x <= size; x += 4)
++                AV_WN4P(&ref_tmp[x], AV_RN4P(&left[x - 1]));
++            // Inv angle <= -256 so top offset >= 0
++            for (x = last; x <= -1; x++)
++                ref_tmp[x] = top[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)];
++            ref = ref_tmp;
++        }
++
++        for (x = 0; x < size; x++) {
++            int idx  = ((x + 1) * angle) >> 5;
++            int fact = ((x + 1) * angle) & 31;
++            if (fact) {
++                for (y = 0; y < size; y++) {
++                    POS(x, y) = ((32 - fact) * ref[y + idx + 1] +
++                                       fact  * ref[y + idx + 2] + 16) >> 5;
++                }
++            } else {
++                for (y = 0; y < size; y++)
++                    POS(x, y) = ref[y + idx + 1];
++            }
++        }
++        if (mode == 10 && size < 32) {
++            for (x = 0; x < size; x += 4) {
++                POS(x,     0) = av_clip_pixel(left[0] + ((top[x    ] - left[-1]) >> 1));
++                POS(x + 1, 0) = av_clip_pixel(left[0] + ((top[x + 1] - left[-1]) >> 1));
++                POS(x + 2, 0) = av_clip_pixel(left[0] + ((top[x + 2] - left[-1]) >> 1));
++                POS(x + 3, 0) = av_clip_pixel(left[0] + ((top[x + 3] - left[-1]) >> 1));
++            }
++        }
++    }
++}
++#else
++static av_always_inline void FUNC(pred_angular)(uint8_t *_src,
++                                                const uint8_t *_top,
++                                                const uint8_t *_left,
++                                                ptrdiff_t stride,
++                                                int mode, int size)
++{
++    int x, y;
++    c_dst_ptr_t src  = (c_dst_ptr_t)_src;
++    c_src_ptr_t top  = (c_src_ptr_t)_top;
++    c_src_ptr_t left = (c_src_ptr_t)_left;
++
++    const int angle = intra_pred_angle[mode - 2];
++    cpel ref_array[3 * MAX_TB_SIZE + 4][2];
++    c_dst_ptr_t ref_tmp = ref_array + size;
++    c_src_ptr_t ref;
++    const int last = (size * angle) >> 5;
++
++    if (mode >= 18) {
++        ref = top - 1;
++        if (angle < 0) {
++            memcpy(ref_tmp + 1, top, size * 2 * PW);
++            ref_tmp[0][0] = left[-1][0];
++            ref_tmp[0][1] = left[-1][1];
++            for (x = last; x <= -1; x++)
++            {
++                ref_tmp[x][0] = left[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)][0];
++                ref_tmp[x][1] = left[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)][1];
++            }
++            ref = (c_src_ptr_t)ref_tmp;
++        }
++
++        for (y = 0; y < size; y++, src += stride) {
++            const int idx  = ((y + 1) * angle) >> 5;
++            const int fact = ((y + 1) * angle) & 31;
++            if (fact) {
++                for (x = 0; x < size; ++x) {
++                    src[x][0] = ((32 - fact) * ref[x + idx + 1][0] +
++                                       fact  * ref[x + idx + 2][0] + 16) >> 5;
++                    src[x][1] = ((32 - fact) * ref[x + idx + 1][1] +
++                                       fact  * ref[x + idx + 2][1] + 16) >> 5;
++                }
++            } else {
++                memcpy(src, ref + idx + 1, size * 2 * PW);
++            }
++        }
++    } else {
++        ref = left - 1;
++        if (angle < 0 && last < -1) {
++            memcpy(ref_tmp, left - 1, (size + 1) * 2 * PW);
++            for (x = last; x <= -1; x++)
++            {
++                ref_tmp[x][0] = top[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)][0];
++                ref_tmp[x][1] = top[-1 + ((x * inv_angle[mode - 11] + 128) >> 8)][1];
++            }
++            ref = (c_src_ptr_t)ref_tmp;
++        }
++
++        for (x = 0; x < size; x++, src++) {
++            const int idx  = ((x + 1) * angle) >> 5;
++            const int fact = ((x + 1) * angle) & 31;
++            if (fact) {
++                for (y = 0; y < size; y++) {
++                    src[y * stride][0] = ((32 - fact) * ref[y + idx + 1][0] +
++                                       fact  * ref[y + idx + 2][0] + 16) >> 5;
++                    src[y * stride][1] = ((32 - fact) * ref[y + idx + 1][1] +
++                                       fact  * ref[y + idx + 2][1] + 16) >> 5;
++                }
++            } else {
++                for (y = 0; y < size; y++)
++                {
++                    src[y * stride][0] = ref[y + idx + 1][0];
++                    src[y * stride][1] = ref[y + idx + 1][1];
++                }
++            }
++        }
++    }
++}
++#endif
++
++static void FUNC(pred_angular_0)(uint8_t *src, const uint8_t *top,
++                                 const uint8_t *left,
++                                 ptrdiff_t stride, int mode)
++{
++    FUNC(pred_angular)(src, top, left, stride, mode, 1 << 2);
++}
++
++static void FUNC(pred_angular_1)(uint8_t *src, const uint8_t *top,
++                                 const uint8_t *left,
++                                 ptrdiff_t stride, int mode)
++{
++    FUNC(pred_angular)(src, top, left, stride, mode, 1 << 3);
++}
++
++static void FUNC(pred_angular_2)(uint8_t *src, const uint8_t *top,
++                                 const uint8_t *left,
++                                 ptrdiff_t stride, int mode)
++{
++    FUNC(pred_angular)(src, top, left, stride, mode, 1 << 4);
++}
++
++static void FUNC(pred_angular_3)(uint8_t *src, const uint8_t *top,
++                                 const uint8_t *left,
++                                 ptrdiff_t stride, int mode)
++{
++    FUNC(pred_angular)(src, top, left, stride, mode, 1 << 5);
++}
++
++#undef cpel
++#undef c_src_ptr_t
++#undef c_dst_ptr_t
++
++#undef EXTEND
++#undef POS
++#undef PW
++
++#undef filter_light1
++#undef filter_light
++#undef filter_strong
++#undef ref_gen
++
++#ifndef INCLUDED_ONCE
++#define INCLUDED_ONCE
++#endif
++
+--- /dev/null
++++ b/libavcodec/rpi_mailbox.c
+@@ -0,0 +1,155 @@
++/*
++Copyright (c) 2012, Broadcom Europe Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++*/
++
++#include <stdio.h>
++#include <string.h>
++#include <stdlib.h>
++#include <fcntl.h>
++#include <unistd.h>
++#include <assert.h>
++#include <stdint.h>
++#include <sys/ioctl.h>
++
++#include <linux/ioctl.h>
++
++#define MAJOR_NUM 100
++#define IOCTL_MBOX_PROPERTY _IOWR(MAJOR_NUM, 0, char *)
++#define DEVICE_FILE_NAME "/dev/vcio"
++
++#include "rpi_mailbox.h"
++//#include <interface/vctypes/vc_image_structs.h>
++
++/*
++ * Send the property message in buf to the mailbox device with a single ioctl.
++ * Returns the ioctl result; a negative result is also reported on stdout.
++ */
++static int mbox_property(int file_desc, void *buf)
++{
++   int ret_val = ioctl(file_desc, IOCTL_MBOX_PROPERTY, buf);
++
++   if (ret_val < 0) {
++      printf("ioctl_set_msg failed:%d\n", ret_val);
++   }
++
++#ifdef DEBUG
++   unsigned *p = buf; unsigned i; unsigned size = *(unsigned *)buf;  // first word holds the message size in bytes
++   for (i=0; i<size/4; i++)
++      printf("%04zx: 0x%08x\n", i*sizeof *p, p[i]);  // the byte offset is a size_t, so it needs %zx rather than %x
++#endif
++   return ret_val;
++}
++
++#define GET_VCIMAGE_PARAMS 0x30044
++// Round-trips *img through the firmware in a GET_VCIMAGE_PARAMS property message; returns the mbox_property() result
++int mbox_get_image_params(int fd, VC_IMAGE_T * img)
++{
++    uint32_t buf[sizeof(*img) / sizeof(uint32_t) + 32];  // the image words plus 32 spare words for the message framing
++    uint32_t * p = buf;
++    void * rimg;
++    int rv;
++
++    *p++ = 0; // size (patched through buf[0] once the message is complete)
++    *p++ = 0; // process request
++    *p++ = GET_VCIMAGE_PARAMS;
++    *p++ = sizeof(*img);
++    *p++ = sizeof(*img);
++    rimg = p;  // remember where the image payload sits inside buf
++    memcpy(p, img, sizeof(*img));
++    p += sizeof(*img) / sizeof(*p);
++    *p++ = 0;  // End tag
++    buf[0] = (p - buf) * sizeof(*p);  // total message length in bytes
++
++    rv = mbox_property(fd, buf);
++    memcpy(img, rimg, sizeof(*img));  // copied back whether or not rv reports success
++
++    return rv;
++}
++
++
++#define SET_CLOCK_RATE 0x00038002
++#define GET_MAX_CLOCK 0x00030004
++#define CLOCK_HEVC 11
++// Send a property message carrying two value words for <command>; *word0 and *word1 are overwritten from buf afterwards
++static int mbox_property_generic(int fd, unsigned command, unsigned *word0, unsigned *word1)
++{
++    uint32_t buf[32];
++    uint32_t * p = buf;
++    int rv;
++
++    *p++ = 0; // size (patched through buf[0] below)
++    *p++ = 0; // process request
++    *p++ = command;
++    *p++ = 8;  // 8 bytes = the two 32-bit value words that follow
++    *p++ = 8;
++    *p++ = *word0;  // lands in buf[5]
++    *p++ = *word1;  // lands in buf[6]
++    *p++ = 0;  // End tag (buf[7])
++    buf[0] = (p - buf) * sizeof(*p);  // total message length in bytes
++
++    rv = mbox_property(fd, buf);
++    *word0 = buf[6];  // NOTE: read back from the slot that carried word1, not from buf[5]
++    *word1 = buf[7];  // read back from the slot that carried the end tag
++    return rv;
++}
++
++// Open DEVICE_FILE_NAME for mailbox ioctls; returns the fd, or a negative value after printing a hint on stdout
++int mbox_open(void)
++{
++   // O_RDONLY (0 on Linux, the value used before): within this file the fd is only passed to ioctl() and close()
++   const int file_desc = open(DEVICE_FILE_NAME, O_RDONLY);
++   if (file_desc < 0) {
++      printf("Can't open device file: %s\n", DEVICE_FILE_NAME);
++      printf("Try creating a device file with: sudo mknod %s c %d 0\n", DEVICE_FILE_NAME, MAJOR_NUM);
++   }
++   return file_desc;
++}
++// Close a descriptor obtained from mbox_open(); the result of close() is not checked
++void mbox_close(int file_desc) {
++  close(file_desc);
++}
++// Issue GET_MAX_CLOCK for CLOCK_HEVC, then SET_CLOCK_RATE with the value that came back; returns the first non-zero result
++int mbox_request_clock(int fd) {
++   int rv;
++   unsigned word0, word1 = 0;
++   word0 = CLOCK_HEVC;
++   rv = mbox_property_generic(fd, GET_MAX_CLOCK, &word0, &word1);
++   if (rv != 0)
++      return rv;
++   word1 = word0;  // mbox_property_generic() returns the reply's second value word in word0 - presumably the max rate
++   word0 = CLOCK_HEVC;
++   rv = mbox_property_generic(fd, SET_CLOCK_RATE, &word0, &word1);
++   return rv;
++}
++
++// Issue SET_CLOCK_RATE for CLOCK_HEVC with a value of 0; returns the mailbox call result
++int mbox_release_clock(int fd)
++{
++  unsigned clock_id = CLOCK_HEVC;
++  unsigned rate = 0;
++
++  return mbox_property_generic(fd, SET_CLOCK_RATE, &clock_id, &rate);
++}
+--- /dev/null
++++ b/libavcodec/rpi_mailbox.h
+@@ -0,0 +1,58 @@
++#ifndef RPI_MAILBOX_H
++#define RPI_MAILBOX_H
++
++/* The image structure. */
++typedef struct vc_image_extra_uv_s {
++  void *u, *v;
++  int vpitch;
++} VC_IMAGE_EXTRA_UV_T;
++
++typedef union {
++    VC_IMAGE_EXTRA_UV_T uv;
++//  VC_IMAGE_EXTRA_RGBA_T rgba;
++//  VC_IMAGE_EXTRA_PAL_T pal;
++//  VC_IMAGE_EXTRA_TF_T tf;
++//  VC_IMAGE_EXTRA_BAYER_T bayer;
++//  VC_IMAGE_EXTRA_MSBAYER_T msbayer;
++//  VC_IMAGE_EXTRA_CODEC_T codec;
++//  VC_IMAGE_EXTRA_OPENGL_T opengl;
++} VC_IMAGE_EXTRA_T;
++
++
++typedef struct VC_IMAGE_T {
++  unsigned short                  type;           /* should restrict to 16 bits */
++  unsigned short                  info;           /* format-specific info; zero for VC02 behaviour */
++  unsigned short                  width;          /* width in pixels */
++  unsigned short                  height;         /* height in pixels */
++  int                             pitch;          /* pitch of image_data array in bytes */
++  int                             size;           /* number of bytes available in image_data array */
++  void                           *image_data;     /* pixel data */
++  VC_IMAGE_EXTRA_T                extra;          /* extra data like palette pointer */
++  void                           *metadata;       /* metadata header for the image */
++  void                           *pool_object;    /* non-NULL if image was allocated from a vc_pool */
++  int                             mem_handle;     /* the mem handle for relocatable memory storage */
++  int                             metadata_size;  /* size of metadata of each channel in bytes */
++  int                             channel_offset; /* offset of consecutive channels in bytes */
++  uint32_t                        video_timestamp;/* 90000 Hz RTP time domain - derived from audio timestamp */
++  uint8_t                         num_channels;   /* number of channels (2 for stereo) */
++  uint8_t                         current_channel;/* the channel this header is currently pointing to */
++  uint8_t                         linked_multichann_flag;/* Indicates the header has the linked-multichannel structure */
++  uint8_t                         is_channel_linked;     /* Tracks whether the above structure is being used to link the header
++                                                            into a linked-multichannel image */
++  uint8_t                         channel_index;         /* index of the channel this header represents while
++                                                            it is being linked. */
++  uint8_t                         _dummy[3];      /* pad struct to 64 bytes (the total only comes to 64 with 32-bit pointers) */
++} VC_IMAGE_T;
++
++typedef int vc_image_t_size_check[(sizeof(VC_IMAGE_T) == 64) * 2 - 1];  /* compile-time check: the array size is -1 (an error) unless sizeof is exactly 64 */
++
++
++extern int mbox_open(void);
++extern void mbox_close(int file_desc);
++
++int mbox_get_image_params(int fd, VC_IMAGE_T * img);
++
++int mbox_request_clock(int fd);
++int mbox_release_clock(int fd);
++
++#endif
+--- /dev/null
++++ b/libavcodec/rpi_mem.c
+@@ -0,0 +1,326 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++
++#include <stdlib.h>
++#include <string.h>
++#include <stddef.h>
++#include <stdint.h>
++
++#include "config.h"
++
++#include "libavutil/avassert.h"
++#include "libavutil/rpi_sand_fns.h"
++
++#pragma GCC diagnostic push
++// Many many redundant decls in the header files
++#pragma GCC diagnostic ignored "-Wredundant-decls"
++#include <bcm_host.h>
++#include <interface/vctypes/vc_image_types.h>
++#include <interface/vcsm/user-vcsm.h>
++#pragma GCC diagnostic pop
++
++#include "rpi_mem.h"
++#include "rpi_zc_frames.h"
++
++
++#define OPT_PREFER_CMA 0
++
++struct rpi_cache_flush_env_s {
++  struct vcsm_user_clean_invalid2_s v;
++};
++
++
++// GPU memory alloc fns (internal)
++// Release what gpu_malloc_internal() set up: unlock the ARM mapping and free the VCSM handle, whichever are set
++static void gpu_free_internal(GPU_MEM_PTR_T * const p)
++{
++    if (p->arm != NULL)
++        vcsm_unlock_ptr(p->arm);
++    if (p->vcsm_handle != 0)
++        vcsm_free(p->vcsm_handle);
++    memset(p, 0, sizeof(*p));  // Ensure we crash hard if we try and use this again
++}
++
++// Allocate and lock a VCSM buffer, filling *p; returns 0, or AVERROR(ENOMEM) with *p released and zeroed on any failure
++static int gpu_malloc_internal(GPU_MEM_PTR_T * const p,
++    const int numbytes, const unsigned int cache_type, const char * const name)
++{
++    memset(p, 0, sizeof(*p));
++    p->numbytes = (numbytes + 255) & ~255;  // Round up to a multiple of 256
++
++    if ((p->vcsm_handle = vcsm_malloc_cache(p->numbytes, cache_type | 0x80, (char *)name)) == 0)  // NOTE(review): the meaning of the 0x80 flag is not visible here
++    {
++        av_log(NULL, AV_LOG_ERROR, "Unable to alloc %d bytes from VCSM for %s\n", p->numbytes, name);
++        goto fail;
++    }
++    if ((p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle)) == 0)
++    {
++        av_log(NULL, AV_LOG_ERROR, "Unable to VC handle from VCSM for %s\n", name);
++        goto fail;
++    }
++    if ((p->arm = vcsm_lock(p->vcsm_handle)) == NULL)
++    {
++        av_log(NULL, AV_LOG_ERROR, "Unable to lock handle from VCSM for %s\n", name);
++        goto fail;
++    }
++    if ((p->vc = vcsm_vc_addr_from_hdl(p->vcsm_handle)) == 0)
++    {
++        av_log(NULL, AV_LOG_ERROR, "Unable to get VC addr from VCSM for %s\n", name);
++        goto fail;
++    }
++
++    return 0;
++
++fail:
++    gpu_free_internal(p);  // releases whichever of the lock/handle were obtained, then zeroes *p
++    return AVERROR(ENOMEM);  // every failure path reports ENOMEM, whichever step failed
++}
++
++// Public gpu fns
++
++// Allocate memory on GPU
++// Fills in structure <p> containing ARM pointer, videocore handle, videocore memory address, numbytes
++// Returns 0 on success.
++// This allocates memory that will not be cached in ARM's data cache.
++// Therefore safe to use without data cache flushing.
++int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p)
++{
++    return gpu_malloc_internal(p, numbytes, VCSM_CACHE_TYPE_NONE, "ffmpeg uncached");
++}
++
++// This allocates data that will be
++//    Cached in ARM L2
++//    Uncached in VPU L2
++int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p)
++{
++    return gpu_malloc_internal(p, numbytes, VCSM_CACHE_TYPE_HOST, "ffmpeg cached");
++}
++
++void gpu_free(GPU_MEM_PTR_T * const p) {
++    gpu_free_internal(p);
++}
++
++void rpi_mem_gpu_uninit(void)
++{
++    vcsm_exit();
++    bcm_host_deinit();
++}
++// Initialise VCSM, then bcm_host; returns use_cma + 1 (1 or 2) on success, AVERROR(EINVAL) if both vcsm_init_ex() attempts fail
++int rpi_mem_gpu_init(const unsigned int flags)
++{
++    const int wants_cma = bcm_host_is_fkms_active();
++    int use_cma;
++
++    (void)flags;  // currently unused
++
++    if (vcsm_init_ex(wants_cma ? 1 : 0, -1) == 0)  // first attempt: the mode selected by wants_cma
++        use_cma = 1;  // NOTE(review): recorded as 1 even when wants_cma == 0 - confirm this is intended
++    else if (vcsm_init_ex(wants_cma ? 0 : 1, -1) == 0)  // second attempt: the opposite mode
++        use_cma = 0;
++    else
++        return AVERROR(EINVAL);
++
++    bcm_host_init();
++
++    return use_cma + 1;
++}
++
++// ----------------------------------------------------------------------------
++//
++// Cache flush functions
++
++#define CACHE_EL_MAX ((sizeof(rpi_cache_buf_t) - sizeof (struct vcsm_user_clean_invalid2_s)) / sizeof (struct vcsm_user_clean_invalid2_block_s))
++
++rpi_cache_flush_env_t * rpi_cache_flush_init(rpi_cache_buf_t * const buf)  // Returns buf reinterpreted as an empty flush env
++{
++  rpi_cache_flush_env_t * const rfe = (rpi_cache_flush_env_t *)buf;  // Env lives in caller-provided storage; nothing is allocated
++  *rfe = (rpi_cache_flush_env_t){.v={.op_count = 0}};  // No ops queued; members not named here are zeroed too
++  return rfe;
++}
++
++void rpi_cache_flush_abort(rpi_cache_flush_env_t * const rfe)  // Intentionally empty: rfe is left untouched
++{
++  // Nothing needed
++}
++
++int rpi_cache_flush_execute(rpi_cache_flush_env_t * const rfe)  // Returns 0, or AVERROR(errno) if vcsm_clean_invalid2 fails
++{
++    int rc = 0;
++    if (rfe->v.op_count != 0) {  // Nothing queued => nothing to do
++        if (vcsm_clean_invalid2(&rfe->v) != 0)
++        {
++          const int err = errno;  // Saved before av_log is called
++          av_log(NULL, AV_LOG_ERROR, "vcsm_clean_invalid2 failed: errno=%d\n", err);
++          rc = AVERROR(err);
++        }
++        rfe->v.op_count = 0;  // Queue is emptied whether or not the call succeeded
++    }
++    return rc;
++}
++
++int rpi_cache_flush_finish(rpi_cache_flush_env_t * const rfe)  // Returns the result of rpi_cache_flush_execute
++{
++  int rc = rpi_cache_flush_execute(rfe);  // Run whatever has been queued
++
++  return rc;  // Nothing further is released here
++}
++
++inline void rpi_cache_flush_add_gm_blocks(rpi_cache_flush_env_t * const rfe, const GPU_MEM_PTR_T * const gm, const unsigned int mode,  // Appends one block descriptor to rfe's op list
++  const unsigned int offset0, const unsigned int block_size, const unsigned int blocks, const unsigned int block_stride)
++{
++  struct vcsm_user_clean_invalid2_block_s * const b = rfe->v.s + rfe->v.op_count++;  // Claim the next free slot
++
++  av_assert1(rfe->v.op_count <= CACHE_EL_MAX);  // Checked after the slot has been claimed
++
++  b->invalidate_mode = mode;
++  b->block_count = blocks;
++  b->start_address = gm->arm + offset0;  // ARM-side pointer of the first block
++  b->block_size = block_size;
++  b->inter_block_stride = block_stride;
++}
++
++void rpi_cache_flush_add_gm_range(rpi_cache_flush_env_t * const rfe, const GPU_MEM_PTR_T * const gm, const unsigned int mode,  // Queues [offset, offset + size) of gm as a single block
++  const unsigned int offset, const unsigned int size)
++{
++  // Deal with empty pointer trivially
++  if (gm == NULL || size == 0)
++    return;
++
++  av_assert1(offset <= gm->numbytes);  // Each term is checked on its own as well as the sum below
++  av_assert1(size <= gm->numbytes);
++  av_assert1(offset + size <= gm->numbytes);
++
++  rpi_cache_flush_add_gm_blocks(rfe, gm, mode, offset, size, 1, 0);  // One block, so the stride argument is 0
++}
++
++void rpi_cache_flush_add_gm_ptr(rpi_cache_flush_env_t * const rfe, const GPU_MEM_PTR_T * const gm, const unsigned int mode)  // Queues the whole of gm as a single block
++{
++  rpi_cache_flush_add_gm_blocks(rfe, gm, mode, 0, gm->numbytes, 1, 0);  // NOTE(review): gm is dereferenced without the NULL check that rpi_cache_flush_add_gm_range has
++}
++
++
++void rpi_cache_flush_add_frame(rpi_cache_flush_env_t * const rfe, const AVFrame * const frame, const unsigned int mode)  // Queues every GPU buffer backing frame
++{
++#if !RPI_ONE_BUF
++#error Fixme! (NIF)
++#endif
++  if (gpu_is_buf1(frame)) {  // Frame is backed by one GPU buffer
++    rpi_cache_flush_add_gm_ptr(rfe, gpu_buf1_gmem(frame), mode);
++  }
++  else
++  {
++    rpi_cache_flush_add_gm_ptr(rfe, gpu_buf3_gmem(frame, 0), mode);  // Otherwise queue buffers 0, 1 and 2 separately
++    rpi_cache_flush_add_gm_ptr(rfe, gpu_buf3_gmem(frame, 1), mode);
++    rpi_cache_flush_add_gm_ptr(rfe, gpu_buf3_gmem(frame, 2), mode);
++  }
++}
++
++// Flush an area of a frame
++// Width, height, x0, y0 in luma pels
++void rpi_cache_flush_add_frame_block(rpi_cache_flush_env_t * const rfe, const AVFrame * const frame, const unsigned int mode,  // Queues the rows [y0, y0 + height) of frame
++  const unsigned int x0, const unsigned int y0, const unsigned int width, const unsigned int height,  // x0 / width are only used on the sand path
++  const unsigned int uv_shift, const int do_luma, const int do_chroma)  // do_luma / do_chroma select which planes are queued
++{
++  const unsigned int y_offset = frame->linesize[0] * y0;  // Byte offset of the first luma row
++  const unsigned int y_size = frame->linesize[0] * height;  // Bytes covered by the luma rows
++  // Round UV up/down to get everything
++  const unsigned int uv_rnd = (1U << uv_shift) >> 1;
++  const unsigned int uv_offset = frame->linesize[1] * (y0 >> uv_shift);  // Start row rounds down
++  const unsigned int uv_size = frame->linesize[1] * ((y0 + height + uv_rnd) >> uv_shift) - uv_offset;  // End row rounds up via uv_rnd
++
++#if 0
++  // *** frame->height is cropped height so not good
++  // As all unsigned they will also reject -ve
++  // Test individually as well as added to reject overflow
++  av_assert0(start_line <= (unsigned int)frame->height);  // ***** frame height cropped
++  av_assert0(n <= (unsigned int)frame->height);
++  av_assert0(start_line + n <= (unsigned int)frame->height);
++#endif
++
++  if (!gpu_is_buf1(frame))  // Case 1: one GPU buffer per plane
++  {
++    if (do_luma) {
++      rpi_cache_flush_add_gm_range(rfe, gpu_buf3_gmem(frame, 0), mode, y_offset, y_size);
++    }
++    if (do_chroma) {
++      rpi_cache_flush_add_gm_range(rfe, gpu_buf3_gmem(frame, 1), mode, uv_offset, uv_size);
++      rpi_cache_flush_add_gm_range(rfe, gpu_buf3_gmem(frame, 2), mode, uv_offset, uv_size);
++    }
++  }
++  else if (!av_rpi_is_sand_frame(frame))  // Case 2: single buffer, non-sand layout
++  {
++    const GPU_MEM_PTR_T * const gm = gpu_buf1_gmem(frame);
++    if (do_luma) {
++      rpi_cache_flush_add_gm_range(rfe, gm, mode, (frame->data[0] - gm->arm) + y_offset, y_size);  // Plane offset within the buffer comes from data[] - arm
++    }
++    if (do_chroma) {
++      rpi_cache_flush_add_gm_range(rfe, gm, mode, (frame->data[1] - gm->arm) + uv_offset, uv_size);
++      rpi_cache_flush_add_gm_range(rfe, gm, mode, (frame->data[2] - gm->arm) + uv_offset, uv_size);
++    }
++  }
++  else  // Case 3: sand layout, descriptors are filled in directly
++  {
++    const unsigned int stride1 = av_rpi_sand_frame_stride1(frame);
++    const unsigned int stride2 = av_rpi_sand_frame_stride2(frame);
++    const unsigned int xshl = av_rpi_sand_frame_xshl(frame);
++    const unsigned int xleft = x0 & ~((stride1 >> xshl) - 1);  // x0 rounded down to a multiple of (stride1 >> xshl); assumes that value is a power of two - TODO confirm
++    const unsigned int block_count = (((x0 + width - xleft) << xshl) + stride1 - 1) / stride1;  // Same for Y & C
++    av_assert1(rfe->v.op_count + do_chroma + do_luma < CACHE_EL_MAX);  // NOTE(review): strict '<' here, while rpi_cache_flush_add_gm_blocks allows '<='
++
++    if (do_chroma)
++    {
++      struct vcsm_user_clean_invalid2_block_s * const b = rfe->v.s + rfe->v.op_count++;
++      b->invalidate_mode = mode;
++      b->block_count = block_count;
++      b->start_address = av_rpi_sand_frame_pos_c(frame, xleft >> 1, y0 >> 1);  // NOTE(review): fixed >> 1 here rather than uv_shift - confirm
++      b->block_size = uv_size;
++      b->inter_block_stride = stride1 * stride2;
++    }
++    if (do_luma)
++    {
++      struct vcsm_user_clean_invalid2_block_s * const b = rfe->v.s + rfe->v.op_count++;
++      b->invalidate_mode = mode;
++      b->block_count = block_count;
++      b->start_address = av_rpi_sand_frame_pos_y(frame, xleft, y0);
++      b->block_size = y_size;
++      b->inter_block_stride = stride1 * stride2;
++    }
++  }
++}
++
++// One-shot helper: init, queue the whole of p with the given mode, then finish
++void rpi_cache_flush_one_gm_ptr(const GPU_MEM_PTR_T *const p, const rpi_cache_flush_mode_t mode)
++{
++  rpi_cache_buf_t cbuf;  // Stack storage for the flush env
++  rpi_cache_flush_env_t * rfe = rpi_cache_flush_init(&cbuf);
++  rpi_cache_flush_add_gm_ptr(rfe, p, mode);
++  rpi_cache_flush_finish(rfe);  // Return value (any flush error) is discarded
++}
++
+--- /dev/null
++++ b/libavcodec/rpi_mem.h
+@@ -0,0 +1,88 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox, Ben Avison
++*/
++
++#ifndef RPI_MEM_H
++#define RPI_MEM_H
++#include <stdint.h>  // uint32_t, used by rpi_cache_buf_t below
++typedef struct gpu_mem_ptr_s {
++  unsigned char *arm; // Pointer to memory mapped on ARM side
++  int vc_handle;   // Videocore handle of relocatable memory
++  int vcsm_handle; // Handle for use by VCSM
++  int vc;       // Address for use in GPU code
++  int numbytes; // Size of memory block
++} GPU_MEM_PTR_T;
++
++// General GPU functions
++
++#define GPU_INIT_GPU 1
++#define GPU_INIT_CMA 2
++
++extern int gpu_malloc_cached(int numbytes, GPU_MEM_PTR_T *p);  // Allocates with VCSM_CACHE_TYPE_HOST
++extern int gpu_malloc_uncached(int numbytes, GPU_MEM_PTR_T *p);  // Allocates with VCSM_CACHE_TYPE_NONE
++extern void gpu_free(GPU_MEM_PTR_T * const p);
++int rpi_mem_gpu_init(const unsigned int flags);  // flags is currently ignored by the implementation
++void rpi_mem_gpu_uninit(void);
++
++// Cache flush stuff
++
++struct rpi_cache_flush_env_s;
++typedef struct rpi_cache_flush_env_s rpi_cache_flush_env_t;
++
++typedef struct {uint32_t t[33];} rpi_cache_buf_t;  // Caller-owned storage that rpi_cache_flush_init turns into an env
++
++rpi_cache_flush_env_t * rpi_cache_flush_init(rpi_cache_buf_t * const buf);
++// Free env without flushing
++void rpi_cache_flush_abort(rpi_cache_flush_env_t * const rfe);
++// Do the accumulated flush & clear but do not free the env
++int rpi_cache_flush_execute(rpi_cache_flush_env_t * const rfe);
++// Do the accumulated flush & free the env
++int rpi_cache_flush_finish(rpi_cache_flush_env_t * const rfe);
++
++typedef enum
++{
++    RPI_CACHE_FLUSH_MODE_INVALIDATE     = 1,
++    RPI_CACHE_FLUSH_MODE_WRITEBACK      = 2,
++    RPI_CACHE_FLUSH_MODE_WB_INVALIDATE  = 3  // == INVALIDATE | WRITEBACK
++} rpi_cache_flush_mode_t;
++
++struct AVFrame;
++void rpi_cache_flush_add_gm_ptr(rpi_cache_flush_env_t * const rfe, const GPU_MEM_PTR_T * const gm, const rpi_cache_flush_mode_t mode);
++void rpi_cache_flush_add_gm_range(rpi_cache_flush_env_t * const rfe, const GPU_MEM_PTR_T * const gm, const rpi_cache_flush_mode_t mode,
++  const unsigned int offset, const unsigned int size);
++void rpi_cache_flush_add_gm_blocks(rpi_cache_flush_env_t * const rfe, const GPU_MEM_PTR_T * const gm, const unsigned int mode,
++  const unsigned int offset0, const unsigned int block_size, const unsigned int blocks, const unsigned int block_stride);
++void rpi_cache_flush_add_frame(rpi_cache_flush_env_t * const rfe, const struct AVFrame * const frame, const rpi_cache_flush_mode_t mode);
++void rpi_cache_flush_add_frame_block(rpi_cache_flush_env_t * const rfe, const struct AVFrame * const frame, const rpi_cache_flush_mode_t mode,
++  const unsigned int x0, const unsigned int y0, const unsigned int width, const unsigned int height,
++  const unsigned int uv_shift, const int do_luma, const int do_chroma);
++
++// init, add, finish for one gm ptr
++void rpi_cache_flush_one_gm_ptr(const GPU_MEM_PTR_T * const p, const rpi_cache_flush_mode_t mode);
++
++#endif
+--- /dev/null
++++ b/libavcodec/rpi_qpu.c
+@@ -0,0 +1,776 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++
++#include <stdio.h>
++#include <stdlib.h>
++#include <string.h>
++#include <stddef.h>
++#include <stdint.h>
++#include "libavutil/avassert.h"
++
++#include "config.h"
++
++#include <pthread.h>
++#include <time.h>
++
++#include <interface/vcsm/user-vcsm.h>
++
++#include "rpi_mailbox.h"
++#include "rpi_mem.h"
++#include "rpi_qpu.h"
++#include "rpi_hevc_shader.h"
++#include "rpi_hevc_transform8.h"
++#include "rpi_hevc_transform10.h"
++#include "libavutil/rpi_sand_fns.h"
++
++// Trace time spent waiting for GPU (VPU/QPU) (1=Yes, 0=No)
++#define RPI_TRACE_TIME_VPU_QPU_WAIT     0
++
++// Add profile flags to all QPU requests - generates output in "vcdbg log msg"
++// Beware this is expensive and will probably throw off all other timing by >10%
++#define RPI_TRACE_QPU_PROFILE_ALL       0
++
++// QPU "noflush" flags
++// a mixture of flushing & profiling
++
++#define QPU_FLAGS_NO_FLUSH_VPU          1       // If unset VPU cache will be flushed
++#define QPU_FLAGS_PROF_CLEAR_AND_ENABLE 2       // Clear & Enable detailed QPU profiling registers
++#define QPU_FLAGS_PROF_OUTPUT_COUNTS    4       // Print the results
++#define QPU_FLAGS_OUTPUT_QPU_TIMES      8       // Print QPU times - independent of the profiling
++#define QPU_FLAGS_NO_FLUSH_QPU          16      // If unset flush QPU caches & TMUs (uniforms always flushed)
++
++#define vcos_verify_ge0(x) ((x)>=0)
++
++// Size in 32bit words
++#define QPU_CODE_SIZE 4098
++#define VPU_CODE_SIZE 16384
++
++static const short rpi_transMatrix2even[32][16] = { // Even rows first
++{64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64,  64},
++{90,  87,  80,  70,  57,  43,  25,   9,  -9, -25, -43, -57, -70, -80, -87, -90},
++{89,  75,  50,  18, -18, -50, -75, -89, -89, -75, -50, -18,  18,  50,  75,  89},
++{87,  57,   9, -43, -80, -90, -70, -25,  25,  70,  90,  80,  43,  -9, -57, -87},
++{83,  36, -36, -83, -83, -36,  36,  83,  83,  36, -36, -83, -83, -36,  36,  83},
++{80,   9, -70, -87, -25,  57,  90,  43, -43, -90, -57,  25,  87,  70,  -9, -80},
++{75, -18, -89, -50,  50,  89,  18, -75, -75,  18,  89,  50, -50, -89, -18,  75},
++{70, -43, -87,   9,  90,  25, -80, -57,  57,  80, -25, -90,  -9,  87,  43, -70},
++{64, -64, -64,  64,  64, -64, -64,  64,  64, -64, -64,  64,  64, -64, -64,  64},
++{57, -80, -25,  90,  -9, -87,  43,  70, -70, -43,  87,   9, -90,  25,  80, -57},
++{50, -89,  18,  75, -75, -18,  89, -50, -50,  89, -18, -75,  75,  18, -89,  50},
++{43, -90,  57,  25, -87,  70,   9, -80,  80,  -9, -70,  87, -25, -57,  90, -43},
++{36, -83,  83, -36, -36,  83, -83,  36,  36, -83,  83, -36, -36,  83, -83,  36},
++{25, -70,  90, -80,  43,   9, -57,  87, -87,  57,  -9, -43,  80, -90,  70, -25},
++{18, -50,  75, -89,  89, -75,  50, -18, -18,  50, -75,  89, -89,  75, -50,  18},
++{ 9, -25,  43, -57,  70, -80,  87, -90,  90, -87,  80, -70,  57, -43,  25,  -9},
++// Odd rows
++{90,  90,  88,  85,  82,  78,  73,  67,  61,  54,  46,  38,  31,  22,  13,   4},
++{90,  82,  67,  46,  22,  -4, -31, -54, -73, -85, -90, -88, -78, -61, -38, -13},
++{88,  67,  31, -13, -54, -82, -90, -78, -46,  -4,  38,  73,  90,  85,  61,  22},
++{85,  46, -13, -67, -90, -73, -22,  38,  82,  88,  54,  -4, -61, -90, -78, -31},
++{82,  22, -54, -90, -61,  13,  78,  85,  31, -46, -90, -67,   4,  73,  88,  38},
++{78,  -4, -82, -73,  13,  85,  67, -22, -88, -61,  31,  90,  54, -38, -90, -46},
++{73, -31, -90, -22,  78,  67, -38, -90, -13,  82,  61, -46, -88,  -4,  85,  54},
++{67, -54, -78,  38,  85, -22, -90,   4,  90,  13, -88, -31,  82,  46, -73, -61},
++{61, -73, -46,  82,  31, -88, -13,  90,  -4, -90,  22,  85, -38, -78,  54,  67},
++{54, -85,  -4,  88, -46, -61,  82,  13, -90,  38,  67, -78, -22,  90, -31, -73},
++{46, -90,  38,  54, -90,  31,  61, -88,  22,  67, -85,  13,  73, -82,   4,  78},
++{38, -88,  73,  -4, -67,  90, -46, -31,  85, -78,  13,  61, -90,  54,  22, -82},
++{31, -78,  90, -61,   4,  54, -88,  82, -38, -22,  73, -90,  67, -13, -46,  85},
++{22, -61,  85, -90,  73, -38,  -4,  46, -78,  90, -82,  54, -13, -31,  67, -88},
++{13, -38,  61, -78,  88, -90,  85, -73,  54, -31,   4,  22, -46,  67, -82,  90},
++{ 4, -13,  22, -31,  38, -46,  54, -61,  67, -73,  78, -82,  85, -88,  90, -90}
++};
++
++// Code/constants on GPU
++struct GPU  // Layout of the block that gpu_init allocates as code_gm_ptr
++{
++//  unsigned int qpu_code[QPU_CODE_SIZE];
++    unsigned int vpu_code8[VPU_CODE_SIZE];  // Filled from rpi_hevc_transform8 in gpu_init
++    unsigned int vpu_code10[VPU_CODE_SIZE];  // Filled from rpi_hevc_transform10 in gpu_init
++    short transMatrix2even[16*16*2];  // Filled from rpi_transMatrix2even in gpu_init
++};
++
++#define WAIT_COUNT_MAX 16
++
++typedef struct trace_time_one_s  // Per-nesting-level timers used by the tto_* helpers
++{
++    int count;  // Number of currently open levels: tto_start increments, tto_end decrements
++    int64_t start[WAIT_COUNT_MAX];  // Start time of each open level
++    int64_t total[WAIT_COUNT_MAX];  // Accumulated time per level
++} trace_time_one_t;
++
++typedef struct trace_time_wait_s  // Trace state kept in gpu_env_t when RPI_TRACE_TIME_VPU_QPU_WAIT is set
++{
++    unsigned int jcount;  // Incremented once per vq_wait_delete
++    int64_t start0;  // Taken from active.start[0] the first time vq_wait_delete runs
++    int64_t last_update;  // Advanced by WAIT_TIME_PRINT_PERIOD each time a report is printed
++    trace_time_one_t active;  // Started in vq_wait_new, ended in vq_wait_post
++    trace_time_one_t wait;  // Started in vq_wait_wait, ended in vq_wait_delete
++} trace_time_wait_t;
++
++typedef struct vq_wait_s  // One wait object: a semaphore plus a free-list link
++{
++    sem_t sem;  // Posted by vq_wait_post, waited on by vq_wait_wait
++    struct vq_wait_s * next;  // Next free entry while in the pool; NULL while handed out
++} vq_wait_t;
++
++#define VQ_WAIT_POOL_SIZE 16
++typedef struct vq_wait_pool_s  // Fixed pool of wait objects threaded onto a singly linked free list
++{
++    vq_wait_t * head;  // First free entry
++    vq_wait_t pool[VQ_WAIT_POOL_SIZE];  // Backing storage for every entry
++} vq_wait_pool_t;
++
++static void vq_wait_pool_init(vq_wait_pool_t * const pool);
++static void vq_wait_pool_deinit(vq_wait_pool_t * const pool);
++
++typedef struct gpu_env_s  // Process-wide GPU state, created by gpu_init and destroyed by gpu_term
++{
++    int open_count;  // References: ++ in gpu_lock_ref, -- in gpu_unlock_unref (gpu_term at 0)
++    int init_count;  // vpu_qpu_init / vpu_qpu_term nesting; gates vc_gpuserv_init / vc_gpuserv_deinit
++    int vpu_i_cache_flushed;  // ORed into the VPU execute address; set to 1 once a VPU job has been queued
++    GPU_MEM_PTR_T qpu_code_gm_ptr;  // Copy of the QPU shader code
++    GPU_MEM_PTR_T code_gm_ptr;  // struct GPU: VPU code plus transform coefficients
++    GPU_MEM_PTR_T dummy_gm_ptr;  // 0x4000 bytes filled with 0x80
++    vq_wait_pool_t wait_pool;
++#if RPI_TRACE_TIME_VPU_QPU_WAIT
++    trace_time_wait_t ttw;
++#endif
++} gpu_env_t;
++
++// Stop more than one thread trying to allocate memory or use the processing resources at once
++static pthread_mutex_t gpu_mutex = PTHREAD_MUTEX_INITIALIZER;
++static gpu_env_t * gpu = NULL;
++
++#if RPI_TRACE_TIME_VPU_QPU_WAIT
++
++static int64_t ns_time(void)  // CLOCK_MONOTONIC reading in nanoseconds
++{
++    struct timespec ts;
++    clock_gettime(CLOCK_MONOTONIC, &ts);  // Return value is not checked
++    return (int64_t)ts.tv_sec * (int64_t)1000000000 + ts.tv_nsec;
++}
++
++
++#define WAIT_TIME_PRINT_PERIOD (int64_t)2000000000
++
++#define T_MS(t) ((unsigned int)((t)/(int64_t)1000000) % 1000U)
++#define T_SEC(t) (unsigned int)((t)/(int64_t)1000000000)
++#define T_ARG(t) T_SEC(t), T_MS(t)
++#define T_FMT "%u.%03u"
++
++static void tto_print(trace_time_one_t * tto, const int64_t now, const int64_t start0, const char * const prefix)  // Prints idle time and the first four level totals
++{
++    // Update totals for levels that are still pending
++    for (int i = 0; i < tto->count; ++i) {
++        tto->total[i] += now - tto->start[i];
++        tto->start[i] = now;  // Restart the level so the same time is not counted twice
++    }
++
++    printf("%s: Idle:" T_FMT ", 1:" T_FMT ", 2:" T_FMT ", 3:" T_FMT ", 4:" T_FMT "\n",
++         prefix,
++         T_ARG(now - start0 - tto->total[0]),  // Idle = elapsed time minus time with at least one level open
++         T_ARG(tto->total[0]),
++         T_ARG(tto->total[1]),
++         T_ARG(tto->total[2]),
++         T_ARG(tto->total[3]));  // Levels beyond the fourth are tracked but not printed
++}
++
++
++static void tto_start(trace_time_one_t * const tto, const int64_t now)  // Opens a new nesting level starting at now
++{
++    av_assert0(tto->count < WAIT_COUNT_MAX);  // start[] has WAIT_COUNT_MAX entries
++    tto->start[tto->count++] = now;
++}
++
++static void tto_end(trace_time_one_t * const tto, const int64_t now)  // Closes the most recently opened level
++{
++    const int n = --tto->count;
++    av_assert0(n >= 0);  // Catches a tto_end without a matching tto_start
++    tto->total[n] += now - tto->start[n];
++}
++
++static void ttw_print(trace_time_wait_t * const ttw, const int64_t now)  // Prints the job count, elapsed time and both timer sets
++{
++    printf("Jobs:%d, Total time=" T_FMT "\n", ttw->jcount, T_ARG(now - ttw->start0));
++    tto_print(&ttw->active, now, ttw->start0, "Active");
++    tto_print(&ttw->wait,   now, ttw->start0, "  Wait");
++}
++
++#endif
++
++// GPU memory alloc fns (internal)
++
++static void gpu_free_internal(GPU_MEM_PTR_T * const p)  // Releases whatever parts of p are set; a zeroed p is a no-op
++{
++    if (p->arm != NULL)
++        vcsm_unlock_ptr(p->arm);  // Undo the vcsm_lock that produced the ARM pointer
++    if (p->vcsm_handle != 0)
++        vcsm_free(p->vcsm_handle);
++    memset(p, 0, sizeof(*p));  // Ensure we crash hard if we try and use this again
++}
++
++
++static int gpu_malloc_internal(GPU_MEM_PTR_T * const p,  // Returns 0 with p filled in, or AVERROR(ENOMEM) with p zeroed
++    const int numbytes, const unsigned int cache_type, const char * const name)
++{
++    memset(p, 0, sizeof(*p));  // So a partial failure can be handed to gpu_free_internal
++    p->numbytes = (numbytes + 255) & ~255;  // Round up
++
++    if ((p->vcsm_handle = vcsm_malloc_cache(p->numbytes, cache_type | 0x80, (char *)name)) == 0 ||  // NOTE(review): meaning of the 0x80 bit is not visible here
++        (p->vc_handle = vcsm_vc_hdl_from_hdl(p->vcsm_handle)) == 0 ||
++        (p->arm = vcsm_lock(p->vcsm_handle)) == NULL ||
++        (p->vc = vcsm_vc_addr_from_hdl(p->vcsm_handle)) == 0)  // || short-circuits, so the chain stops at the first failing step
++    {
++        gpu_free_internal(p);
++        return AVERROR(ENOMEM);  // Every failure is reported as ENOMEM
++    }
++    return 0;
++}
++
++
++// GPU init, free, lock, unlock
++
++static void gpu_term(void)  // Tears down the global env; called from gpu_unlock_unref when open_count reaches 0
++{
++    gpu_env_t * const ge = gpu;
++
++    // We have to hope that everything has terminated...
++    gpu = NULL;
++
++    vc_gpuserv_deinit();  // NOTE(review): vpu_qpu_term also calls this when init_count reaches 0 - confirm a repeated call is harmless
++
++    gpu_free_internal(&ge->code_gm_ptr);
++    gpu_free_internal(&ge->qpu_code_gm_ptr);
++    gpu_free_internal(&ge->dummy_gm_ptr);
++
++    vcsm_exit();  // Pairs with vcsm_init in gpu_init
++
++    vq_wait_pool_deinit(&ge->wait_pool);
++
++    free(ge);
++}
++
++
++// Connect to QPU, returns 0 on success.
++static int gpu_init(gpu_env_t ** const gpu) {  // On success *gpu is a fully populated env; on any failure *gpu stays NULL
++    volatile struct GPU* ptr;
++    gpu_env_t * const ge = calloc(1, sizeof(gpu_env_t));  // Zeroed, so GPU_MEM_PTR_Ts not yet allocated are safe to free
++    int rv;
++    *gpu = NULL;
++    if (ge == NULL)
++        return -1;
++    vq_wait_pool_init(&ge->wait_pool);
++    vcsm_init();
++    // Now copy over the QPU code into GPU memory
++    if ((rv = gpu_malloc_internal(&ge->qpu_code_gm_ptr, QPU_CODE_SIZE * 4, VCSM_CACHE_TYPE_NONE, "ffmpeg qpu code")) != 0)
++        goto fail;
++    {
++        int num_bytes = (char *)mc_end - (char *)ff_hevc_rpi_shader;
++        av_assert0(num_bytes<=QPU_CODE_SIZE*sizeof(unsigned int));
++        memcpy(ge->qpu_code_gm_ptr.arm, ff_hevc_rpi_shader, num_bytes);
++        memset(ge->qpu_code_gm_ptr.arm + num_bytes, 0, QPU_CODE_SIZE*4 - num_bytes);
++    }
++
++    // And the VPU code
++    if ((rv = gpu_malloc_internal(&ge->code_gm_ptr, sizeof(struct GPU), VCSM_CACHE_TYPE_VC, "ffmpeg vpu code")) != 0)
++        goto fail;
++    ptr = (volatile struct GPU*)ge->code_gm_ptr.arm;
++    // Zero everything so we have zeros between the code bits
++    memset((void *)ptr, 0, sizeof(*ptr));
++    {
++        int num_bytes = sizeof(rpi_hevc_transform8);
++        av_assert0(num_bytes<=VPU_CODE_SIZE*sizeof(unsigned int));
++        memcpy((void*)ptr->vpu_code8, rpi_hevc_transform8, num_bytes);
++    }
++    {
++        int num_bytes = sizeof(rpi_hevc_transform10);
++        av_assert0(num_bytes<=VPU_CODE_SIZE*sizeof(unsigned int));
++        memcpy((void*)ptr->vpu_code10, rpi_hevc_transform10, num_bytes);
++    }
++    // And the transform coefficients
++    memcpy((void*)ptr->transMatrix2even, rpi_transMatrix2even, sizeof(rpi_transMatrix2even));
++
++    // Generate a dummy "frame" & fill with 0x80
++    // * Could reset to 1 <<bit_depth?
++    if ((rv = gpu_malloc_internal(&ge->dummy_gm_ptr, 0x4000, VCSM_CACHE_TYPE_NONE, "ffmpeg dummy frame")) != 0)
++        goto fail;
++    memset(ge->dummy_gm_ptr.arm, 0x80, 0x4000);
++    *gpu = ge;
++    return 0;
++fail:  // Undo the partial setup (mirrors gpu_term) so a failed init leaks nothing
++    gpu_free_internal(&ge->code_gm_ptr);  // No-op if still zeroed; dummy_gm_ptr is never live on this path
++    gpu_free_internal(&ge->qpu_code_gm_ptr);
++    vcsm_exit();
++    vq_wait_pool_deinit(&ge->wait_pool);
++    free(ge);
++    return rv;
++}
++
++
++
++static void gpu_unlock(void) {  // Releases gpu_mutex; the caller must currently hold it
++    pthread_mutex_unlock(&gpu_mutex);
++}
++
++// Take gpu_mutex and return the global env, which must already exist (no init is done here)
++static gpu_env_t * gpu_lock(void) {
++    pthread_mutex_lock(&gpu_mutex);
++
++    av_assert1(gpu != NULL);  // Callers are expected to hold a reference already
++    return gpu;  // Returned with the mutex still held
++}
++
++static gpu_env_t * gpu_lock_ref(void)  // Locks, creates the env on first use and takes a reference
++{
++    pthread_mutex_lock(&gpu_mutex);
++
++    if (gpu == NULL) {
++        int rv = gpu_init(&gpu);
++        if (rv != 0) {
++            gpu_unlock();  // Failure path: the mutex is NOT held on return
++            return NULL;
++        }
++    }
++
++    ++gpu->open_count;
++    return gpu;  // Success path: returned with the mutex held
++}
++
++static void gpu_unlock_unref(gpu_env_t * const ge)  // Drops one reference, then releases the mutex; call with the mutex held
++{
++    if (--ge->open_count == 0)
++        gpu_term();  // Last reference: frees the env, so ge is invalid from here on
++
++    gpu_unlock();
++}
++
++static inline gpu_env_t * gpu_ptr(void)  // Returns the global env without taking gpu_mutex
++{
++    av_assert1(gpu != NULL);
++    return gpu;
++}
++
++unsigned int vpu_get_fn(const unsigned int bit_depth) {  // Returns code_gm_ptr.vc plus the offset of the VPU code for bit_depth (8 or 10)
++  uint32_t a = 0;
++
++  // Make sure that the gpu is initialized
++  av_assert1(gpu != NULL);
++  switch (bit_depth){
++    case 8:
++      a = gpu->code_gm_ptr.vc + offsetof(struct GPU, vpu_code8);
++      break;
++    case 10:
++      a = gpu->code_gm_ptr.vc + offsetof(struct GPU, vpu_code10);
++      break;
++    default:
++      av_assert0(0);  // Any other depth is treated as a programming error
++  }
++  return a;
++}
++
++unsigned int vpu_get_constants(void) {  // Returns code_gm_ptr.vc plus the offset of the transform coefficient table
++  av_assert1(gpu != NULL);
++  return (gpu->code_gm_ptr.vc + offsetof(struct GPU,transMatrix2even));
++}
++
++void gpu_ref(void)  // Takes a reference on the global env, creating it on first use
++{
++  if (gpu_lock_ref() != NULL)  // On failure gpu_lock_ref has already released the mutex
++    gpu_unlock();              // so only unlock when the reference was actually taken
++}
++
++void gpu_unref(void)  // Drops one reference; the env is torn down when the last one goes
++{
++  gpu_env_t * const ge = gpu_lock();
++  gpu_unlock_unref(ge);
++}
++
++// ----------------------------------------------------------------------------
++
++
++// Wait abstractions - mostly so we can easily add profile code
++static void vq_wait_pool_init(vq_wait_pool_t * const wp)  // Initialises every semaphore and chains all entries into the free list
++{
++  unsigned int i;
++  for (i = 0; i != VQ_WAIT_POOL_SIZE; ++i) {
++    sem_init(&wp->pool[i].sem, 0, 0);  // Initial count 0; return value is not checked
++    wp->pool[i].next = wp->pool + i + 1;  // The last entry's link is corrected after the loop
++  }
++  wp->head = wp->pool + 0;
++  wp->pool[VQ_WAIT_POOL_SIZE - 1].next = NULL;  // Terminate the list
++}
++
++static void vq_wait_pool_deinit(vq_wait_pool_t * const wp)  // Destroys every semaphore and empties the free list
++{
++  unsigned int i;
++  wp->head = NULL;
++  for (i = 0; i != VQ_WAIT_POOL_SIZE; ++i) {
++    sem_destroy(&wp->pool[i].sem);  // All entries are destroyed, whether or not they are currently handed out
++    wp->pool[i].next = NULL;
++  }
++}
++
++
++// If sem_init actually takes time then maybe we want a pool...
++static vq_wait_t * vq_wait_new(void)  // Pops a wait object off the pool; the env reference taken here is dropped in vq_wait_delete
++{
++  gpu_env_t * const ge = gpu_lock_ref();  // NOTE(review): may be NULL if gpu_init fails - not checked before use
++  vq_wait_t * const wait = ge->wait_pool.head;  // NOTE(review): NULL if all VQ_WAIT_POOL_SIZE entries are in use - not checked
++  ge->wait_pool.head = wait->next;
++  wait->next = NULL;
++
++#if RPI_TRACE_TIME_VPU_QPU_WAIT
++  tto_start(&ge->ttw.active, ns_time());
++#endif
++
++  gpu_unlock();  // Mutex released; the open_count reference is kept
++  return wait;
++}
++
++static void vq_wait_delete(vq_wait_t * const wait)  // Pushes wait back onto the pool and drops the reference taken by vq_wait_new
++{
++  gpu_env_t * const ge = gpu_lock();
++  wait->next = ge->wait_pool.head;
++  ge->wait_pool.head = wait;
++
++#if RPI_TRACE_TIME_VPU_QPU_WAIT
++  {
++    trace_time_wait_t * const ttw = &ge->ttw;
++    const int64_t now = ns_time();
++    ++ttw->jcount;
++    tto_end(&ttw->wait, now);  // Closes the level opened in vq_wait_wait
++
++    if (ttw->start0 == 0)  // First job: latch the trace origin
++    {
++      ttw->start0 = ttw->active.start[0];
++      ttw->last_update = ttw->start0;
++    }
++    if (now - ttw->last_update > WAIT_TIME_PRINT_PERIOD)  // Print at most one report per period
++    {
++      ttw->last_update += WAIT_TIME_PRINT_PERIOD;
++      ttw_print(ttw, now);
++    }
++  }
++#endif
++  gpu_unlock_unref(ge);  // May tear the env down if this was the last reference
++}
++
++static void vq_wait_wait(vq_wait_t * const wait)  // Blocks until wait's semaphore has been posted
++{
++#if RPI_TRACE_TIME_VPU_QPU_WAIT
++  {
++      const int64_t now = ns_time();
++      gpu_env_t * const ge = gpu_lock();
++      tto_start(&ge->ttw.wait, now);
++      gpu_unlock();
++  }
++#endif
++
++  while (sem_wait(&wait->sem) == -1 && errno == EINTR)  // Retry only when interrupted; any other error ends the loop
++    /* loop */;
++}
++
++static void vq_wait_post(vq_wait_t * const wait)  // Posts wait's semaphore, releasing a thread blocked in vq_wait_wait
++{
++#if RPI_TRACE_TIME_VPU_QPU_WAIT
++  {
++    gpu_env_t *const ge = gpu_lock();
++    tto_end(&ge->ttw.active, ns_time());  // Closes the level opened in vq_wait_new
++    gpu_unlock();
++  }
++#endif
++
++  sem_post(&wait->sem);
++}
++
++
++
++// Header comments were wrong for these two
++#define VPU_QPU_MASK_QPU  1
++#define VPU_QPU_MASK_VPU  2
++
++typedef struct vpu_qpu_job_env_s vpu_qpu_job_env_t;
++
++vpu_qpu_job_env_t * vpu_qpu_job_init(vpu_qpu_job_env_t * const buf)  // Resets caller-provided storage to an empty job list and returns it
++{
++//  vpu_qpu_job_env_t * vqj = calloc(1, sizeof(vpu_qpu_job_env_t));
++  vpu_qpu_job_env_t * vqj = buf;
++//  memset(vqj, 0, sizeof(*vqj));
++  vqj->n = 0;  // Only n and mask are reset; the job array itself is left as is
++  vqj->mask = 0;
++  return vqj;
++}
++
++void vpu_qpu_job_delete(vpu_qpu_job_env_t * const vqj)  // Currently a no-op: the env is caller-owned storage
++{
++//  memset(vqj, 0, sizeof(*vqj));
++//  free(vqj);
++}
++
++static inline struct gpu_job_s * new_job(vpu_qpu_job_env_t * const vqj)  // Returns the next unused job slot and bumps the job count
++{
++  struct gpu_job_s * const j = vqj->j + vqj->n++;
++  av_assert1(vqj->n <= VPU_QPU_JOB_MAX);  // Checked after the slot has been claimed
++  return j;
++}
++
++void vpu_qpu_job_add_vpu(vpu_qpu_job_env_t * const vqj, const uint32_t vpu_code,  // Queues one VPU job; does nothing when vpu_code is 0
++  const unsigned r0, const unsigned r1, const unsigned r2, const unsigned r3, const unsigned r4, const unsigned r5)
++{
++  if (vpu_code != 0) {
++    struct gpu_job_s *const j = new_job(vqj);
++    vqj->mask |= VPU_QPU_MASK_VPU;  // Remember that a sync must cover the VPU
++
++    j->command = EXECUTE_VPU;
++    j->callback.func = 0;  // No completion callback until an add_sync_* call attaches one
++    j->callback.cookie = NULL;
++    // The bottom two bits of the execute address contain no-flush flags
++    // b0 will flush the VPU I-cache if unset so we nearly always want that set
++    // as we never reload code
++    j->u.v.q[0] = vpu_code | gpu->vpu_i_cache_flushed;  // Global env is read here without taking gpu_mutex
++    j->u.v.q[1] = r0;
++    j->u.v.q[2] = r1;
++    j->u.v.q[3] = r2;
++    j->u.v.q[4] = r3;
++    j->u.v.q[5] = r4;
++    j->u.v.q[6] = r5;
++    gpu->vpu_i_cache_flushed = 1;  // Later jobs get b0 set in their execute address
++  }
++}
++
++// flags are QPU_FLAGS_xxx
++void vpu_qpu_job_add_qpu(vpu_qpu_job_env_t * const vqj, const unsigned int n, const uint32_t * const mail)  // Queues one QPU request of n mail entries; does nothing when n is 0
++{
++  if (n != 0) {
++    struct gpu_job_s *const j = new_job(vqj);
++    vqj->mask |= VPU_QPU_MASK_QPU;  // Remember that a sync must cover the QPU
++
++    j->command = EXECUTE_QPU;
++    j->callback.func = 0;  // No completion callback until an add_sync_* call attaches one
++    j->callback.cookie = NULL;
++
++    j->u.q.jobs = n;
++#if RPI_TRACE_QPU_PROFILE_ALL
++    j->u.q.noflush = QPU_FLAGS_NO_FLUSH_VPU | QPU_FLAGS_PROF_CLEAR_AND_ENABLE | QPU_FLAGS_PROF_OUTPUT_COUNTS;
++#else
++    j->u.q.noflush = QPU_FLAGS_NO_FLUSH_VPU;
++#endif
++    j->u.q.timeout = 5000;  // presumably milliseconds - TODO confirm against the gpuserv interface
++    memcpy(j->u.q.control, mail, n * QPU_MAIL_EL_VALS * sizeof(uint32_t));  // NOTE(review): n is not checked against the size of control here
++  }
++}
++
++// Convert callback to sem post
++static void vpu_qpu_job_callback_wait(void * v)  // v is the vq_wait_t stored as callback.cookie by vpu_qpu_job_add_sync_this
++{
++  vq_wait_post(v);
++}
++
++// Poke a user-supplied sem
++static void vpu_qpu_job_callback_sem(void * v)  // v is the sem_t stored as callback.cookie by vpu_qpu_job_add_sync_sem
++{
++  sem_post((sem_t *)v);
++}
++
++void vpu_qpu_job_add_sync_this(vpu_qpu_job_env_t * const vqj, vpu_qpu_wait_h * const wait_h)  // Attaches a completion wait; *wait_h receives it, or NULL if nothing is pending
++{
++  vq_wait_t * wait;
++
++  if (vqj->mask == 0) {  // Nothing queued since the last sync
++    *wait_h = NULL;
++    return;
++  }
++
++  // We are going to want a sync object
++  wait = vq_wait_new();
++
++  // There are 2 VPU Qs & 1 QPU Q so we can collapse sync
++  // If we only posted one thing or only QPU jobs
++  if (vqj->n == 1 || vqj->mask == VPU_QPU_MASK_QPU)
++  {
++    struct gpu_job_s * const j = vqj->j + (vqj->n - 1);  // Reuse the last job's callback instead of adding a job
++    av_assert1(j->callback.func == 0);
++
++    j->callback.func = vpu_qpu_job_callback_wait;
++    j->callback.cookie = wait;
++  }
++  else
++  {
++    struct gpu_job_s *const j = new_job(vqj);  // Otherwise append an explicit sync job
++
++    j->command = EXECUTE_SYNC;
++    j->u.s.mask = vqj->mask;  // Sync on every unit used since the last sync
++    j->callback.func = vpu_qpu_job_callback_wait;
++    j->callback.cookie = wait;
++  }
++
++  vqj->mask = 0;  // Jobs queued so far are now covered by this sync
++  *wait_h = wait;
++}
++
++// Returns 0 if no sync added ('cos Q empty), 1 if sync added
++int vpu_qpu_job_add_sync_sem(vpu_qpu_job_env_t * const vqj, sem_t * const sem)  // Like vpu_qpu_job_add_sync_this but posts the caller's semaphore
++{
++  // If nothing on q then just return
++  if (vqj->mask == 0)
++    return 0;
++
++  // There are 2 VPU Qs & 1 QPU Q so we can collapse sync
++  // If we only posted one thing or only QPU jobs
++  if (vqj->n == 1 || vqj->mask == VPU_QPU_MASK_QPU)
++  {
++    struct gpu_job_s * const j = vqj->j + (vqj->n - 1);  // Reuse the last job's callback instead of adding a job
++    av_assert1(j->callback.func == 0);
++
++    j->callback.func = vpu_qpu_job_callback_sem;
++    j->callback.cookie = sem;
++  }
++  else
++  {
++    struct gpu_job_s *const j = new_job(vqj);  // Otherwise append an explicit sync job
++
++    j->command = EXECUTE_SYNC;
++    j->u.s.mask = vqj->mask;  // Sync on every unit used since the last sync
++    j->callback.func = vpu_qpu_job_callback_sem;
++    j->callback.cookie = sem;
++  }
++
++  vqj->mask = 0;  // Jobs queued so far are now covered by this sync
++  return 1;
++}
++
++
++int vpu_qpu_job_start(vpu_qpu_job_env_t * const vqj)  // Submits the queued jobs; returns 0 when there are none
++{
++  if (vqj->n == 0)
++    return 0;
++
++  return vc_gpuserv_execute_code(vqj->n, vqj->j);  // Result of the submission is returned unchanged
++}
++
++// Simple wrapper of start + delete
++int vpu_qpu_job_finish(vpu_qpu_job_env_t * const vqj)  // Returns whatever vpu_qpu_job_start returned
++{
++  int rv;
++  rv = vpu_qpu_job_start(vqj);
++  vpu_qpu_job_delete(vqj);  // Called regardless of rv
++  return rv;
++}
++
++void vpu_qpu_wait(vpu_qpu_wait_h * const wait_h)  // Waits on *wait_h (if any) and recycles it; NULL handle or NULL *wait_h is a no-op
++{
++  if (wait_h != NULL)
++  {
++    vq_wait_t * const wait = *wait_h;
++    if (wait != NULL) {
++      *wait_h = NULL;  // Cleared first so calling again with the same handle does nothing
++      vq_wait_wait(wait);
++      vq_wait_delete(wait);
++    }
++  }
++}
++
++int vpu_qpu_init(void)  // Returns 0 on success, -1 if no GPU env could be obtained
++{
++  gpu_env_t * const ge = gpu_lock_ref();
++  if (ge == NULL)
++    return -1;
++
++  if (ge->init_count++ == 0)  // Only the first init starts gpuserv
++  {
++    vc_gpuserv_init();
++  }
++
++  gpu_unlock();
++  return 0;
++}
++
++void vpu_qpu_term(void)  // Drops one init count; the last one shuts gpuserv down
++{
++  gpu_env_t * const ge = gpu_lock();
++
++  if (--ge->init_count == 0) {
++    vc_gpuserv_deinit();
++
++#if RPI_TRACE_TIME_VPU_QPU_WAIT
++    ttw_print(&ge->ttw, ns_time());
++#endif
++  }
++
++  gpu_unlock_unref(ge);
++}
++
++uint32_t qpu_fn(const int * const mc_fn)
++{
++  return gpu->qpu_code_gm_ptr.vc + ((const char *)mc_fn - (const char *)ff_hevc_rpi_shader);
++}
++
++uint32_t qpu_dummy(void)
++{
++  return gpu->dummy_gm_ptr.vc;
++}
++
++int rpi_hevc_qpu_init_fn(HEVCRpiQpu * const qf, const unsigned int bit_depth)
++{
++  // Dummy values we can catch with emulation (every field, so none is left uninitialised on error)
++  qf->y_pxx = ~1U;
++  qf->y_bxx = ~2U;
++  qf->y_p00 = ~3U;
++  qf->y_b00 = ~4U;
++  qf->c_pxx = ~5U;
++  qf->c_bxx = ~6U;
++  qf->c_pxx_l1 = ~7U;
++
++  switch (bit_depth) {
++    case 8:
++      qf->y_pxx = qpu_fn(mc_filter_y_pxx);
++      qf->y_bxx = qpu_fn(mc_filter_y_bxx);
++      qf->y_p00 = qpu_fn(mc_filter_y_p00);
++      qf->y_b00 = qpu_fn(mc_filter_y_b00);
++      qf->c_pxx = qpu_fn(mc_filter_c_p);
++      qf->c_pxx_l1 = qpu_fn(mc_filter_c_p_l1);
++      qf->c_bxx = qpu_fn(mc_filter_c_b);
++      break;
++    case 10:
++      qf->c_pxx = qpu_fn(mc_filter_c10_p);
++      qf->c_pxx_l1 = qpu_fn(mc_filter_c10_p_l1);
++      qf->c_bxx = qpu_fn(mc_filter_c10_b);
++      qf->y_pxx = qpu_fn(mc_filter_y10_pxx);
++      qf->y_bxx = qpu_fn(mc_filter_y10_bxx);
++      qf->y_p00 = qpu_fn(mc_filter_y10_p00);
++      qf->y_b00 = qpu_fn(mc_filter_y10_b00);
++      break;
++    default:
++      return -1;  // Unsupported bit depth: qf keeps the dummy values
++  }
++  return 0;
++}
++
+--- /dev/null
++++ b/libavcodec/rpi_qpu.h
+@@ -0,0 +1,103 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox, Ben Avison
++*/
++
++#ifndef RPI_QPU_H
++#define RPI_QPU_H
++
++#include "rpi_mem.h"
++#include "rpi_zc_frames.h"
++
++#pragma GCC diagnostic push
++// Many many redundant decls in the header files
++#pragma GCC diagnostic ignored "-Wredundant-decls"
++#pragma GCC diagnostic ignored "-Wstrict-prototypes"
++#include "interface/vmcs_host/vc_vchi_gpuserv.h"  // for gpu_job_s
++#pragma GCC diagnostic pop
++
++// QPU specific functions
++
++typedef struct HEVCRpiQpu {
++    uint32_t c_pxx;
++    uint32_t c_pxx_l1;
++    uint32_t c_bxx;
++    uint32_t y_pxx;
++    uint32_t y_bxx;
++    uint32_t y_p00;
++    uint32_t y_b00;
++} HEVCRpiQpu;
++
++int rpi_hevc_qpu_init_fn(HEVCRpiQpu * const qf, const unsigned int bit_depth);
++
++uint32_t qpu_fn(const int * const mc_fn);
++uint32_t qpu_dummy(void);
++
++#define QPU_N_GRP    4
++#define QPU_N_MAX    12
++
++#define QPU_MAIL_EL_VALS  2
++
++struct vq_wait_s;  // Opaque here; same tag as used by the handle typedef below
++typedef struct vq_wait_s * vpu_qpu_wait_h;
++
++// VPU specific functions
++
++struct vpu_qpu_job_env_s;
++typedef struct vpu_qpu_job_env_s * vpu_qpu_job_h;
++
++#define VPU_QPU_JOB_MAX 4
++struct vpu_qpu_job_env_s
++{
++  unsigned int n;
++  unsigned int mask;
++  struct gpu_job_s j[VPU_QPU_JOB_MAX];
++};
++typedef struct vpu_qpu_job_env_s vpu_qpu_job_env_t;
++
++vpu_qpu_job_h vpu_qpu_job_init(vpu_qpu_job_env_t * const buf);
++void vpu_qpu_job_delete(const vpu_qpu_job_h vqj);
++void vpu_qpu_job_add_vpu(const vpu_qpu_job_h vqj, const uint32_t vpu_code,
++  const unsigned r0, const unsigned r1, const unsigned r2, const unsigned r3, const unsigned r4, const unsigned r5);
++void vpu_qpu_job_add_qpu(const vpu_qpu_job_h vqj, const unsigned int n, const uint32_t * const mail);
++void vpu_qpu_job_add_sync_this(const vpu_qpu_job_h vqj, vpu_qpu_wait_h * const wait_h);
++int vpu_qpu_job_add_sync_sem(vpu_qpu_job_env_t * const vqj, sem_t * const sem);
++int vpu_qpu_job_start(const vpu_qpu_job_h vqj);
++int vpu_qpu_job_finish(const vpu_qpu_job_h vqj);
++
++extern unsigned int vpu_get_fn(const unsigned int bit_depth);
++extern unsigned int vpu_get_constants(void);
++
++// Waits for the previously posted code to complete and will null out *wait_h after use
++void vpu_qpu_wait(vpu_qpu_wait_h * const wait_h);
++int vpu_qpu_init(void);
++void vpu_qpu_term(void);
++
++void gpu_ref(void);
++void gpu_unref(void);
++
++#endif
+--- /dev/null
++++ b/libavcodec/rpi_zc.c
+@@ -0,0 +1,1227 @@
++#include "config.h"
++
++#include "libavcodec/avcodec.h"
++#include "rpi_mem.h"
++#include "rpi_mailbox.h"
++#include "rpi_zc.h"
++#include "libavutil/avassert.h"
++#include <pthread.h>
++
++#include "libavutil/buffer_internal.h"
++
++#pragma GCC diagnostic push
++// Many many redundant decls in the header files
++#pragma GCC diagnostic ignored "-Wredundant-decls"
++#include <interface/vctypes/vc_image_types.h>
++#include <interface/vcsm/user-vcsm.h>
++#pragma GCC diagnostic pop
++
++#define TRACE_ALLOC 0
++#define DEBUG_ALWAYS_KEEP_LOCKED 0
++
++struct ZcPoolEnt;
++
++typedef struct ZcPool
++{
++    size_t numbytes;
++    struct ZcPoolEnt * head;
++    pthread_mutex_t lock;
++} ZcPool;
++
++typedef struct ZcPoolEnt
++{
++    size_t numbytes;
++
++    unsigned int vcsm_handle;
++    unsigned int vc_handle;
++    void * map_arm;
++    unsigned int map_vc;
++
++    struct ZcPoolEnt * next;
++    struct ZcPool * pool;
++} ZcPoolEnt;
++
++typedef struct ZcOldCtxVals
++{
++    int thread_safe_callbacks;
++    int (*get_buffer2)(struct AVCodecContext *s, AVFrame *frame, int flags);
++    void * opaque;
++} ZcOldCtxVals;
++
++typedef struct AVZcEnv
++{
++    unsigned int refcount;
++    ZcOldCtxVals old;
++
++    void * pool_env;
++    av_rpi_zc_alloc_buf_fn_t * alloc_buf;
++    av_rpi_zc_free_pool_fn_t * free_pool;
++
++    unsigned int pool_size;
++} ZcEnv;
++
++typedef struct ZcUserBufEnv {
++    void * v;
++    const av_rpi_zc_buf_fn_tab_t * fn;
++    size_t numbytes;
++    int offset;
++} ZcUserBufEnv;
++
++#define ZC_BUF_INVALID  0
++#define ZC_BUF_VALID    1
++#define ZC_BUF_NEVER    2
++
++typedef struct ZcBufEnv {
++    GPU_MEM_PTR_T gmem;
++    AVZcEnvPtr zc;
++    int is_valid;
++    AVBufferRef * user;
++    AVRpiZcFrameGeometry geo;
++    size_t size_y;
++    size_t size_c;
++    size_t size_pic;
++    ssize_t offset;
++    pthread_mutex_t lock;
++    pthread_cond_t cond;
++} ZcBufEnv;
++
++
++
++
++
++
++#define ALLOC_PAD       0
++#define ALLOC_ROUND     0x1000
++#define STRIDE_ROUND    64
++#define STRIDE_OR       0
++
++#define DEBUG_ZAP0_BUFFERS 0
++
++static inline int av_rpi_is_sand_format(const int format)
++{
++    return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_SAND64_16) ||
++        (format == AV_PIX_FMT_RPI4_8 || format == AV_PIX_FMT_RPI4_10);
++}
++
++static inline int av_rpi_is_sand_frame(const AVFrame * const frame)
++{
++    return av_rpi_is_sand_format(frame->format);
++}
++
++//----------------------------------------------------------------------------
++//
++// Internal pool stuff
++
++// Pool entry functions
++
++static ZcPoolEnt * zc_pool_ent_alloc(ZcPool * const pool, const size_t req_size)  // NULL on failure
++{
++    ZcPoolEnt * const zp = av_mallocz(sizeof(ZcPoolEnt));
++
++    // Add ALLOC_PAD (currently 0) then round up to a multiple of ALLOC_ROUND (4k)
++    const unsigned int alloc_size = (req_size + ALLOC_PAD + ALLOC_ROUND - 1) & ~(ALLOC_ROUND - 1);
++
++    if (zp == NULL) {
++        av_log(NULL, AV_LOG_ERROR, "av_malloc(ZcPoolEnt) failed\n");
++        goto fail0;
++    }
++
++    // The 0x80 here maps all pages here rather than waiting for lazy mapping
++    // BEWARE that in GPU land a later unlock/lock pair will put us back into
++    // lazy mode - which will also break cache invalidate calls.
++    if ((zp->vcsm_handle = vcsm_malloc_cache(alloc_size, VCSM_CACHE_TYPE_HOST | 0x80, "ffmpeg_rpi_zc")) == 0)
++    {
++        av_log(NULL, AV_LOG_ERROR, "vcsm_malloc_cache(%u) failed\n", alloc_size);
++        goto fail1;
++    }
++
++#if TRACE_ALLOC
++    printf("%s: Alloc %#x bytes @ h=%d\n", __func__, alloc_size, zp->vcsm_handle);
++#endif
++
++    zp->numbytes = alloc_size;  // Record the rounded size, not req_size
++    zp->pool = pool;
++    return zp;
++
++fail1:
++    av_free(zp);
++fail0:
++    return NULL;
++}
++
++static void zc_pool_ent_free(ZcPoolEnt * const zp)
++{
++#if TRACE_ALLOC
++    printf("%s: Free %#x bytes @ h=%d\n", __func__, zp->numbytes, zp->vcsm_handle);
++#endif
++
++    if (zp->vcsm_handle != 0)
++    {
++        // VC addr & handle need no dealloc
++        if (zp->map_arm != NULL)
++            vcsm_unlock_hdl(zp->vcsm_handle);
++        vcsm_free(zp->vcsm_handle);
++    }
++    av_free(zp);
++}
++
++//----------------------------------------------------------------------------
++//
++// Pool functions
++
++static void zc_pool_free_ent_list(ZcPoolEnt * p)
++{
++    while (p != NULL)
++    {
++        ZcPoolEnt * const zp = p;
++        p = p->next;
++        zc_pool_ent_free(zp);
++    }
++}
++
++static void zc_pool_flush(ZcPool * const pool)  // Frees every cached entry; does not take pool->lock
++{
++    ZcPoolEnt * p = pool->head;
++    pool->head = NULL;
++    pool->numbytes = -1;  // Same "no size" sentinel as zc_pool_new/zc_pool_delete (all bits set in size_t)
++    zc_pool_free_ent_list(p);
++}
++
++static ZcPoolEnt * zc_pool_get_ent(ZcPool * const pool, const size_t req_bytes)
++{
++    ZcPoolEnt * zp = NULL;
++    ZcPoolEnt * flush_list = NULL;
++    size_t numbytes;
++
++    pthread_mutex_lock(&pool->lock);
++
++    numbytes = pool->numbytes;
++
++    // If size isn't close then dump the pool
++    // Close in this context means within 128k
++    if (req_bytes > numbytes || req_bytes + 0x20000 < numbytes)
++    {
++        flush_list = pool->head;
++        pool->head = NULL;
++        pool->numbytes = numbytes = req_bytes;
++    }
++    else if (pool->head != NULL)
++    {
++        zp = pool->head;
++        pool->head = zp->next;
++    }
++
++    pthread_mutex_unlock(&pool->lock);
++
++    zc_pool_free_ent_list(flush_list);
++
++    if (zp == NULL)
++        zp = zc_pool_ent_alloc(pool, numbytes);
++
++    return zp;
++}
++
++static void zc_pool_put_ent(ZcPoolEnt * const zp)
++{
++    ZcPool * const pool = zp == NULL ? NULL : zp->pool;
++    if (zp != NULL)
++    {
++        pthread_mutex_lock(&pool->lock);
++#if TRACE_ALLOC
++        printf("%s: Recycle %#x, %#x\n", __func__, pool->numbytes, zp->numbytes);
++#endif
++
++        if (pool->numbytes == zp->numbytes)
++        {
++            zp->next = pool->head;
++            pool->head = zp;
++            pthread_mutex_unlock(&pool->lock);
++        }
++        else
++        {
++            pthread_mutex_unlock(&pool->lock);
++            zc_pool_ent_free(zp);
++        }
++    }
++}
++
++static ZcPool *
++zc_pool_new(void)
++{
++    ZcPool * const pool = av_mallocz(sizeof(*pool));
++    if (pool == NULL)
++        return NULL;
++
++    pool->numbytes = -1;
++    pool->head = NULL;
++    pthread_mutex_init(&pool->lock, NULL);
++    return pool;
++}
++
++static void
++zc_pool_delete(ZcPool * const pool)
++{
++    if (pool != NULL)
++    {
++        pool->numbytes = -1;
++        zc_pool_flush(pool);
++        pthread_mutex_destroy(&pool->lock);
++        av_free(pool);
++    }
++}
++
++//============================================================================
++//
++// ZC implementation using above pool implementation
++//
++// Fn table fns...
++
++static void zc_pool_free_v(void * v)
++{
++    zc_pool_put_ent(v);
++}
++
++static unsigned int zc_pool_ent_vcsm_handle_v(void * v)
++{
++    ZcPoolEnt * zp = v;
++    return zp->vcsm_handle;
++}
++
++static unsigned int zc_pool_ent_vc_handle_v(void * v)
++{
++    ZcPoolEnt * zp = v;
++    if (zp->vc_handle == 0)
++    {
++        if ((zp->vc_handle = vcsm_vc_hdl_from_hdl(zp->vcsm_handle)) == 0)
++            av_log(NULL, AV_LOG_ERROR, "%s: Failed to map VCSM handle %d to VC handle\n",
++                   __func__, zp->vcsm_handle);
++    }
++    return zp->vc_handle;
++}
++
++static void * zc_pool_ent_map_arm_v(void * v)
++{
++    ZcPoolEnt * zp = v;
++    if (zp->map_arm == NULL)
++    {
++        if ((zp->map_arm = vcsm_lock(zp->vcsm_handle)) == NULL)
++            av_log(NULL, AV_LOG_ERROR, "%s: Failed to map VCSM handle %d to ARM address\n",
++                   __func__, zp->vcsm_handle);
++    }
++    return zp->map_arm;
++}
++
++static unsigned int zc_pool_ent_map_vc_v(void * v)
++{
++    ZcPoolEnt * zp = v;
++    if (zp->map_vc == 0)
++    {
++        if ((zp->map_vc = vcsm_vc_addr_from_hdl(zp->vcsm_handle)) == 0)
++            av_log(NULL, AV_LOG_ERROR, "%s: Failed to map VCSM handle %d to VC address\n",
++                   __func__, zp->vcsm_handle);
++    }
++    return zp->map_vc;
++}
++
++static const av_rpi_zc_buf_fn_tab_t zc_pool_buf_fns = {
++    .free        = zc_pool_free_v,
++    .vcsm_handle = zc_pool_ent_vcsm_handle_v,
++    .vc_handle   = zc_pool_ent_vc_handle_v,
++    .map_arm     = zc_pool_ent_map_arm_v,
++    .map_vc      = zc_pool_ent_map_vc_v,
++};
++
++// ZC Env fns
++
++// Delete pool
++// All buffers guaranteed freed by now
++static void
++zc_pool_delete_v(void * v)
++{
++    zc_pool_delete((ZcPool *)v);
++    rpi_mem_gpu_uninit();
++}
++
++// Allocate a new ZC buffer
++static AVBufferRef *  // NULL on failure
++zc_pool_buf_alloc(void * v, size_t size, const AVRpiZcFrameGeometry * geo)
++{
++    ZcPool * const pool = v;
++    ZcPoolEnt *const zp = zc_pool_get_ent(pool, size);
++    AVBufferRef * buf;
++
++    (void)geo;  // geo ignored here
++
++    if (zp == NULL) {
++        av_log(NULL, AV_LOG_ERROR, "zc_pool_alloc(%zu) failed\n", size);  // size is size_t: needs %zu
++        goto fail0;
++    }
++
++    if ((buf = av_rpi_zc_buf(size, 0, zp, &zc_pool_buf_fns)) == NULL)
++    {
++        av_log(NULL, AV_LOG_ERROR, "av_rpi_zc_buf() failed\n");
++        goto fail2;
++    }
++
++    return buf;
++
++fail2:
++    zc_pool_put_ent(zp);  // Hand the unused entry back to its pool
++fail0:
++    return NULL;
++}
++
++// Init wrappers - the public fns
++
++AVZcEnvPtr
++av_rpi_zc_int_env_alloc(void * logctx)
++{
++    ZcEnv * zc;
++    ZcPool * pool_env;
++
++    if (rpi_mem_gpu_init(0) < 0)
++        return NULL;
++
++    if ((pool_env = zc_pool_new()) == NULL)
++        goto fail1;
++
++    if ((zc = av_rpi_zc_env_alloc(logctx, pool_env, zc_pool_buf_alloc, zc_pool_delete_v)) == NULL)
++        goto fail2;
++
++    return zc;
++
++fail2:
++    zc_pool_delete(pool_env);
++fail1:
++    rpi_mem_gpu_uninit();
++    return NULL;
++}
++
++void
++av_rpi_zc_int_env_freep(AVZcEnvPtr * zcp)
++{
++    const AVZcEnvPtr zc = *zcp;
++    *zcp = NULL;
++    if (zc != NULL)
++        av_rpi_zc_env_release(zc);
++}
++
++//============================================================================
++//
++// Geometry
++//
++// This is a separate chunk to the rest
++
++// Get mailbox fd - should be in a lock when called
++// Rely on process close to close it
++static int mbox_fd(void)
++{
++    static int fd = -1;
++    if (fd != -1)
++        return fd;
++    return (fd = mbox_open());
++}
++
++AVRpiZcFrameGeometry av_rpi_zc_frame_geometry(
++    const int format, const unsigned int video_width, const unsigned int video_height)
++{
++    static pthread_mutex_t sand_lock = PTHREAD_MUTEX_INITIALIZER;
++
++    AVRpiZcFrameGeometry geo = {
++        .format       = format,
++        .video_width  = video_width,
++        .video_height = video_height
++    };
++
++    switch (format)
++    {
++        case AV_PIX_FMT_YUV420P:
++            geo.stride_y = ((video_width + 32 + STRIDE_ROUND - 1) & ~(STRIDE_ROUND - 1)) | STRIDE_OR;
++            geo.stride_c = geo.stride_y / 2;
++            geo.height_y = (video_height + 32 + 31) & ~31;
++            geo.height_c = geo.height_y / 2;
++            geo.planes_c = 2;
++            geo.stripes = 1;
++            geo.bytes_per_pel = 1;
++            geo.stripe_is_yc = 1;
++            break;
++
++        case AV_PIX_FMT_YUV420P10:
++            geo.stride_y = ((video_width * 2 + 64 + STRIDE_ROUND - 1) & ~(STRIDE_ROUND - 1)) | STRIDE_OR;
++            geo.stride_c = geo.stride_y / 2;
++            geo.height_y = (video_height + 32 + 31) & ~31;
++            geo.height_c = geo.height_y / 2;
++            geo.planes_c = 2;
++            geo.stripes = 1;
++            geo.bytes_per_pel = 2;
++            geo.stripe_is_yc = 1;
++            break;
++
++        case AV_PIX_FMT_SAND128:
++        case AV_PIX_FMT_RPI4_8:
++        {
++            const unsigned int stripe_w = 128;
++
++            static VC_IMAGE_T img = {0};
++
++            // Given the overhead of calling the mailbox keep a stashed
++            // copy as we will almost certainly just want the same numbers again
++            // but that means we need a lock
++            pthread_mutex_lock(&sand_lock);
++
++            if (img.width != video_width || img.height != video_height)
++            {
++                VC_IMAGE_T new_img = {
++                    .type = VC_IMAGE_YUV_UV,
++                    .width = video_width,
++                    .height = video_height
++                };
++
++                mbox_get_image_params(mbox_fd(), &new_img);
++                img = new_img;
++            }
++
++            geo.stride_y = stripe_w;
++            geo.stride_c = stripe_w;
++            geo.height_y = ((intptr_t)img.extra.uv.u - (intptr_t)img.image_data) / stripe_w;
++            geo.height_c = img.pitch / stripe_w - geo.height_y;
++            geo.stripe_is_yc = 1;
++            if (geo.height_y * stripe_w > img.pitch)
++            {
++                // "tall" sand - all C blocks now follow Y
++                geo.height_y = img.pitch / stripe_w;
++                geo.height_c = geo.height_y;
++                geo.stripe_is_yc = 0;
++            }
++            geo.planes_c = 1;
++            geo.stripes = (video_width + stripe_w - 1) / stripe_w;
++            geo.bytes_per_pel = 1;
++
++            pthread_mutex_unlock(&sand_lock);
++#if 0
++            printf("Req: %dx%d: stride=%d/%d, height=%d/%d, stripes=%d, img.pitch=%d\n",
++                   video_width, video_height,
++                   geo.stride_y, geo.stride_c,
++                   geo.height_y, geo.height_c,
++                   geo.stripes, img.pitch);
++#endif
++            av_assert0((int)geo.height_y > 0 && (int)geo.height_c > 0);
++            av_assert0(geo.height_y >= video_height && geo.height_c >= video_height / 2);
++            break;
++        }
++
++        case AV_PIX_FMT_RPI4_10:
++        {
++            const unsigned int stripe_w = 128;  // bytes
++
++            static pthread_mutex_t sand_lock = PTHREAD_MUTEX_INITIALIZER;
++            static VC_IMAGE_T img = {0};
++
++            // Given the overhead of calling the mailbox keep a stashed
++            // copy as we will almost certainly just want the same numbers again
++            // but that means we need a lock
++            pthread_mutex_lock(&sand_lock);
++
++            if (img.width != video_width || img.height != video_height)
++            {
++                VC_IMAGE_T new_img = {
++                    .type = VC_IMAGE_YUV10COL,
++                    .width = video_width,
++                    .height = video_height
++                };
++
++                mbox_get_image_params(mbox_fd(), &new_img);
++                img = new_img;
++            }
++
++            geo.stride_y = stripe_w;
++            geo.stride_c = stripe_w;
++            geo.height_y = ((intptr_t)img.extra.uv.u - (intptr_t)img.image_data) / stripe_w;
++            geo.height_c = img.pitch / stripe_w - geo.height_y;
++            geo.planes_c = 1;
++            geo.stripes = ((video_width * 4 + 2) / 3 + stripe_w - 1) / stripe_w;
++            geo.bytes_per_pel = 1;
++            geo.stripe_is_yc = 1;
++
++            pthread_mutex_unlock(&sand_lock);
++
++#if 0
++            printf("Req: %dx%d: stride=%d/%d, height=%d/%d, stripes=%d, img.pitch=%d\n",
++                   video_width, video_height,
++                   geo.stride_y, geo.stride_c,
++                   geo.height_y, geo.height_c,
++                   geo.stripes, img.pitch);
++#endif
++            av_assert0((int)geo.height_y > 0 && (int)geo.height_c > 0);
++            av_assert0(geo.height_y >= video_height && geo.height_c >= video_height / 2);
++            break;
++        }
++
++        case AV_PIX_FMT_SAND64_16:
++        case AV_PIX_FMT_SAND64_10:
++        {
++            const unsigned int stripe_w = 128;  // bytes
++
++            static pthread_mutex_t sand_lock = PTHREAD_MUTEX_INITIALIZER;
++            static VC_IMAGE_T img = {0};
++
++            // Given the overhead of calling the mailbox keep a stashed
++            // copy as we will almost certainly just want the same numbers again
++            // but that means we need a lock
++            pthread_mutex_lock(&sand_lock);
++
++             if (img.width != video_width || img.height != video_height)
++            {
++                VC_IMAGE_T new_img = {
++                    .type = VC_IMAGE_YUV_UV_16,
++                    .width = video_width,
++                    .height = video_height
++                };
++
++                mbox_get_image_params(mbox_fd(), &new_img);
++                img = new_img;
++            }
++
++            geo.stride_y = stripe_w;
++            geo.stride_c = stripe_w;
++            geo.height_y = ((intptr_t)img.extra.uv.u - (intptr_t)img.image_data) / stripe_w;
++            geo.height_c = img.pitch / stripe_w - geo.height_y;
++            geo.planes_c = 1;
++            geo.stripes = (video_width * 2 + stripe_w - 1) / stripe_w;
++            geo.bytes_per_pel = 2;
++            geo.stripe_is_yc = 1;
++
++            pthread_mutex_unlock(&sand_lock);
++            break;
++        }
++
++        default:
++            break;
++    }
++    return geo;
++}
++
++//============================================================================
++//
++// ZC Env fns
++//
++// Frame copy fns
++
++static AVBufferRef * zc_copy(const AVZcEnvPtr zc,
++    const AVFrame * const src)
++{
++    AVFrame dest_frame;
++    AVFrame * const dest = &dest_frame;
++    unsigned int i;
++    uint8_t * psrc, * pdest;
++
++    dest->format = src->format;
++    dest->width = src->width;
++    dest->height = src->height;
++
++    if (av_rpi_zc_get_buffer(zc, dest) != 0 ||
++        av_rpi_zc_resolve_frame(dest, ZC_RESOLVE_ALLOC_VALID) != 0)
++    {
++        return NULL;
++    }
++
++    for (i = 0, psrc = src->data[0], pdest = dest->data[0];
++         i != dest->height;
++         ++i, psrc += src->linesize[0], pdest += dest->linesize[0])
++    {
++        memcpy(pdest, psrc, dest->width);
++    }
++    for (i = 0, psrc = src->data[1], pdest = dest->data[1];
++         i != dest->height / 2;
++         ++i, psrc += src->linesize[1], pdest += dest->linesize[1])
++    {
++        memcpy(pdest, psrc, dest->width / 2);
++    }
++    for (i = 0, psrc = src->data[2], pdest = dest->data[2];
++         i != dest->height / 2;
++         ++i, psrc += src->linesize[2], pdest += dest->linesize[2])
++    {
++        memcpy(pdest, psrc, dest->width / 2);
++    }
++
++    return dest->buf[0];
++}
++
++
++static AVBufferRef * zc_420p10_to_sand128(const AVZcEnvPtr zc,
++    const AVFrame * const src)
++{
++    assert(0);
++    return NULL;
++}
++
++
++static AVBufferRef * zc_sand64_16_to_sand128(const AVZcEnvPtr zc,
++    const AVFrame * const src, const unsigned int src_bits)
++{
++    assert(0);
++    return NULL;
++}
++
++//----------------------------------------------------------------------------
++//
++// Public info extraction calls
++
++static void zc_buf_env_free_cb(void * opaque, uint8_t * data);
++
++static inline ZcBufEnv * pic_zbe_ptr(AVBufferRef *const buf)
++{
++    // Kludge where we check the free fn to check this is really
++    // one of our buffers - can't think of a better way
++    return buf == NULL || buf->buffer->free != zc_buf_env_free_cb ? NULL :
++        av_buffer_get_opaque(buf);
++}
++
++static inline GPU_MEM_PTR_T * pic_gm_ptr(AVBufferRef * const buf)
++{
++    // As gmem is the first el NULL should be preserved
++    return &pic_zbe_ptr(buf)->gmem;
++}
++
++unsigned int av_rpi_zc_vcsm_handle(const AVRpiZcRefPtr fr_ref)
++{
++    const GPU_MEM_PTR_T * const p = pic_gm_ptr(fr_ref);
++    return p == NULL ? 0 : p->vcsm_handle;
++}
++
++int av_rpi_zc_vc_handle(const AVRpiZcRefPtr fr_ref)
++{
++    const GPU_MEM_PTR_T * const p = pic_gm_ptr(fr_ref);
++    return p == NULL ? -1 : p->vc_handle;
++}
++
++int av_rpi_zc_offset(const AVRpiZcRefPtr fr_ref)
++{
++    const ZcBufEnv * const zbe = pic_zbe_ptr(fr_ref);
++    return zbe == NULL ? 0 : zbe->offset;
++}
++
++int av_rpi_zc_length(const AVRpiZcRefPtr fr_ref)
++{
++    const ZcBufEnv * const zbe = pic_zbe_ptr(fr_ref);
++    return zbe == NULL ? 0 : zbe->size_pic;
++}
++
++int av_rpi_zc_numbytes(const AVRpiZcRefPtr fr_ref)
++{
++    const GPU_MEM_PTR_T * const p = pic_gm_ptr(fr_ref);
++    return p == NULL ? 0 : p->numbytes;
++}
++
++const AVRpiZcFrameGeometry * av_rpi_zc_geometry(const AVRpiZcRefPtr fr_ref)
++{
++    const ZcBufEnv * const zbe = pic_zbe_ptr(fr_ref);
++    return zbe == NULL ? NULL : &zbe->geo;
++}
++
++AVRpiZcRefPtr av_rpi_zc_ref(void * const logctx, const AVZcEnvPtr zc,
++    const AVFrame * const frame, const enum AVPixelFormat expected_format, const int maycopy)
++{
++    av_assert0(!maycopy || zc != NULL);
++
++    if (frame->format != AV_PIX_FMT_YUV420P &&
++        frame->format != AV_PIX_FMT_YUV420P10 &&
++        !av_rpi_is_sand_frame(frame))
++    {
++        av_log(logctx, AV_LOG_WARNING, "%s: *** Format not SAND/YUV420P: %d\n", __func__, frame->format);
++        return NULL;
++    }
++
++    if (frame->buf[1] != NULL || frame->format != expected_format)
++    {
++#if RPI_ZC_SAND_8_IN_10_BUF
++        if (frame->format == AV_PIX_FMT_SAND64_10 && expected_format == AV_PIX_FMT_SAND128 && frame->buf[RPI_ZC_SAND_8_IN_10_BUF] != NULL)
++        {
++//            av_log(s, AV_LOG_INFO, "%s: --- found buf[4]\n", __func__);
++            return av_buffer_ref(frame->buf[RPI_ZC_SAND_8_IN_10_BUF]);
++        }
++#endif
++
++        if (maycopy)
++        {
++            if (frame->buf[1] != NULL)
++                av_log(logctx, AV_LOG_INFO, "%s: *** Not a single buf frame: copying\n", __func__);
++            else
++                av_log(logctx, AV_LOG_INFO, "%s: *** Unexpected frame format %d: copying to %d\n", __func__, frame->format, expected_format);
++
++            switch (frame->format)
++            {
++                case AV_PIX_FMT_YUV420P10:
++                    return zc_420p10_to_sand128(zc, frame);
++
++                case AV_PIX_FMT_SAND64_10:
++                    return zc_sand64_16_to_sand128(zc, frame, 10);
++
++                default:
++                    return zc_copy(zc, frame);
++            }
++        }
++        else
++        {
++            if (frame->buf[1] != NULL)
++                av_log(logctx, AV_LOG_WARNING, "%s: *** Not a single buf frame: buf[1] != NULL\n", __func__);
++            else
++                av_log(logctx, AV_LOG_INFO, "%s: *** Unexpected frame format: %d != %d\n", __func__, frame->format, expected_format);
++            return NULL;
++        }
++    }
++
++    if (pic_gm_ptr(frame->buf[0]) == NULL)
++    {
++        if (maycopy)
++        {
++            av_log(logctx, AV_LOG_INFO, "%s: *** Not one of our buffers: copying\n", __func__);
++            return zc_copy(zc, frame);
++        }
++        else
++        {
++            av_log(logctx, AV_LOG_WARNING, "%s: *** Not one of our buffers: NULL\n", __func__);
++            return NULL;
++        }
++    }
++
++    return av_buffer_ref(frame->buf[0]);
++}
++
++void av_rpi_zc_unref(AVRpiZcRefPtr fr_ref)
++{
++    if (fr_ref != NULL)
++    {
++        av_buffer_unref(&fr_ref);
++    }
++}
++
++//----------------------------------------------------------------------------
++
++// Extract user environment from an AVBufferRef
++void * av_rpi_zc_buf_v(AVBufferRef * const buf)
++{
++    ZcBufEnv * const zbe = pic_zbe_ptr(buf);
++    if (zbe != NULL && zbe->user != NULL)
++    {
++        const ZcUserBufEnv * const zub = (const ZcUserBufEnv *)zbe->user->data;
++        return zub == NULL ? NULL : zub->v;
++    }
++    return NULL;
++}
++
++// AV buffer pre-free callback
++static void zc_user_buf_free_cb(void * opaque, uint8_t * data)
++{
++    if (opaque != NULL)
++    {
++        ZcUserBufEnv * const zub = opaque;
++
++        if (zub->fn->free)
++            zub->fn->free(zub->v);
++
++        av_free(zub);
++    }
++}
++
++static void zc_buf_env_free_cb(void * opaque, uint8_t * data)
++{
++    if (opaque != NULL)
++    {
++        ZcBufEnv * const zbe = opaque;
++
++        av_buffer_unref(&zbe->user);
++
++        if (zbe->zc != NULL)
++            av_rpi_zc_env_release(zbe->zc);
++
++        pthread_cond_destroy(&zbe->cond);
++        pthread_mutex_destroy(&zbe->lock);
++        av_free(zbe);
++    }
++}
++
++
++// Wrap the various ZC bits in an AV Buffer and resolve those things we want
++// resolved now.
++// Currently we resolve everything, but in future we might not
++AVBufferRef * av_rpi_zc_buf(size_t numbytes, int addr_offset, void * v, const av_rpi_zc_buf_fn_tab_t * fn_tab)
++{
++    AVBufferRef *buf;
++    ZcUserBufEnv * zub;
++
++    if ((zub = av_malloc(sizeof(ZcUserBufEnv))) == NULL)
++        return NULL;
++
++    zub->fn = fn_tab;
++    zub->v = v;
++    zub->numbytes = numbytes;
++    zub->offset = addr_offset;
++
++    if ((buf = av_buffer_create((uint8_t*)zub, sizeof(*zub), zc_user_buf_free_cb, zub, 0)) == NULL)
++    {
++        av_log(NULL, AV_LOG_ERROR, "ZC: Failed av_buffer_create\n");
++        av_free(zub);
++        return NULL;
++    }
++
++    return buf;
++}
++
++int av_rpi_zc_resolve_buffer(AVBufferRef * const buf, const int alloc_mode)  // 0 on success, AVERROR on failure
++{
++    ZcBufEnv * const zbe = pic_zbe_ptr(buf);
++
++    if (zbe == NULL)  // NULL buf or not one of our buffers
++        return AVERROR(EINVAL);
++
++    if (alloc_mode == ZC_RESOLVE_FAIL && !zbe->is_valid)
++        return AVERROR(EAGAIN);
++
++    if (alloc_mode == ZC_RESOLVE_WAIT_VALID && !zbe->is_valid)
++    {
++        pthread_mutex_lock(&zbe->lock);
++        while (!zbe->is_valid)
++            pthread_cond_wait(&zbe->cond, &zbe->lock);
++        pthread_mutex_unlock(&zbe->lock);
++    }
++
++    if (zbe->is_valid == ZC_BUF_NEVER)
++        return AVERROR(EINVAL);
++
++    // Do alloc if we need it
++    if (zbe->user == NULL)
++    {
++        ZcEnv * const zc = zbe->zc;
++        const ZcUserBufEnv * zub;
++
++        av_assert0(alloc_mode == ZC_RESOLVE_ALLOC || alloc_mode == ZC_RESOLVE_ALLOC_VALID);
++
++        if ((zbe->user = zc->alloc_buf(zc->pool_env, zbe->size_pic, &zbe->geo)) == NULL)
++        {
++            av_log(NULL, AV_LOG_ERROR, "rpi_get_display_buffer: Failed to get buffer from pool\n");
++            goto fail;
++        }
++        zub = (const ZcUserBufEnv *)zbe->user->data;
++
++        // Track - fetch the vcsm handle first so that the later error logs can report it
++
++        zbe->offset = zub->offset;
++        zbe->gmem.numbytes = zub->numbytes;
++        if ((zbe->gmem.vcsm_handle = zub->fn->vcsm_handle(zub->v)) == 0)
++        {
++            av_log(NULL, AV_LOG_ERROR, "ZC: Failed to get vcsm_handle\n");
++            goto fail;
++        }
++
++        if ((zbe->gmem.arm =  zub->fn->map_arm(zub->v)) == NULL)
++        {
++            av_log(NULL, AV_LOG_ERROR, "ZC: Failed to lock vcsm_handle %u\n", zbe->gmem.vcsm_handle);
++            goto fail;
++        }
++
++        if ((zbe->gmem.vc_handle = zub->fn->vc_handle(zub->v)) == 0)
++        {
++            av_log(NULL, AV_LOG_ERROR, "ZC: Failed to get vc handle from vcsm_handle %u\n", zbe->gmem.vcsm_handle);
++            goto fail;
++        }
++        if ((zbe->gmem.vc = zub->fn->map_vc(zub->v)) == 0)
++        {
++            av_log(NULL, AV_LOG_ERROR, "ZC: Failed to get vc addr from vcsm_handle %u\n", zbe->gmem.vcsm_handle);
++            goto fail;
++        }
++
++        buf->buffer->data = zbe->gmem.arm + zbe->offset;
++        buf->buffer->size = zbe->size_pic;
++
++        // In this mode we shouldn't have anyone waiting for us
++        // so no need to signal
++        if (alloc_mode == ZC_RESOLVE_ALLOC_VALID)
++            zbe->is_valid = 1;
++    }
++
++    // Just overwrite - no point in testing
++    buf->data = zbe->gmem.arm + zbe->offset;
++    buf->size = zbe->size_pic;
++    return 0;
++
++fail:
++    av_buffer_unref(&zbe->user);  // Leaves zbe->user NULL again
++    return AVERROR(ENOMEM);
++}
++
++int av_rpi_zc_resolve_frame(AVFrame * const frame, const int may_alloc)  // may_alloc: a ZC_RESOLVE_* mode
++{
++    int rv;
++
++    // Do alloc if we need it
++    if ((rv = av_rpi_zc_resolve_buffer(frame->buf[0], may_alloc)) != 0)
++        return rv;
++
++    // If we are a framebuf copy then the alloc can be done but we haven't
++    // imported its results yet
++    if (frame->data[0] == NULL)
++    {
++        const ZcBufEnv * const zbe = pic_zbe_ptr(frame->buf[0]);
++        // Fill linesize[] and data[] from the geometry stored in the ZcBufEnv
++        frame->linesize[0] = zbe->geo.stride_y;
++        frame->linesize[1] = zbe->geo.stride_c;
++        frame->linesize[2] = zbe->geo.stride_c;
++        // abuse: linesize[3] = "stripe stride"
++        // stripe_stride is NOT the stride between slices it is (that / geo.stride_y).
++        // In a general case this makes the calculation an xor and multiply rather
++        // than a divide and multiply
++        if (zbe->geo.stripes > 1)
++            frame->linesize[3] = zbe->geo.stripe_is_yc ? zbe->geo.height_y + zbe->geo.height_c : zbe->geo.height_y;
++
++        frame->data[0] = frame->buf[0]->data;
++        frame->data[1] = frame->data[0] + (zbe->geo.stripe_is_yc ? zbe->size_y : zbe->size_y * zbe->geo.stripes);
++        if (zbe->geo.planes_c > 1)  // data[2] is only set when chroma has more than one plane
++            frame->data[2] = frame->data[1] + zbe->size_c;
++
++        frame->extended_data = frame->data;
++        // Leave extended buf alone
++    }
++
++    return 0;
++}
++
++int av_rpi_zc_set_valid_frame(AVFrame * const frame)  // returns 0, or AVERROR(EINVAL) if no ZcBufEnv
++{
++    ZcBufEnv * const zbe = pic_zbe_ptr(frame->buf[0]);
++    if (zbe == NULL)
++        return AVERROR(EINVAL);
++    // Update under the lock so a waiter in av_rpi_zc_resolve_buffer cannot miss the wakeup
++    pthread_mutex_lock(&zbe->lock);
++    zbe->is_valid = ZC_BUF_VALID;
++    pthread_cond_broadcast(&zbe->cond);
++    pthread_mutex_unlock(&zbe->lock);
++    return 0;
++}
++
++int av_rpi_zc_set_broken_frame(AVFrame * const frame)  // returns 0, or AVERROR(EINVAL) if no ZcBufEnv
++{
++    ZcBufEnv * const zbe = pic_zbe_ptr(frame->buf[0]);
++    if (zbe == NULL)
++        return AVERROR(EINVAL);
++    // Update under the lock so a waiter in av_rpi_zc_resolve_buffer cannot miss the wakeup
++    pthread_mutex_lock(&zbe->lock);
++    zbe->is_valid = ZC_BUF_NEVER;
++    pthread_cond_broadcast(&zbe->cond);
++    pthread_mutex_unlock(&zbe->lock);
++    return 0;
++}
++
++void av_rpi_zc_set_decoder_pool_size(ZcEnv *const zc, const unsigned int pool_size)
++{
++    zc->pool_size = pool_size;  // read back by av_rpi_zc_get_decoder_pool_size
++}
++
++unsigned int av_rpi_zc_get_decoder_pool_size(ZcEnv *const zc)
++{
++    return zc->pool_size;  // 0 until av_rpi_zc_set_decoder_pool_size is called (see av_rpi_zc_env_alloc)
++}
++
++int av_rpi_zc_get_buffer(ZcEnv *const zc, AVFrame * const frame)
++{
++#if 1  // active path: build a placeholder only; zc->alloc_buf is called later in av_rpi_zc_resolve_buffer
++    ZcBufEnv * zbe = av_mallocz(sizeof(*zbe));
++    // Clear the frame's buf/data/linesize first, even if the allocation above failed
++    for (unsigned int i = 0; i < AV_NUM_DATA_POINTERS; i++) {
++        frame->buf[i] = NULL;
++        frame->data[i] = NULL;
++        frame->linesize[i] = 0;
++    }
++
++    if (zbe == NULL)
++        return AVERROR(ENOMEM);
++
++    if ((frame->buf[0] = av_buffer_create((uint8_t *)zbe, sizeof(*zbe), zc_buf_env_free_cb, zbe, 0)) == NULL)
++    {
++        av_free(zbe);
++        return AVERROR(ENOMEM);
++    }
++    // From here zbe is released through zc_buf_env_free_cb when frame->buf[0] is freed
++    pthread_mutex_init(&zbe->lock, NULL);
++    pthread_cond_init(&zbe->cond, NULL);
++    zbe->zc = zc;
++    atomic_fetch_add(&zc->refcount, 1);  // the placeholder holds a reference on the env
++
++    zbe->geo = av_rpi_zc_frame_geometry(frame->format, frame->width, frame->height);  // Note geometry for later use
++    zbe->size_y = zbe->geo.stride_y * zbe->geo.height_y;
++    zbe->size_c = zbe->geo.stride_c * zbe->geo.height_c;
++    zbe->size_pic = (zbe->size_y + zbe->size_c * zbe->geo.planes_c) * zbe->geo.stripes;
++
++#else  // not compiled (#if 1 above): variant that allocates from the pool immediately
++    const AVRpiZcFrameGeometry geo = av_rpi_zc_frame_geometry(frame->format, frame->width, frame->height);
++    const unsigned int size_y = geo.stride_y * geo.height_y;
++    const unsigned int size_c = geo.stride_c * geo.height_c;
++    const unsigned int size_pic = (size_y + size_c * geo.planes_c) * geo.stripes;
++    AVBufferRef * buf;
++    unsigned int i;
++
++//    printf("Do local alloc: format=%#x, %dx%d: %u\n", frame->format, frame->width, frame->height, size_pic);
++
++    if ((buf = zc->alloc_buf(zc->pool_env, size_pic, &geo)) == NULL)
++    {
++        av_log(NULL, AV_LOG_ERROR, "rpi_get_display_buffer: Failed to get buffer from pool\n");
++        return AVERROR(ENOMEM);
++    }
++
++    // Track
++    atomic_fetch_add(&zc->refcount, 1);
++    pic_zbe_ptr(buf)->zc = zc;
++
++    for (i = 0; i < AV_NUM_DATA_POINTERS; i++) {
++        frame->buf[i] = NULL;
++        frame->data[i] = NULL;
++        frame->linesize[i] = 0;
++    }
++
++    frame->buf[0] = buf;
++
++    frame->linesize[0] = geo.stride_y;
++    frame->linesize[1] = geo.stride_c;
++    frame->linesize[2] = geo.stride_c;
++    // abuse: linesize[3] = "stripe stride"
++    // stripe_stride is NOT the stride between slices it is (that / geo.stride_y).
++    // In a general case this makes the calculation an xor and multiply rather
++    // than a divide and multiply
++    if (geo.stripes > 1)
++        frame->linesize[3] = geo.stripe_is_yc ? geo.height_y + geo.height_c : geo.height_y;
++
++    frame->data[0] = buf->data;
++    frame->data[1] = frame->data[0] + (geo.stripe_is_yc ? size_y : size_y * geo.stripes);
++    if (geo.planes_c > 1)
++        frame->data[2] = frame->data[1] + size_c;
++
++    frame->extended_data = frame->data;
++    // Leave extended buf alone
++
++#if RPI_ZC_SAND_8_IN_10_BUF != 0
++    // *** If we intend to use this for real we will want a 2nd buffer pool
++    frame->buf[RPI_ZC_SAND_8_IN_10_BUF] = zc_pool_buf_alloc(&zc->pool, size_pic);  // *** 2 * wanted size - kludge
++#endif
++#endif
++
++    return 0;
++}
++
++void av_rpi_zc_env_release(const AVZcEnvPtr zc)  // drop one reference; zc is dereferenced unconditionally
++{
++    const int n = atomic_fetch_add(&zc->refcount, -1);  // n is the count before the decrement
++    if (n == 1)  // was 1, now 0
++    {
++        zc->free_pool(zc->pool_env);  // last reference gone: call the free_pool callback, then free the env
++        av_free(zc);
++    }
++}
++
++AVZcEnvPtr av_rpi_zc_env_alloc(void * logctx,
++                    void * pool_env,
++                    av_rpi_zc_alloc_buf_fn_t * alloc_buf_fn,
++                    av_rpi_zc_free_pool_fn_t * free_pool_fn)
++{
++    ZcEnv * zc;
++    // Returns a new env holding one reference, or NULL on allocation failure
++    if ((zc = av_mallocz(sizeof(ZcEnv))) == NULL)
++    {
++        av_log(logctx, AV_LOG_ERROR, "av_rpi_zc_env_alloc: Context allocation failed\n");
++        return NULL;
++    }
++    // The creator's reference (refcount 1) is dropped with av_rpi_zc_env_release
++    *zc = (ZcEnv){
++        .refcount = ATOMIC_VAR_INIT(1),
++        .pool_env = pool_env,
++        .alloc_buf = alloc_buf_fn,
++        .free_pool = free_pool_fn,
++        .pool_size = 0
++    };
++
++    return zc;
++}
++
++//============================================================================
++//
++// External ZC initialisation
++
++#define RPI_GET_BUFFER2 1
++
++
++static int zc_get_buffer2(struct AVCodecContext *s, AVFrame *frame, int flags)
++{
++#if !RPI_GET_BUFFER2
++    return avcodec_default_get_buffer2(s, frame, flags);
++#else
++    int rv;
++    // Only DR1 codecs producing YUV420P or sand frames get ZC buffers; the rest use the default
++    if ((s->codec->capabilities & AV_CODEC_CAP_DR1) == 0)
++    {
++//        printf("Do default alloc: format=%#x\n", frame->format);
++        rv = avcodec_default_get_buffer2(s, frame, flags);
++    }
++    else if (frame->format == AV_PIX_FMT_YUV420P ||
++             av_rpi_is_sand_frame(frame))
++    {
++        if ((rv = av_rpi_zc_get_buffer(s->opaque, frame)) == 0)  // s->opaque: ZcEnv stored by av_rpi_zc_init2
++            rv = av_rpi_zc_resolve_frame(frame, ZC_RESOLVE_ALLOC_VALID);
++    }
++    else
++    {
++        rv = avcodec_default_get_buffer2(s, frame, flags);
++    }
++
++#if 0
++    printf("%s: fmt:%d, %dx%d lsize=%d/%d/%d/%d data=%p/%p/%p bref=%p/%p/%p opaque[0]=%p\n", __func__,
++        frame->format, frame->width, frame->height,
++        frame->linesize[0], frame->linesize[1], frame->linesize[2], frame->linesize[3],
++        frame->data[0], frame->data[1], frame->data[2],
++        frame->buf[0], frame->buf[1], frame->buf[2],
++        av_buffer_get_opaque(frame->buf[0]));
++#endif
++    return rv;
++#endif
++}
++
++int av_rpi_zc_in_use(const struct AVCodecContext * const s)
++{
++    return s->get_buffer2 == zc_get_buffer2;  // 1 when the ZC callback is installed on s, else 0
++}
++
++int av_rpi_zc_init2(struct AVCodecContext * const s,
++                    void * pool_env,
++                    av_rpi_zc_alloc_buf_fn_t * alloc_buf_fn,
++                    av_rpi_zc_free_pool_fn_t * free_pool_fn)
++{
++    ZcEnv * zc;
++    // Installing twice on the same context is a programming error
++    av_assert0(!av_rpi_zc_in_use(s));
++
++    if ((zc = av_rpi_zc_env_alloc(s, pool_env, alloc_buf_fn, free_pool_fn)) == NULL)
++        return AVERROR(ENOMEM);
++    // Save the fields overwritten below so av_rpi_zc_uninit2 can restore them
++    zc->old = (ZcOldCtxVals){
++        .opaque = s->opaque,
++        .get_buffer2 = s->get_buffer2,
++        .thread_safe_callbacks = s->thread_safe_callbacks
++    };
++
++    s->opaque = zc;
++    s->get_buffer2 = zc_get_buffer2;
++    s->thread_safe_callbacks = 1;
++    return 0;
++}
++
++void av_rpi_zc_uninit2(struct AVCodecContext * const s)
++{
++    ZcEnv * const zc = s->opaque;
++    // s->opaque is only a ZcEnv while the ZC callback is installed
++    av_assert0(av_rpi_zc_in_use(s));
++    // Restore the values saved by av_rpi_zc_init2
++    s->get_buffer2 = zc->old.get_buffer2;
++    s->opaque = zc->old.opaque;
++    s->thread_safe_callbacks = zc->old.thread_safe_callbacks;
++
++    av_rpi_zc_env_release(zc);  // drops the context's reference; the env is freed once the count hits 0
++}
++
+--- /dev/null
++++ b/libavcodec/rpi_zc.h
+@@ -0,0 +1,228 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++#ifndef LIBAVCODEC_RPI_ZC_H
++#define LIBAVCODEC_RPI_ZC_H
++
++// Zero-Copy frame code for RPi
++// RPi needs Y/U/V planes to be contiguous for display.  By default
++// ffmpeg will allocate separated planes so a memcpy is needed before
++// display.  This code provides a method of making ffmpeg allocate a single
++// block of memory for the frame which can then be reference counted until
++// display has finished with it.
++
++// Frame buffer number in which to stuff an 8-bit copy of a 16-bit frame
++// 0 disables
++// *** This option still in development
++//     Only works if SAO active
++//     Allocates buffers that are twice the required size
++#define RPI_ZC_SAND_8_IN_10_BUF  0
++
++struct AVBufferRef;
++struct AVFrame;
++struct AVCodecContext;
++enum AVPixelFormat;
++
++// "Opaque" pointer to whatever we are using as a buffer reference
++typedef struct AVBufferRef * AVRpiZcRefPtr;
++
++struct AVZcEnv;
++typedef struct AVZcEnv * AVZcEnvPtr;
++
++typedef struct AVRpiZcFrameGeometry
++{
++    unsigned int stride_y;  // Luma stride (bytes)
++    unsigned int height_y;  // Luma height (lines)
++    unsigned int stride_c;  // Chroma stride (bytes)
++    unsigned int height_c;  // Chroma height (lines)
++    unsigned int planes_c;  // Chroma plane count (U, V = 2, interleaved = 1)
++    unsigned int stripes;   // Number of stripes (sand)
++    unsigned int bytes_per_pel;  // Bytes per pel (pixel)
++    int stripe_is_yc;       // A single stripe is Y then C (false for tall sand)
++
++    int format;                 // Requested format
++    unsigned int video_width;   // Requested width
++    unsigned int video_height;  // Requested height
++} AVRpiZcFrameGeometry;
++
++// Get expected MMAL geometry for a given format, width & height
++AVRpiZcFrameGeometry av_rpi_zc_frame_geometry(
++    const int format,
++    const unsigned int video_width, const unsigned int video_height);
++
++//----------------------------------------------------------------------------
++//
++// Calls that extract info from a ZC frame whether internally or externally
++// allocated
++
++// Generate a ZC reference to the buffer(s) in this frame
++// If the buffer doesn't appear to be one allocated by ZC
++// then the behaviour depends on maycopy:
++//   If maycopy=0 then return NULL
++//   If maycopy=1 && the src frame is in a form where we can easily copy
++//     the data, then allocate a new buffer and copy the data into it
++//   Otherwise return NULL
++// If maycopy == 0 then ZC may be NULL
++AVRpiZcRefPtr av_rpi_zc_ref(void * const logging_context, const AVZcEnvPtr zc,
++    const struct AVFrame * const frame, const enum AVPixelFormat expected_format, const int maycopy);
++
++// Unreference the buffer refed/allocated by _zc_ref
++// If fr_ref is NULL then this will NOP
++void av_rpi_zc_unref(AVRpiZcRefPtr fr_ref);
++
++// Get the vc_handle from the frame ref
++// Returns -1 if ref doesn't look valid
++int av_rpi_zc_vc_handle(const AVRpiZcRefPtr fr_ref);
++// Get the vcsm_handle from the frame ref
++// Returns 0 if ref doesn't look valid
++unsigned int av_rpi_zc_vcsm_handle(const AVRpiZcRefPtr fr_ref);
++// Get offset from the start of the memory referenced
++// by the vc_handle to valid data
++int av_rpi_zc_offset(const AVRpiZcRefPtr fr_ref);
++// Length of buffer data
++int av_rpi_zc_length(const AVRpiZcRefPtr fr_ref);
++// Get the number of bytes allocated from the frame ref
++// Returns 0 if ref doesn't look valid
++int av_rpi_zc_numbytes(const AVRpiZcRefPtr fr_ref);
++// Geometry this frame was allocated with
++const AVRpiZcFrameGeometry * av_rpi_zc_geometry(const AVRpiZcRefPtr fr_ref);
++
++//----------------------------------------------------------------------------
++//
++// Calls for external frame allocation
++
++// Callbacks registered in av_rpi_zc_init2
++
++// Callback to allocate a buf for a frame
++// The frame itself is generated in the calling code
++//
++// Parameters:
++//   pool_env  value passed to av_rpi_zc_init2
++//   size      size wanted
++//   geo       geometry of the frame to be allocated
++// Returns:
++//   NULL      Alloc failed
++//   ptr       AVBufferRef* of allocated buffer
++//             In most cases av_rpi_zc_buf will be called by this function
++//             and this will be the buf returned by that.
++typedef AVBufferRef * av_rpi_zc_alloc_buf_fn_t(void * pool_env, size_t size,
++                                               const AVRpiZcFrameGeometry * geo);
++
++// Callback once ffmpeg is completely done with this pool
++// Called once all allocated buffers have been derefed and ffmpegs ref to this
++// pool has been dropped
++typedef void av_rpi_zc_free_pool_fn_t(void * pool_env);
++
++// Init ZC into a context
++// Sets opaque, get_buffer2, thread_safe_callbacks
++// Use if you want to allocate your own pools and/or create ZC buffers for
++// all decoders
++// RPI HEVC decoders will allocate appropriate VCSM buffers which can be taken
++// apart by av_rpi_zc_xxx calls without this
++int av_rpi_zc_init2(struct AVCodecContext * const s,
++                    void * pool_env, av_rpi_zc_alloc_buf_fn_t * alloc_buf_fn,
++                    av_rpi_zc_free_pool_fn_t * free_pool_fn);
++
++// Free ZC from a context
++void av_rpi_zc_uninit2(struct AVCodecContext * const s);
++
++// Get minimum pool size in frames - valid by the time the first alloc request
++// occurs.  Takes into account thread requests and DPB sizes derived from SPS
++// rather than just adding a worst case DPB size.
++unsigned int av_rpi_zc_get_decoder_pool_size(const AVZcEnvPtr zc);
++
++typedef struct av_rpi_zc_buf_fn_tab_s {
++    // This AVBuffer is being freed by ffmpeg - return memory
++    // to external pool. Memory may be, but need not be, unmapped.
++    // v is the ptr passed in av_rpi_zc_buf
++    void (* free)(void * v);
++
++    // Return appropriate handles / mappings
++    // v is the ptr passed in av_rpi_zc_buf
++    unsigned int (* vcsm_handle)(void * v);  // a return of 0 is treated as failure by av_rpi_zc_resolve_buffer
++    unsigned int (* vc_handle)(void * v);    // a return of 0 is treated as failure by av_rpi_zc_resolve_buffer
++    void * (* map_arm)(void * v);            // a NULL return is treated as failure by av_rpi_zc_resolve_buffer
++    unsigned int (* map_vc)(void * v);       // a return of 0 is treated as failure by av_rpi_zc_resolve_buffer
++} av_rpi_zc_buf_fn_tab_t;
++
++// Allocate a ZC AVBufferRef and set its callback table
++// Doesn't take a buffer address directly - relies on callbacks to return
++// addresses as they are required.  Mappings need not be generated until
++// the map callbacks are called but they should persist from then until
++// the buffer is freed.
++//
++// Parameters:
++//   numbytes    Size of the buffer
++//   addr_offset Offset to first usable byte of buffer (for alignment)
++//               normally 0
++//   v           Pointer passed to callbacks
++//   fn_tab      Function table
++AVBufferRef * av_rpi_zc_buf(size_t numbytes, int addr_offset, void * v, const av_rpi_zc_buf_fn_tab_t * fn_tab);
++
++// Get v ptr set in av_rpi_zc_buf
++void * av_rpi_zc_buf_v(AVBufferRef * const buf);
++
++//----------------------------------------------------------------------------
++//
++// Mostly internal calls but might possibly be wanted by outside code
++
++void av_rpi_zc_int_env_freep(AVZcEnvPtr * zc);
++AVZcEnvPtr av_rpi_zc_int_env_alloc(void * const logctx);
++void av_rpi_zc_set_decoder_pool_size(const AVZcEnvPtr zc, const unsigned int pool_size);
++
++// Test to see if the context is using zc (checks get_buffer2)
++int av_rpi_zc_in_use(const struct AVCodecContext * const s);
++
++// Get buffer generates placeholders for later alloc
++int av_rpi_zc_get_buffer(const AVZcEnvPtr zc, AVFrame * const frame);
++// Resolve actually does the alloc (noop if already alloced)
++// Set data pointers on a buffer/frame that was copied before the alloc
++// occurred
++#define ZC_RESOLVE_FAIL         0  // return error on invalid
++#define ZC_RESOLVE_ALLOC        1  // alloc as invalid
++#define ZC_RESOLVE_WAIT_VALID   2  // wait for valid
++#define ZC_RESOLVE_ALLOC_VALID  3  // alloc as valid
++int av_rpi_zc_resolve_buffer(AVBufferRef * const buf, const int may_alloc);
++int av_rpi_zc_resolve_frame(AVFrame * const frame, const int may_alloc);
++
++int av_rpi_zc_set_valid_frame(AVFrame * const frame);
++int av_rpi_zc_set_broken_frame(AVFrame * const frame);
++
++
++
++
++AVZcEnvPtr av_rpi_zc_env_alloc(void * logctx,
++                    void * pool_env,
++                    av_rpi_zc_alloc_buf_fn_t * alloc_buf_fn,
++                    av_rpi_zc_free_pool_fn_t * free_pool_fn);
++void av_rpi_zc_env_release(const AVZcEnvPtr zc);
++
++
++#endif
++
+--- /dev/null
++++ b/libavcodec/rpi_zc_frames.h
+@@ -0,0 +1,142 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox, Ben Avison
++*/
++
++#ifndef RPI_ZC_FRAMES_H
++#define RPI_ZC_FRAMES_H
++
++#define RPI_ONE_BUF 1
++
++#include "rpi_mem.h"  // for GPU_MEM_PTR_T
++#include "libavutil/frame.h"
++
++#if !RPI_ONE_BUF
++static inline uint32_t get_vc_address_y(const AVFrame * const frame) {  // !RPI_ONE_BUF build only
++    GPU_MEM_PTR_T *p = av_buffer_pool_buffer_get_opaque(frame->buf[0]);
++    return p->vc;  // vc field of the GPU_MEM_PTR_T behind buf[0]
++}
++
++static inline uint32_t get_vc_address_u(const AVFrame * const frame) {  // !RPI_ONE_BUF build only
++    GPU_MEM_PTR_T *p = av_buffer_pool_buffer_get_opaque(frame->buf[1]);
++    return p->vc;  // vc field of the GPU_MEM_PTR_T behind buf[1]
++}
++
++static inline uint32_t get_vc_address_v(const AVFrame * const frame) {  // !RPI_ONE_BUF build only
++    GPU_MEM_PTR_T *p = av_buffer_pool_buffer_get_opaque(frame->buf[2]);
++    return p->vc;  // vc field of the GPU_MEM_PTR_T behind buf[2]
++}
++
++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_y(const AVFrame * const frame) {  // !RPI_ONE_BUF build only
++    return *(GPU_MEM_PTR_T *)av_buffer_pool_buffer_get_opaque(frame->buf[0]);  // by-value copy of buf[0]'s opaque
++}
++
++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_u(const AVFrame * const frame) {  // !RPI_ONE_BUF build only
++    return *(GPU_MEM_PTR_T *)av_buffer_pool_buffer_get_opaque(frame->buf[1]);  // by-value copy of buf[1]'s opaque
++}
++
++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_v(const AVFrame * const frame) {  // !RPI_ONE_BUF build only
++    return *(GPU_MEM_PTR_T *)av_buffer_pool_buffer_get_opaque(frame->buf[2]);  // by-value copy of buf[2]'s opaque
++}
++
++#else
++
++static inline int gpu_is_buf1(const AVFrame * const frame)
++{
++    return frame->buf[1] == NULL;  // 1 when buf[1] is unset; get_vc_address3 then uses buf[0] for every plane
++}
++
++static inline GPU_MEM_PTR_T * gpu_buf1_gmem(const AVFrame * const frame)
++{
++    return av_buffer_get_opaque(frame->buf[0]);  // opaque of buf[0], read as a GPU_MEM_PTR_T
++}
++
++static inline GPU_MEM_PTR_T * gpu_buf3_gmem(const AVFrame * const frame, const unsigned int n)
++{
++    return av_buffer_pool_buffer_get_opaque(frame->buf[n]);  // pool-buffer opaque of buf[n]
++}
++
++static inline uint32_t get_vc_address3(const AVFrame * const frame, const unsigned int n)
++{
++    const GPU_MEM_PTR_T * const gm = gpu_is_buf1(frame) ? gpu_buf1_gmem(frame) : gpu_buf3_gmem(frame, n);
++    return gm->vc + (frame->data[n] - gm->arm);  // gm->vc plus plane n's byte offset from gm->arm
++}
++
++
++static inline uint32_t get_vc_address_y(const AVFrame * const frame) {
++    return get_vc_address3(frame, 0);  // plane index 0
++}
++
++static inline uint32_t get_vc_address_u(const AVFrame * const frame) {
++    return get_vc_address3(frame, 1);  // plane index 1
++}
++
++static inline uint32_t get_vc_address_v(const AVFrame * const frame) {
++    return get_vc_address3(frame, 2);  // plane index 2
++}
++
++#if 0
++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_y(const AVFrame * const frame) {  // inside #if 0: not compiled
++    if (gpu_is_buf1(frame))
++    {
++        GPU_MEM_PTR_T g = *gpu_buf1_gmem(frame);
++        g.numbytes = frame->data[1] - frame->data[0];  // size taken as the distance from plane 0 to plane 1
++        return g;
++    }
++    else
++        return *gpu_buf3_gmem(frame, 0);
++}
++
++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_u(const AVFrame * const frame) {  // inside #if 0: not compiled
++    if (gpu_is_buf1(frame))
++    {
++        GPU_MEM_PTR_T g = *gpu_buf1_gmem(frame);
++        g.arm += frame->data[1] - frame->data[0];  // advance both addresses by plane 1's offset
++        g.vc += frame->data[1] - frame->data[0];
++        g.numbytes = frame->data[2] - frame->data[1];  // chroma size
++        return g;
++    }
++    else
++        return *gpu_buf3_gmem(frame, 1);
++}
++
++static inline GPU_MEM_PTR_T get_gpu_mem_ptr_v(const AVFrame * const frame) {  // inside #if 0: not compiled
++    if (gpu_is_buf1(frame))
++    {
++        GPU_MEM_PTR_T g = *gpu_buf1_gmem(frame);
++        g.arm += frame->data[2] - frame->data[0];  // advance both addresses by plane 2's offset
++        g.vc += frame->data[2] - frame->data[0];
++        g.numbytes = frame->data[2] - frame->data[1];  // chroma size
++        return g;
++    }
++    else
++        return *gpu_buf3_gmem(frame, 2);
++}
++#endif
++#endif
++
++#endif
+--- /dev/null
++++ b/libavcodec/rpivid_hevc.c
+@@ -0,0 +1,2128 @@
++// FFMPEG HEVC decoder hardware accelerator
++// Andrew Holme, Argon Design Ltd
++// Copyright (c) June 2017 Raspberry Pi Ltd
++
++#include <stdio.h>
++#include <fcntl.h>
++#include <pthread.h>
++#include <semaphore.h>
++#include <unistd.h>
++#include <sys/mman.h>
++
++#include "fftools/ffmpeg.h"
++#include "libavutil/avassert.h"
++#include "libavutil/imgutils.h"
++#include "avcodec.h"
++#include "hwconfig.h"
++#include "decode.h"
++
++#include "hevc.h"
++#include "hevcdec.h"
++#include "rpi_zc.h"
++#include "rpi_mem.h"
++#include "rpi_zc_frames.h"
++#include "rpi_mailbox.h"
++
++
++#define OPT_PHASE_TIMING 0      // Generate stats for phase usage
++
++#define OPT_EMU 0
++
++#define TRACE_DEV 0
++#define TRACE_ENTRY 0
++
++#define NUM_SCALING_FACTORS 4064
++
++#define AXI_BASE64 0
++
++#define PROB_BACKUP ((20<<12) + (20<<6) + (0<<0))
++#define PROB_RELOAD ((20<<12) + (20<<0) + (0<<6))
++
++#define RPIVID_COL_PICS 17                 // 16 ref & current
++
++#define RPIVID_BITBUFS          2          // Bit + Cmd bufs (phase 0 & 1)
++#define RPIVID_BITBUF_SIZE      (4 << 20)  // Bit + Cmd buf size
++
++#define RPIVID_COEFFBUFS        3          // PU + Coeff bufs (phase 1 & 2)
++#define RPIVID_COEFFBUF_SIZE    (16 << 20) // PU + Coeff buf size
++
++//////////////////////////////////////////////////////////////////////////////
++//
++// Register offsets
++
++#define RPI_SPS0         0
++#define RPI_SPS1         4
++#define RPI_PPS          8
++#define RPI_SLICE        12
++#define RPI_TILESTART    16
++#define RPI_TILEEND      20
++#define RPI_SLICESTART   24
++#define RPI_MODE         28
++#define RPI_LEFT0        32
++#define RPI_LEFT1        36
++#define RPI_LEFT2        40
++#define RPI_LEFT3        44
++#define RPI_QP           48
++#define RPI_CONTROL      52
++#define RPI_STATUS       56
++#define RPI_VERSION      60
++#define RPI_BFBASE       64
++#define RPI_BFNUM        68
++#define RPI_BFCONTROL    72
++#define RPI_BFSTATUS     76
++#define RPI_PUWBASE      80
++#define RPI_PUWSTRIDE    84
++#define RPI_COEFFWBASE   88
++#define RPI_COEFFWSTRIDE 92
++#define RPI_SLICECMDS    96
++#define RPI_BEGINTILEEND 100
++#define RPI_TRANSFER     104
++#define RPI_CFBASE       108
++#define RPI_CFNUM        112
++#define RPI_CFSTATUS     116
++
++#define RPI_PURBASE       0x8000
++#define RPI_PURSTRIDE     0x8004
++#define RPI_COEFFRBASE    0x8008
++#define RPI_COEFFRSTRIDE  0x800C
++#define RPI_NUMROWS       0x8010
++#define RPI_CONFIG2       0x8014
++#define RPI_OUTYBASE      0x8018
++#define RPI_OUTYSTRIDE    0x801C
++#define RPI_OUTCBASE      0x8020
++#define RPI_OUTCSTRIDE    0x8024
++#define RPI_STATUS2       0x8028
++#define RPI_FRAMESIZE     0x802C
++#define RPI_MVBASE        0x8030
++#define RPI_MVSTRIDE      0x8034
++#define RPI_COLBASE       0x8038
++#define RPI_COLSTRIDE     0x803C
++#define RPI_CURRPOC       0x8040
++
++//////////////////////////////////////////////////////////////////////////////
++
++// Unused but left here to illustrate the differences between FFmpeg's prob
++// structure and the rpivid one
++
++struct FFM_PROB {
++    uint8_t  sao_merge_flag                   [ 1];
++    uint8_t  sao_type_idx                     [ 1];
++    uint8_t  split_coding_unit_flag           [ 3];
++    uint8_t  cu_transquant_bypass_flag        [ 1];
++    uint8_t  skip_flag                        [ 3];
++    uint8_t  cu_qp_delta                      [ 3];
++    uint8_t  pred_mode_flag                   [ 1];
++    uint8_t  part_mode                        [ 4];
++    uint8_t  prev_intra_luma_pred_flag        [ 1];
++    uint8_t  intra_chroma_pred_mode           [ 2];
++    uint8_t  merge_flag                       [ 1];
++    uint8_t  merge_idx                        [ 1];
++    uint8_t  inter_pred_idc                   [ 5];
++    uint8_t  ref_idx_l0                       [ 2];
++    uint8_t  ref_idx_l1                       [ 2];
++    uint8_t  abs_mvd_greater0_flag            [ 2];
++    uint8_t  abs_mvd_greater1_flag            [ 2];
++    uint8_t  mvp_lx_flag                      [ 1];
++    uint8_t  no_residual_data_flag            [ 1];
++    uint8_t  split_transform_flag             [ 3];
++    uint8_t  cbf_luma                         [ 2];
++    uint8_t  cbf_cb_cr                        [ 4];
++    uint8_t  transform_skip_flag/*[][]*/      [ 2];
++    uint8_t  explicit_rdpcm_flag/*[][]*/      [ 2];
++    uint8_t  explicit_rdpcm_dir_flag/*[][]*/  [ 2];
++    uint8_t  last_significant_coeff_x_prefix  [18];
++    uint8_t  last_significant_coeff_y_prefix  [18];
++    uint8_t  significant_coeff_group_flag     [ 4];
++    uint8_t  significant_coeff_flag           [44];
++    uint8_t  coeff_abs_level_greater1_flag    [24];
++    uint8_t  coeff_abs_level_greater2_flag    [ 6];
++    uint8_t  log2_res_scale_abs               [ 8];
++    uint8_t  res_scale_sign_flag              [ 2];
++    uint8_t  cu_chroma_qp_offset_flag         [ 1];
++    uint8_t  cu_chroma_qp_offset_idx          [ 1];
++} __attribute__((packed));
++
++//////////////////////////////////////////////////////////////////////////////
++
++struct RPI_PROB {  // packed byte arrays; 156 bytes in total
++    uint8_t  SAO_MERGE_FLAG             [ 1];
++    uint8_t  SAO_TYPE_IDX               [ 1];
++    uint8_t  SPLIT_FLAG                 [ 3];
++    uint8_t  CU_SKIP_FLAG               [ 3];
++    uint8_t  CU_TRANSQUANT_BYPASS_FLAG  [ 1];
++    uint8_t  PRED_MODE                  [ 1];
++    uint8_t  PART_SIZE                  [ 4];
++    uint8_t  INTRA_PRED_MODE            [ 1];
++    uint8_t  CHROMA_PRED_MODE           [ 1];
++    uint8_t  MERGE_FLAG_EXT             [ 1];
++    uint8_t  MERGE_IDX_EXT              [ 1];
++    uint8_t  INTER_DIR                  [ 5];
++    uint8_t  REF_PIC                    [ 2];
++    uint8_t  MVP_IDX                    [ 1];
++    uint8_t  MVD                        [ 2];
++    uint8_t  QT_ROOT_CBF                [ 1];
++    uint8_t  TRANS_SUBDIV_FLAG          [ 3];
++    uint8_t  QT_CBF                     [ 6];
++    uint8_t  DQP                        [ 2];
++    uint8_t  ONE_FLAG                   [24];
++    uint8_t  LASTX                      [18];
++    uint8_t  LASTY                      [18];
++    uint8_t  SIG_CG_FLAG                [ 4];
++    uint8_t  ABS_FLAG                   [ 6];
++    uint8_t  TRANSFORMSKIP_FLAG         [ 2];
++    uint8_t  SIG_FLAG                   [42];
++    uint8_t  SIG_FLAG_unused            [ 2];  // 154 bytes precede this (same number as RPI_PROB_VALS)
++} __attribute__((packed));
++
++//////////////////////////////////////////////////////////////////////////////
++
++struct RPI_CMD {    // One queued register write; entries are filled in by p1_apb_write()
++    uint32_t addr;  // Register address (p1_apb_write() passes a 16-bit value)
++    uint32_t data;  // Value to write
++} __attribute__((packed));
++
++struct RPI_BIT {        // One queued data reference; entries are filled in by p1_axi_write()
++    int         cmd;    // cmd_idx argument of p1_axi_write() (presumably an index into cmd_fifo - confirm)
++    const void *ptr;    // ptr argument: the pointer is stored as-is, the data is not copied
++    int         len;    // len argument of p1_axi_write()
++};
++
++//////////////////////////////////////////////////////////////////////////////
++
++struct RPI_T;
++
++// Actual addressability is 38bits but we can only alloc in the bottom 32
++// currently - when passed to rpivid h/w the address is always >> 6 so will
++// fit in 32 bit there
++// At some point we may want to make this uint64_t
++typedef uint32_t vid_vc_addr_t;
++
++typedef enum rpivid_decode_state_e {
++    RPIVID_DECODE_NEW = 0,   // 0 is what a dec_env_t fresh from av_mallocz() holds
++    RPIVID_DECODE_START,
++    RPIVID_DECODE_SLICE,
++    RPIVID_DECODE_END,       // rpi_hevc_start_frame only accepts NEW or END on entry
++} rpivid_decode_state_t;
++
++#define RPI_PROB_VALS 154U  // Number of entries WriteProb() computes per slice
++#define RPI_PROB_ARRAY_SIZE (((int)RPI_PROB_VALS + 3) & ~3)  // RPI_PROB_VALS rounded up to a multiple of 4 (156); derived so the two cannot drift apart, still of type int
++
++typedef struct dec_env_s {              // Decode context for one AVCodecContext; created by dec_env_new()
++    const AVCodecContext * avctx;       // Owning codec context; dec_env_get() matches on this pointer
++
++    rpivid_decode_state_t state;
++    unsigned int    decode_order;       // Set from ++rpi->decode_order in rpi_hevc_start_frame
++
++    int             phase_no;           // Current phase (i.e. the last one we waited for)
++    struct dec_env_s * phase_wait_q_next;  // Link in a phase_wait_env_t wait list (see wait_phase/post_phase)
++    sem_t           phase_wait;         // Posted by post_phase() when this env may proceed
++
++    struct RPI_BIT *bit_fifo;           // malloc'd array, doubled on demand by p1_axi_write()
++    struct RPI_CMD *cmd_fifo;           // malloc'd array, doubled on demand by p1_apb_write()
++    unsigned int    bit_len, bit_max;   // Entries used / allocated in bit_fifo
++    unsigned int    cmd_len, cmd_max;   // Entries used / allocated in cmd_fifo
++    unsigned int    num_slice_msgs;
++    unsigned int    PicWidthInCtbsY;    // Divisor used to split a raster-scan CTB address into column and row
++    unsigned int    PicHeightInCtbsY;
++    unsigned int    dpbno_col;
++    uint32_t        reg_slicestart;     // Last RPI_SLICESTART value; recomputed only for non-dependent slice segments
++    unsigned int    wpp_entry_x;        // CTB column of the most recent wpp_entry_point()
++    unsigned int    wpp_entry_y;        // CTB row of the most recent wpp_entry_point()
++
++    const uint8_t * nal_buffer;
++    size_t          nal_size;
++
++    uint16_t        slice_msgs[2*HEVC_MAX_REFS*8+3];
++    uint8_t         scaling_factors[NUM_SCALING_FACTORS];  // Filled by populate_scaling_factors(), sent by WriteScalingFactors()
++//    unsigned int    RefPicList[2][HEVC_MAX_REFS];
++} dec_env_t;
++
++#define RPIVID_PHASES 3
++#define RPIVID_PHASE_NEW (RPIVID_PHASES) // Phase before we have inced decode order
++#define RPIVID_PHASE_START (-1)          // Phase after we have inced decode_order
++
++#if OPT_PHASE_TIMING
++static const unsigned int time_thresholds[8] = {  // Histogram bin limits in ms: tavg_bin_phase() scales them by 1000 against tus64() microseconds
++    10, 15, 20, 30, 45, 60, 75, 90
++};
++#endif
++
++typedef struct phase_wait_env_s {
++    unsigned int    last_order;          // decode_order of the last env that called post_phase() for this phase
++    dec_env_t *     q;                   // Head of the list of envs queued by wait_phase()
++#if OPT_PHASE_TIMING
++    uint64_t phase_time;                 // tus64() stamp from the last tstart_phase()/tend_phase(); 0 = none yet
++    uint64_t max_phase_time;             // Longest single in-phase time recorded by tend_phase()
++    uint64_t time_in_phase;              // Sum of the times measured by tend_phase()
++    uint64_t time_out_phase;             // Sum of the gaps measured by tstart_phase()
++    unsigned int max_time_decode_order;  // last_order at the moment max_phase_time was updated
++    unsigned int time_bins[9];           // Histogram of tavg_bin_phase(p, 1): latest sample
++    unsigned int time_bins3[9];          // Histogram of tavg_bin_phase(p, 3): mean of latest 3
++    unsigned int time_bins5[9];          // Histogram of tavg_bin_phase(p, 5): mean of latest 5
++    uint64_t time_stash[16];             // Ring buffer of recent in-phase times
++    unsigned int i3;                     // Index in time_stash that tend_phase() writes next
++#endif
++} phase_wait_env_t;                      // Single linked list of threads waiting for this phase
++
++typedef struct RPI_T {              // State shared by all decode envs of one hwaccel instance
++    atomic_int      ref_count;      // +1 in dec_env_get(), -1 in dec_env_release()
++    sem_t           ref_zero;       // Posted by dec_env_release() when ref_count drops from 1 to 0
++
++    dec_env_t **    dec_envs;       // Slot array, scanned over avctx->thread_count entries
++    AVZcEnvPtr      zc;
++
++    pthread_mutex_t phase_lock;     // Guards the phase_reqs wait lists; also taken by dec_env_new() for slot assignment
++    phase_wait_env_t phase_reqs[RPIVID_PHASES];  // One ordering/wait record per phase
++
++    volatile uint32_t * regs;       // Register block accessed by apb_read()/apb_write*()
++    volatile uint32_t * ints;       // Interrupt control word polled by int_wait()
++
++    GPU_MEM_PTR_T   gcolbuf;
++    unsigned int    col_stride;
++    size_t          col_picsize;
++
++    unsigned int    bitbuf_no;
++    sem_t           bitbuf_sem;
++    GPU_MEM_PTR_T   gbitbufs[RPIVID_BITBUFS];
++
++    unsigned int    max_pu_msgs;
++    unsigned int    coeffbuf_no;
++    sem_t           coeffbuf_sem;
++    GPU_MEM_PTR_T   gcoeffbufs[RPIVID_COEFFBUFS];
++
++    unsigned int    decode_order;   // Frame counter, pre-incremented by rpi_hevc_start_frame
++    int             mbox_fd;
++    int             gpu_init_type;
++} RPI_T;
++
++#if OPT_PHASE_TIMING
++static uint64_t tus64(void)  // Current CLOCK_MONOTONIC time in microseconds
++{
++    struct timespec now;
++    clock_gettime(CLOCK_MONOTONIC, &now);
++    return now.tv_nsec / 1000 + (uint64_t)now.tv_sec * 1000000;
++}
++#endif
++
++static inline unsigned int rnd64(unsigned int x)  // Round x up to the next multiple of 64
++{
++    return ((x + 63U) >> 6) << 6;
++}
++
++static inline int rpi_sem_wait(sem_t * const sem)
++{
++    // sem_wait() that restarts after EINTR; any other result is returned as-is
++    for (;;) {
++        const int rc = sem_wait(sem);
++        if (rc == 0 || errno != EINTR) return rc;
++    }
++}
++
++//============================================================================
++
++#define REGS_NAME "/dev/rpivid-hevcmem"
++#define REGS_SIZE 0x10000
++#define INTS_NAME "/dev/rpivid-intcmem"
++#define INTS_SIZE 0x10000  // 4 is probably enough but we are going to alloc a page anyway
++
++static volatile uint32_t * map_dev(AVCodecContext * const avctx, const char * const dev_name, size_t size)
++{
++    // Maps the first `size` bytes of device node dev_name read/write and shared.
++    // Returns the mapping, or NULL (after logging a warning) on failure.
++    void *gpio_map;
++    int  mem_fd;
++
++    /* open the device node */
++    if ((mem_fd = open(dev_name, O_RDWR|O_SYNC) ) < 0) {
++        av_log(avctx, AV_LOG_WARNING, "can't open %s\n", dev_name);
++        return NULL;
++    }
++
++    // Now map it
++    gpio_map = mmap(
++       NULL,
++       size,
++       PROT_READ|PROT_WRITE,
++       MAP_SHARED,
++       mem_fd,
++       0
++    );
++    close(mem_fd);  // No longer need the FD
++    if (gpio_map == MAP_FAILED) {
++        // Name the device and end with '\n', like the open() failure above
++        av_log(avctx, AV_LOG_WARNING, "can't map %s\n", dev_name);
++        return NULL;
++    }
++    return (volatile uint32_t *)gpio_map;
++}
++
++static void unmap_devp(volatile uint32_t ** const p_gpio_map, size_t size)
++{
++    volatile uint32_t * const mapping = *p_gpio_map;  // Snapshot before the caller's pointer is cleared
++    if (mapping == NULL)
++        return;                                       // Nothing mapped: no-op
++    *p_gpio_map = NULL;
++    munmap((void *)mapping, size);
++}
++
++#define MANGLE(x) ((x) &~0xc0000000)          // ** If x is ever a 64 bit thing this will need fixing!
++#define MANGLE64(x) (uint32_t)(MANGLE(x)>>6)
++
++static inline void apb_write_vc_addr(const RPI_T *const rpi, const uint32_t addr, const vid_vc_addr_t data)
++{
++    const uint32_t val = MANGLE64(data);  // Top two address bits cleared, then >> 6
++#if TRACE_DEV
++    printf("W %x %08x\n", addr, val);
++#endif
++    *(rpi->regs + (addr >> 2)) = val;     // addr is a byte offset; regs is indexed in 32-bit words
++}
++
++static inline void apb_write_vc_len(const RPI_T *const rpi, const uint32_t addr, const unsigned int data)
++{
++    const unsigned int val = data >> 6;  // ?? rnd64 - but not currently needed
++#if TRACE_DEV
++    printf("W %x %08x\n", addr, val);
++#endif
++    *(rpi->regs + (addr >> 2)) = val;    // addr is a byte offset; regs is indexed in 32-bit words
++}
++
++static inline void apb_write(const RPI_T * const rpi, const uint32_t addr, const uint32_t data)
++{
++    volatile uint32_t * const reg = rpi->regs + (addr >> 2);  // addr is a byte offset; regs is indexed in 32-bit words
++#if TRACE_DEV
++    printf("W %x %08x\n", addr, data);
++#endif
++    *reg = data;
++}
++
++static inline uint32_t apb_read(const RPI_T * const rpi, const uint32_t addr)
++{
++    const uint32_t val = *(rpi->regs + (addr >> 2));  // addr is a byte offset; regs is indexed in 32-bit words
++#if TRACE_DEV
++    printf("R %x (=%x)\n", addr, val);
++#endif
++    return val;
++}
++
++#define ARG_IC_ICTRL_ACTIVE1_INT_SET                   0x00000001
++#define ARG_IC_ICTRL_ACTIVE1_EDGE_SET                  0x00000002
++#define ARG_IC_ICTRL_ACTIVE1_EN_SET                    0x00000004
++#define ARG_IC_ICTRL_ACTIVE1_STATUS_SET                0x00000008
++#define ARG_IC_ICTRL_ACTIVE2_INT_SET                   0x00000010
++#define ARG_IC_ICTRL_ACTIVE2_EDGE_SET                  0x00000020
++#define ARG_IC_ICTRL_ACTIVE2_EN_SET                    0x00000040
++#define ARG_IC_ICTRL_ACTIVE2_STATUS_SET                0x00000080
++
++static inline void int_wait(const RPI_T * const rpi, const unsigned int phase)  // Sleep-poll ints[0] until the done bit for this phase is set
++{
++    const uint32_t mask_reset = phase == 1 ? ~ARG_IC_ICTRL_ACTIVE2_INT_SET : ~ARG_IC_ICTRL_ACTIVE1_INT_SET;  // Phase 1 masks out ACTIVE2_INT, any other phase ACTIVE1_INT
++    const uint32_t mask_done = phase == 1 ? ARG_IC_ICTRL_ACTIVE1_INT_SET : ARG_IC_ICTRL_ACTIVE2_INT_SET;     // Phase 1 waits on ACTIVE1_INT, any other phase on ACTIVE2_INT
++    uint32_t ival;
++    while (((ival = rpi->ints[0]) & mask_done) == 0) {
++        usleep(1000);  // Re-read once per millisecond
++    }
++    rpi->ints[0] = ival & mask_reset;  // Write back the value just read minus the other source's INT bit (presumably acknowledges ours - confirm)
++}
++
++#if TRACE_DEV && 0
++static void apb_dump_regs(const RPI_T * const rpi, uint16_t addr, int num) {  // Debug only (compiled out by the enclosing "&& 0"): hex-dump num registers from byte offset addr
++    int i;
++
++    for (i=0; i<num; i++)
++    {
++        if ((i%4)==0)  // Four words per output line, each line prefixed with 0x7eb00000 + byte offset
++          printf("%08x: ", 0x7eb00000 + addr + 4*i);
++
++        printf("%08x", rpi->regs[(addr>>2)+i]);
++
++        if ((i%4)==3 || i+1 == num)
++            printf("\n");
++        else
++            printf(" ");
++    }
++}
++
++static void axi_dump(const dec_env_t * const de, uint64_t addr, uint32_t size) {  // Debug only (compiled out by the enclosing "&& 0"): hex-dump size bytes as 32-bit words
++    int i;
++
++    for (i=0; i<size>>2; i++)
++    {
++        if ((i%4)==0)  // NOTE(review): de->gbuf is not a member of dec_env_t as declared in this file, so this would not build if enabled
++            printf("%08x: ", MANGLE(de->gbuf.vc) + (uint32_t)addr + 4*i);
++
++        printf("%08x", ((uint32_t*)de->gbuf.arm)[(addr>>2)+i]);
++
++        if ((i%4)==3 || i+1 == size>>2)
++            printf("\n");
++        else
++            printf(" ");
++    }
++}
++#endif
++
++//////////////////////////////////////////////////////////////////////////////
++
++static inline size_t round_up_size(const size_t x)
++{
++    /* Admit no size < 256 */
++    const unsigned int shift = x < 256 ? 8 : av_log2(x) - 1;
++    const int lower = 3 << shift;           // The two candidates are 3 << shift and 4 << shift
++    return x < lower ? lower : 4 << shift;  // Pick the smaller candidate that is strictly above x
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Scaling factors
++
++// Expands one scaling list into the layout stored in dec_env_t.scaling_factors.
++//
++//   sizeID 0      copies 16 bytes (4x4) unchanged
++//   sizeID 1      copies 64 bytes (8x8) unchanged
++//   sizeID 2      upsamples the 8x8 source to 16x16 (each value repeated 2x2)
++//   any other     upsamples the 8x8 source to 32x32 (each value repeated 4x4)
++//
++// For the two upsampled sizes the first output byte is then overwritten
++// with dc.
++//
++//   sizeID    selects the expansion as listed above
++//   matrixID  not used by this function
++//   dst0      output; must have room for 16, 64, 256 or 1024 bytes
++//   src0      input; 16 bytes are read for sizeID 0, otherwise 64
++//   dc        value stored in dst0[0] for the upsampled sizes
++static void expand_scaling_list(
++    const unsigned int sizeID,
++    const unsigned int matrixID,
++    uint8_t * const dst0,
++    const uint8_t * const src0,
++    uint8_t dc)
++{
++    // Log2 of the replication factor used by the upsampled sizes
++    const unsigned int rep_shift = (sizeID == 2) ? 1 : 2;
++    // Output is square: 8 source rows/columns times the replication factor
++    const unsigned int side = 8U << rep_shift;
++    uint8_t * out = dst0;
++
++    if (sizeID == 0) {
++        memcpy(dst0, src0, 16);
++        return;
++    }
++    if (sizeID == 1) {
++        memcpy(dst0, src0, 64);
++        return;
++    }
++
++    for (unsigned int row = 0; row < side; row++) {
++        const uint8_t * const src_row = src0 + (row >> rep_shift) * 8;
++        for (unsigned int col = 0; col < side; col++)
++            *out++ = src_row[col >> rep_shift];
++    }
++    dst0[0] = dc;
++}
++
++static void populate_scaling_factors(dec_env_t * const de, const HEVCContext * const s) {
++    // Byte offset of each expanded matrix within de->scaling_factors,
++    // indexed [sizeID][matrixID]
++    static const uint32_t scaling_factor_offsets[4][6] = {
++        // MID0    MID1    MID2    MID3    MID4    MID5
++        {0x0000, 0x0010, 0x0020, 0x0030, 0x0040, 0x0050},   // SID0 (4x4)
++        {0x0060, 0x00A0, 0x00E0, 0x0120, 0x0160, 0x01A0},   // SID1 (8x8)
++        {0x01E0, 0x02E0, 0x03E0, 0x04E0, 0x05E0, 0x06E0},   // SID2 (16x16)
++        {0x07E0,      0,      0, 0x0BE0,      0,      0}};  // SID3 (32x32)
++
++    // The PPS list is used when the PPS carries one, otherwise the SPS list
++    const ScalingList * const lists = s->ps.pps->scaling_list_data_present_flag ?
++        &s->ps.pps->scaling_list : &s->ps.sps->scaling_list;
++    unsigned int sid, mid;
++
++    // Sizes 0..2 have six matrices each; of these only size 2 (16x16) takes
++    // a DC value, read from sl_dc[0]
++    for (sid = 0; sid < 3; sid++) {
++        for (mid = 0; mid < 6; mid++) {
++            const uint8_t dc = (sid == 2) ? lists->sl_dc[0][mid] : 0;
++
++            expand_scaling_list(sid, mid,
++                de->scaling_factors + scaling_factor_offsets[sid][mid],
++                lists->sl[sid][mid], dc);
++        }
++    }
++
++    // 32x32 has two matrices; ffmpeg keeps the second at matrixID 3, not 1,
++    // hence the step of 3.  Their DC values are read from sl_dc[1]
++    for (mid = 0; mid < 6; mid += 3) {
++        expand_scaling_list(3, mid,
++            de->scaling_factors + scaling_factor_offsets[3][mid],
++            lists->sl[3][mid], lists->sl_dc[1][mid]);
++    }
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Probabilities
++
++static const uint8_t prob_init[3][156] = {  // One row per init_type (see WriteProb); the last two entries of each row are zero padding
++	{  // init_type 0
++		 153, 200, 139, 141, 157, 154, 154, 154,
++		 154, 154, 184, 154, 154, 154, 184,  63,
++		 154, 154, 154, 154, 154, 154, 154, 154,
++		 154, 154, 154, 154, 154, 153, 138, 138,
++		 111, 141,  94, 138, 182, 154, 154, 154,
++		 140,  92, 137, 138, 140, 152, 138, 139,
++		 153,  74, 149,  92, 139, 107, 122, 152,
++		 140, 179, 166, 182, 140, 227, 122, 197,
++		 110, 110, 124, 125, 140, 153, 125, 127,
++		 140, 109, 111, 143, 127, 111,  79, 108,
++		 123,  63, 110, 110, 124, 125, 140, 153,
++		 125, 127, 140, 109, 111, 143, 127, 111,
++		  79, 108, 123,  63,  91, 171, 134, 141,
++		 138, 153, 136, 167, 152, 152, 139, 139,
++		 111, 111, 125, 110, 110,  94, 124, 108,
++		 124, 107, 125, 141, 179, 153, 125, 107,
++		 125, 141, 179, 153, 125, 107, 125, 141,
++		 179, 153, 125, 140, 139, 182, 182, 152,
++		 136, 152, 136, 153, 136, 139, 111, 136,
++		 139, 111,   0,   0,	},
++	{  // init_type 1
++		 153, 185, 107, 139, 126, 197, 185, 201,
++		 154, 149, 154, 139, 154, 154, 154, 152,
++		 110, 122,  95,  79,  63,  31,  31, 153,
++		 153, 168, 140, 198,  79, 124, 138,  94,
++		 153, 111, 149, 107, 167, 154, 154, 154,
++		 154, 196, 196, 167, 154, 152, 167, 182,
++		 182, 134, 149, 136, 153, 121, 136, 137,
++		 169, 194, 166, 167, 154, 167, 137, 182,
++		 125, 110,  94, 110,  95,  79, 125, 111,
++		 110,  78, 110, 111, 111,  95,  94, 108,
++		 123, 108, 125, 110,  94, 110,  95,  79,
++		 125, 111, 110,  78, 110, 111, 111,  95,
++		  94, 108, 123, 108, 121, 140,  61, 154,
++		 107, 167,  91, 122, 107, 167, 139, 139,
++		 155, 154, 139, 153, 139, 123, 123,  63,
++		 153, 166, 183, 140, 136, 153, 154, 166,
++		 183, 140, 136, 153, 154, 166, 183, 140,
++		 136, 153, 154, 170, 153, 123, 123, 107,
++		 121, 107, 121, 167, 151, 183, 140, 151,
++		 183, 140,   0,   0,	},
++	{  // init_type 2
++		 153, 160, 107, 139, 126, 197, 185, 201,
++		 154, 134, 154, 139, 154, 154, 183, 152,
++		 154, 137,  95,  79,  63,  31,  31, 153,
++		 153, 168, 169, 198,  79, 224, 167, 122,
++		 153, 111, 149,  92, 167, 154, 154, 154,
++		 154, 196, 167, 167, 154, 152, 167, 182,
++		 182, 134, 149, 136, 153, 121, 136, 122,
++		 169, 208, 166, 167, 154, 152, 167, 182,
++		 125, 110, 124, 110,  95,  94, 125, 111,
++		 111,  79, 125, 126, 111, 111,  79, 108,
++		 123,  93, 125, 110, 124, 110,  95,  94,
++		 125, 111, 111,  79, 125, 126, 111, 111,
++		  79, 108, 123,  93, 121, 140,  61, 154,
++		 107, 167,  91, 107, 107, 167, 139, 139,
++		 170, 154, 139, 153, 139, 123, 123,  63,
++		 124, 166, 183, 140, 136, 153, 154, 166,
++		 183, 140, 136, 153, 154, 166, 183, 140,
++		 136, 153, 154, 170, 153, 138, 138, 122,
++		 121, 122, 121, 167, 151, 183, 140, 151,
++		 183, 140,   0,   0,	},
++};
++
++
++//////////////////////////////////////////////////////////////////////////////
++// Phase 1 command and bit FIFOs
++
++// ???? uint16_t addr - put in uint32_t
++static int p1_apb_write(dec_env_t * const de, const uint16_t addr, const uint32_t data) {
++    const unsigned int idx = de->cmd_len++;  // Slot to fill; also the value returned
++    if (idx == de->cmd_max) {                // Full: double the capacity, abort if realloc fails
++        de->cmd_fifo = realloc(de->cmd_fifo, (de->cmd_max*=2)*sizeof(struct RPI_CMD));
++        av_assert0(de->cmd_fifo);
++    }
++#if TRACE_DEV
++    printf("[%02x] %x %x\n", idx, addr, data);
++#endif
++    de->cmd_fifo[idx] = (struct RPI_CMD){ .addr = addr, .data = data };
++    return idx;
++}
++
++static void p1_axi_write(dec_env_t * const de, const uint32_t len, const void * const ptr, const int cmd_idx) {
++    // Appends {cmd_idx, ptr, len} to de->bit_fifo; only the pointer is stored, not the data
++    struct RPI_BIT * slot;
++    if (de->bit_len == de->bit_max)  // Full: double the capacity, abort if realloc fails
++        av_assert0(de->bit_fifo = realloc(de->bit_fifo, (de->bit_max*=2)*sizeof(struct RPI_BIT)));
++    slot = &de->bit_fifo[de->bit_len++];
++    *slot = (struct RPI_BIT){ .cmd = cmd_idx, .ptr = ptr, .len = len };
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Write probability and scaling factor memories
++
++#if 0  // Disabled variant: sends a ready-made struct RPI_PROB from de->probabilities (dec_env_t as declared in this file has no such member)
++static void WriteProb(dec_env_t * const de) {
++    int i;
++    const uint8_t *p = (uint8_t *) &de->probabilities;
++    for (i=0; i<sizeof(struct RPI_PROB); i+=4, p+=4)  // One 32-bit write per 4 bytes, lowest byte first
++        p1_apb_write(de, 0x1000+i, p[0] + (p[1]<<8) + (p[2]<<16) + (p[3]<<24));
++}
++#endif
++
++static void WriteProb(dec_env_t * const de, const HEVCContext * const s) {
++    // Derives the initial context state for each of the RPI_PROB_VALS
++    // entries of the prob_init row chosen for this slice, using the slice
++    // QP clipped to 0..51, then queues the table as 32-bit words at
++    // APB addresses 0x1000 onwards (4 states per word, lowest byte first).
++    uint8_t state[RPI_PROB_ARRAY_SIZE] = {0};  // Entries past RPI_PROB_VALS stay 0
++    const int not_i_slice = (s->sh.slice_type != HEVC_SLICE_I);
++    const unsigned int init_type = (s->sh.cabac_init_flag && not_i_slice) ?
++        s->sh.slice_type + 1 : 2 - s->sh.slice_type;
++    const uint8_t * const init_vals = prob_init[init_type];
++    const int qp = av_clip(s->sh.slice_qp, 0, 51);
++    unsigned int i;
++
++    for (i = 0; i < RPI_PROB_VALS; i++) {
++        // High nibble of the init value gives the slope, low nibble the offset
++        const int slope  = (init_vals[i] >> 4) * 5 - 45;
++        const int offset = ((init_vals[i] & 15) << 3) - 16;
++        int pre = 2 * (((slope * qp) >> 4) + offset) - 127;
++
++        pre ^= pre >> 31;  // With an arithmetic shift: negative values become their one's complement
++        state[i] = (pre > 124) ? 124 + (pre & 1) : pre;
++    }
++
++    for (i = 0; i < RPI_PROB_ARRAY_SIZE; i += 4) {
++        const uint32_t word = state[i] | (state[i+1]<<8) | (state[i+2]<<16) | (state[i+3]<<24);
++        p1_apb_write(de, 0x1000+i, word);
++    }
++}
++
++
++static void WriteScalingFactors(dec_env_t * const de) {
++    // Queues de->scaling_factors as 32-bit words at APB addresses 0x2000 onwards, lowest byte first
++    const uint8_t *p = de->scaling_factors;
++    for (int i = 0; i < NUM_SCALING_FACTORS; i += 4, p += 4)  // Cast below: p[3] >= 128 shifted by 24 as a promoted int would overflow
++        p1_apb_write(de, 0x2000+i, p[0] + (p[1]<<8) + (p[2]<<16) + ((uint32_t)p[3]<<24));
++}
++
++//////////////////////////////////////////////////////////////////////////////
++
++static int ctb_to_tile (unsigned int ctb, unsigned int *bd, int num) {
++    int i;  // bd[] has num+1 elements; bd[0]=0; see hevc_ps.c
++    for (i=1; i < num && ctb >= bd[i]; i++);  // i < num: an out-of-range ctb maps to the last tile instead of reading past bd[]
++    return i-1;  // Index of the tile whose range [bd[i-1], bd[i]) holds ctb
++}
++
++static int ctb_to_slice_w_h (unsigned int ctb, int ctb_size, int width, unsigned int *bd, int num) {
++    // Returns ctb_size, unless ctb is at or beyond bd[num-1] and width is not a multiple of
++    // ctb_size, in which case the remainder width % ctb_size is returned instead
++    return (ctb >= bd[num-1] && width % ctb_size != 0) ? width % ctb_size : ctb_size;
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Handle PU and COEFF stream overflow
++
++
++// Returns:
++// -2 Other error
++// -1 Out of coeff space
++//  0  OK
++//  1  Out of PU space
++
++static int check_status(const RPI_T * const rpi, dec_env_t * const de) {
++    // this is the definition of successful completion of phase 1
++    // it assures that status register is zero and all blocks in each tile have completed
++    if (apb_read(rpi, RPI_CFSTATUS) != apb_read(rpi, RPI_CFNUM)) {
++        // Not complete: RPI_STATUS says why.  Bit 3 (out of coeff space) is
++        // tested before bit 4 (out of PU space), so it wins if both are set
++        const uint32_t status = apb_read(rpi, RPI_STATUS);
++
++        if (status & 8)
++            return -1;
++        if (status & 0x10)
++            return 1;
++
++        // Neither overflow flag is set
++        return -2;
++    }
++    return 0;
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Write STATUS register with expected end CTU address of previous slice
++
++static void end_previous_slice(dec_env_t * const de, const HEVCContext * const s, const int ctb_addr_ts) {
++    // The CTB just before ctb_addr_ts in tile-scan order is taken as the end of the previous slice
++    const int last_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts-1];
++    const int last_x = last_rs % de->PicWidthInCtbsY, last_y = last_rs / de->PicWidthInCtbsY;
++    p1_apb_write(de, RPI_STATUS, 1 + (last_x<<5) + (last_y<<18));
++}
++
++static void wpp_pause(dec_env_t * const de, int ctb_row) {  // Queues STATUS, TRANSFER(PROB_BACKUP), MODE and CONTROL writes for CTB row ctb_row; called from wpp_end_previous_slice()
++    p1_apb_write(de, RPI_STATUS, (ctb_row<<18) + 0x25);
++    p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
++    p1_apb_write(de, RPI_MODE, ctb_row==de->PicHeightInCtbsY-1 ? 0x70000 : 0x30000);  // The last CTB row additionally sets bit 18
++    p1_apb_write(de, RPI_CONTROL, (ctb_row<<16) + 2);
++}
++
++static void wpp_end_previous_slice(dec_env_t * const de, const HEVCContext * const s, int ctb_addr_ts) {
++    const unsigned int pic_w = de->PicWidthInCtbsY;
++    const int last_rs = s->ps.pps->ctb_addr_ts_to_rs[ctb_addr_ts-1];  // CTB just before ctb_addr_ts in tile-scan order
++    const int new_x = s->sh.slice_ctb_addr_rs % pic_w, new_y = s->sh.slice_ctb_addr_rs / pic_w;
++    const int last_x = last_rs % pic_w, last_y = last_rs / pic_w;
++
++    if (de->wpp_entry_x<2 && (de->wpp_entry_y<new_y || new_x>2) && pic_w>2)
++        wpp_pause(de, last_y);
++    p1_apb_write(de, RPI_STATUS, 1 + (last_x<<5) + (last_y<<18));
++    if (new_x==2 || (pic_w==2 && de->wpp_entry_y<new_y))
++        p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
++}
++
++//////////////////////////////////////////////////////////////////////////////
++
++static void new_slice_segment(dec_env_t * const de, const HEVCContext * const s)
++{   // Queues the RPI_SPS0, RPI_SPS1, RPI_PPS and RPI_SLICESTART writes for a slice segment, plus the scaling factors when enabled
++    const HEVCSPS *sps = s->ps.sps;
++    const HEVCPPS *pps = s->ps.pps;
++
++    p1_apb_write(de, RPI_SPS0,
++        (sps->log2_min_cb_size                    <<  0) +
++        (sps->log2_ctb_size                       <<  4) +
++        (sps->log2_min_tb_size                    <<  8) +
++        (sps->log2_max_trafo_size                 << 12) +
++        (sps->bit_depth                           << 16) +  // The same bit_depth value fills both the
++        (sps->bit_depth                           << 20) +  // bits 16.. and bits 20.. fields
++        (sps->max_transform_hierarchy_depth_intra << 24) +
++        (sps->max_transform_hierarchy_depth_inter << 28));
++
++    p1_apb_write(de, RPI_SPS1,
++        (sps->pcm.bit_depth                                        <<  0) +
++        (sps->pcm.bit_depth_chroma                                 <<  4) +
++        (sps->pcm.log2_min_pcm_cb_size                             <<  8) +
++        (sps->pcm.log2_max_pcm_cb_size                             << 12) +
++        (sps->separate_colour_plane_flag? 0:sps->chroma_format_idc << 16) +  // << binds tighter than ?:, so this is flag ? 0 : (idc << 16)
++        (sps->amp_enabled_flag                                     << 18) +
++        (sps->pcm_enabled_flag                                     << 19) +
++        (sps->scaling_list_enable_flag                             << 20) +
++        (sps->sps_strong_intra_smoothing_enable_flag               << 21));
++
++    p1_apb_write(de, RPI_PPS,
++        (sps->log2_ctb_size - pps->diff_cu_qp_delta_depth   <<  0) +  // - binds tighter than <<: the difference is the field
++        (pps->cu_qp_delta_enabled_flag                      <<  4) +
++        (pps->transquant_bypass_enable_flag                 <<  5) +
++        (pps->transform_skip_enabled_flag                   <<  6) +
++        (pps->sign_data_hiding_flag                         <<  7) +
++      (((pps->cb_qp_offset + s->sh.slice_cb_qp_offset)&255) <<  8) +  // PPS + slice offsets, truncated to 8 bits
++      (((pps->cr_qp_offset + s->sh.slice_cr_qp_offset)&255) << 16) +
++        (pps->constrained_intra_pred_flag                   << 24));
++
++    if (s->ps.sps->scaling_list_enable_flag) WriteScalingFactors(de);
++
++    if (!s->sh.dependent_slice_segment_flag) {  // Dependent segments re-send the value cached in de->reg_slicestart
++        int ctb_col = s->sh.slice_ctb_addr_rs % de->PicWidthInCtbsY;
++        int ctb_row = s->sh.slice_ctb_addr_rs / de->PicWidthInCtbsY;
++        de->reg_slicestart = (ctb_col<<0) + (ctb_row<<16);
++    }
++
++    p1_apb_write(de, RPI_SLICESTART, de->reg_slicestart);
++}
++
++//////////////////////////////////////////////////////////////////////////////
++
++static void write_slice(dec_env_t * const de, const HEVCContext * const s,
++                        const unsigned int slice_w, const unsigned int slice_h) {
++    // Packs the RPI_SLICE register value for this slice and queues the write
++    const int is_b = (s->sh.slice_type == HEVC_SLICE_B);
++    const int is_inter = is_b || (s->sh.slice_type == HEVC_SLICE_P);
++    // Fields present for every slice type
++    uint32_t reg = (s->sh.slice_type                           << 12)
++                 + (s->sh.slice_sample_adaptive_offset_flag[0] << 14)
++                 + (s->sh.slice_sample_adaptive_offset_flag[1] << 15)
++                 + (slice_w                                    << 17)
++                 + (slice_h                                    << 24);
++
++    if (is_inter)  // Merge candidate count and both reference counts: P and B only
++        reg |= (s->sh.max_num_merge_cand << 0) + (s->sh.nb_refs[L0] << 4) + (s->sh.nb_refs[L1] << 8);
++    if (is_b)      // mvd_l1_zero_flag: B only
++        reg |= s->sh.mvd_l1_zero_flag<<16;
++    p1_apb_write(de, RPI_SLICE, reg);
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Wavefront mode
++
++static void wpp_entry_point(dec_env_t * const de, const HEVCContext * const s,
++                            const int do_bte, const int resetQPY, const int ctb_addr_ts) {
++    const HEVCSPS * const sps = s->ps.sps;
++    const HEVCPPS * const pps = s->ps.pps;
++
++    int ctb_size = 1<<sps->log2_ctb_size;
++    int ctb_addr_rs = pps->ctb_addr_ts_to_rs[ctb_addr_ts];
++
++    int ctb_col = de->wpp_entry_x = ctb_addr_rs % de->PicWidthInCtbsY;  // Side effect: the entry position is recorded
++    int ctb_row = de->wpp_entry_y = ctb_addr_rs / de->PicWidthInCtbsY;  // in de for wpp_end_previous_slice()
++
++    int endx = de->PicWidthInCtbsY-1;  // The region ends at the last CTB of the entry row
++    int endy = ctb_row;
++
++    uint8_t slice_w = ctb_to_slice_w_h(ctb_col, ctb_size, sps->width,  pps->col_bd, pps->num_tile_columns);
++    uint8_t slice_h = ctb_to_slice_w_h(ctb_row, ctb_size, sps->height, pps->row_bd, pps->num_tile_rows);
++
++    p1_apb_write(de, RPI_TILESTART, 0);
++    p1_apb_write(de, RPI_TILEEND, endx + (endy<<16));
++
++    if (do_bte)  // RPI_BEGINTILEEND gets the same value as RPI_TILEEND, only on request
++        p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy<<16));
++
++    write_slice(de, s, slice_w, ctb_row==de->PicHeightInCtbsY-1? slice_h : ctb_size);  // slice_h is only used on the last CTB row
++
++    if (resetQPY) p1_apb_write(de, RPI_QP, sps->qp_bd_offset + s->sh.slice_qp);
++
++    p1_apb_write(de, RPI_MODE, ctb_row==de->PicHeightInCtbsY-1? 0x60001 : 0x20001);  // The last CTB row additionally sets bit 18
++    p1_apb_write(de, RPI_CONTROL, (ctb_col<<0) + (ctb_row<<16));
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Tiles mode
++
++static void new_entry_point(dec_env_t * const de, const HEVCContext * const s,
++                            const int do_bte, const int resetQPY, const int ctb_addr_ts) {
++    const HEVCSPS * const sps = s->ps.sps;
++    const HEVCPPS * const pps = s->ps.pps;
++
++    int ctb_col = pps->ctb_addr_ts_to_rs[ctb_addr_ts] % de->PicWidthInCtbsY;
++    int ctb_row = pps->ctb_addr_ts_to_rs[ctb_addr_ts] / de->PicWidthInCtbsY;
++
++    int tile_x = ctb_to_tile (ctb_col, pps->col_bd, pps->num_tile_columns);  // Tile column / row that
++    int tile_y = ctb_to_tile (ctb_row, pps->row_bd, pps->num_tile_rows);     // contains the entry CTB
++
++    int endx = pps->col_bd[tile_x+1] - 1;  // Last CTB column / row of that tile
++    int endy = pps->row_bd[tile_y+1] - 1;
++
++    uint8_t slice_w = ctb_to_slice_w_h(ctb_col, 1<<sps->log2_ctb_size, sps->width,  pps->col_bd, pps->num_tile_columns);
++    uint8_t slice_h = ctb_to_slice_w_h(ctb_row, 1<<sps->log2_ctb_size, sps->height, pps->row_bd, pps->num_tile_rows);
++
++    p1_apb_write(de, RPI_TILESTART, pps->col_bd[tile_x] + (pps->row_bd[tile_y]<<16));
++    p1_apb_write(de, RPI_TILEEND, endx + (endy<<16));
++
++    if (do_bte)  // RPI_BEGINTILEEND gets the same value as RPI_TILEEND, only on request
++        p1_apb_write(de, RPI_BEGINTILEEND, endx + (endy<<16));
++
++    write_slice(de, s, slice_w, slice_h);
++
++    if (resetQPY)
++        p1_apb_write(de, RPI_QP, sps->qp_bd_offset + s->sh.slice_qp);
++
++    p1_apb_write(de, RPI_MODE, (0xFFFF                            <<  0)
++                              + (0x0                               << 16)
++                              + ((tile_x==pps->num_tile_columns-1) << 17)   // Bit 17: entry is in the last tile column
++                              + ((tile_y==pps->num_tile_rows-1)    << 18)); // Bit 18: entry is in the last tile row
++
++    p1_apb_write(de, RPI_CONTROL, (ctb_col<<0) + (ctb_row<<16));
++}
++
++//////////////////////////////////////////////////////////////////////////////
++
++// Doesn't attempt to remove from context as we should only do this at the end
++// of time or on create error
++static void
++dec_env_delete(dec_env_t * const de)
++{
++//    gpu_free(&de->gbuf);
++    // The FIFOs come from malloc()/realloc() (dec_env_new, p1_apb_write,
++    // p1_axi_write), so they go back via free(); av_free() is only for
++    // av_malloc()-family memory.  free(NULL) is fine on the create-error path
++    free(de->cmd_fifo);
++    free(de->bit_fifo);
++    sem_destroy(&de->phase_wait);
++    av_free(de);  // de itself was av_mallocz()'d
++}
++
++static dec_env_t *
++dec_env_new(AVCodecContext * const avctx, RPI_T * const rpi)
++{
++    // Allocates a decode env for avctx and stores it in the first free
++    // rpi->dec_envs[] slot.  Returns NULL (with everything freed again)
++    // if an allocation fails or all avctx->thread_count slots are taken.
++    dec_env_t * const de = av_mallocz(sizeof(*de));
++    int slot = -1;
++
++    if (de == NULL)
++        return NULL;
++
++    de->avctx = avctx;
++    de->phase_no = RPIVID_PHASE_NEW;
++    sem_init(&de->phase_wait, 0, 0);
++
++    // Both FIFOs start at 1024 entries; the p1_*_write() helpers grow them
++    de->cmd_max = 1024;
++    de->cmd_fifo = malloc(de->cmd_max * sizeof(struct RPI_CMD));
++    if (de->cmd_fifo == NULL)
++        goto fail;
++    de->bit_max = 1024;
++    de->bit_fifo = malloc(de->bit_max * sizeof(struct RPI_BIT));
++    if (de->bit_fifo == NULL)
++        goto fail;
++
++    pthread_mutex_lock(&rpi->phase_lock); // Abuse - not worth creating a lock just for this
++    for (int i = 0; slot < 0 && i != avctx->thread_count; ++i) {
++        if (rpi->dec_envs[i] == NULL) {
++            rpi->dec_envs[i] = de;
++            slot = i;
++        }
++    }
++    pthread_mutex_unlock(&rpi->phase_lock);
++    if (slot >= 0)
++        return de;
++    av_log(avctx, AV_LOG_ERROR, "Failed to find a slot for hw thread context\n");
++fail:
++    dec_env_delete(de);
++    return NULL;
++}
++
++
++static dec_env_t *
++dec_env_get(AVCodecContext * const avctx, RPI_T * const rpi)
++{
++    // Takes a reference on rpi and returns the decode env whose avctx
++    // matches, creating one when an empty slot is reached first.
++    // Returns NULL if rpi is already dead, creation fails, or every slot
++    // belongs to some other context.  The reference taken here is NOT
++    // dropped again on the NULL paths.
++    if (atomic_fetch_add(&rpi->ref_count, 1) <= 0) {
++        // Already dead
++        av_log(avctx, AV_LOG_ERROR, "RPIVID called whilst dead\n");
++        return NULL;
++    }
++
++    for (int i = 0; i != avctx->thread_count; ++i) {
++        dec_env_t * const cur = rpi->dec_envs[i];
++
++        // dec_env_new() fills the first empty slot, so reaching one ends the search
++        if (cur == NULL)
++            return dec_env_new(avctx, rpi);
++        if (cur->avctx == avctx)
++            return cur;
++    }
++
++    return NULL;
++}
++
++// Call at end of fn
++// Used to ensure we aren't in a worker thread when killed
++static void
++dec_env_release(RPI_T * const rpi, dec_env_t * const de)
++{
++    // atomic_fetch_sub() returns the count from before the decrement, so 1
++    // means this call dropped the last reference: signal ref_zero
++    if (atomic_fetch_sub(&rpi->ref_count, 1) == 1)
++        sem_post(&rpi->ref_zero);
++}
++
++//----------------------------------------------------------------------------
++
++// Wait for a slot in the given phase
++// Any error return is probably fatal
++static int
++wait_phase(RPI_T * const rpi, dec_env_t * const de, const int phase_no)
++{
++    phase_wait_env_t *const p = rpi->phase_reqs + phase_no;
++    int queued;
++
++    // If the frame immediately before us in decode order has not yet
++    // posted this phase, push ourselves on the phase's wait list
++    pthread_mutex_lock(&rpi->phase_lock);
++    queued = (p->last_order + 1 != de->decode_order);
++    if (queued) {
++        de->phase_wait_q_next = p->q;
++        p->q = de;
++    }
++    pthread_mutex_unlock(&rpi->phase_lock);
++
++    // Sleep until post_phase() picks us; only EINTR is retried
++    while (queued && sem_wait(&de->phase_wait) == -1) {
++        const int err = errno;
++        if (err != EINTR)
++            return AVERROR(err);
++    }
++
++    de->phase_no = phase_no;
++    return 0;
++}
++
++static void
++post_phase(RPI_T * const rpi, dec_env_t * const de, const int phase_no)
++{
++    // Marks phase_no as done for de and wakes the env (if any) that is
++    // queued in wait_phase() with the next decode_order.
++    phase_wait_env_t *const p = rpi->phase_reqs + phase_no;
++    dec_env_t * next_de = NULL;
++
++    pthread_mutex_lock(&rpi->phase_lock);
++    p->last_order = de->decode_order;
++    // link always points at the pointer that refers to the current
++    // entry, so unlinking is a single store
++    for (dec_env_t ** link = &p->q; *link != NULL; link = &(*link)->phase_wait_q_next) {
++        dec_env_t * const cand = *link;
++
++        if (cand->decode_order != p->last_order + 1)
++            continue;
++        *link = cand->phase_wait_q_next;   // Remove from Q
++        cand->phase_wait_q_next = NULL;    // Tidy
++        next_de = cand;
++        break;
++    }
++
++    pthread_mutex_unlock(&rpi->phase_lock);
++
++    if (next_de != NULL)
++        sem_post(&next_de->phase_wait);
++}
++
++// Wait & signal stuff s.t. threads in other phases can continue
++static void
++abort_phases(RPI_T * const rpi, dec_env_t * const de)
++{
++    for (int phase = de->phase_no; ++phase < RPIVID_PHASE_NEW; ) {  // Every phase after the current one, in order
++        wait_phase(rpi, de, phase);
++        post_phase(rpi, de, phase);
++    }
++    de->phase_no = RPIVID_PHASE_NEW;
++}
++
++// Start timing for phase
++// Stats only - no actual effect
++static inline void tstart_phase(RPI_T * const rpi, const int phase_no)
++{
++#if OPT_PHASE_TIMING
++    phase_wait_env_t *const stats = &rpi->phase_reqs[phase_no];
++    const uint64_t t = tus64();
++    if (stats->phase_time != 0)  // 0 = no earlier stamp, so there is no gap to account for
++        stats->time_out_phase += t - stats->phase_time;
++    stats->phase_time = t;
++#endif
++}
++
++#if OPT_PHASE_TIMING
++static unsigned int tavg_bin_phase(phase_wait_env_t *const p, const unsigned int avg_n)
++{   // Returns the histogram bin (0..8) for the mean of the newest avg_n samples in p->time_stash
++    uint64_t tsum = 0;
++    unsigned int i;
++    for (i = 0; i != avg_n; ++i)
++        tsum += p->time_stash[(p->i3 - i) & 15];  // Walk back from the newest sample, wrapping in the 16-entry ring
++    for (i = 0; i != sizeof(time_thresholds) / sizeof(time_thresholds[0]); ++i) {  // Bounded by the table size (8); bin 8 = above every threshold
++        if (time_thresholds[i] * 1000 * avg_n > tsum)
++            break;
++    }
++    return i;
++}
++#endif
++
++// Close the timing interval for a phase and update its running statistics.
++// Stats only - no actual effect on decoding.
++static inline void tend_phase(RPI_T * const rpi, const int phase_no)
++{
++#if OPT_PHASE_TIMING
++    phase_wait_env_t *const stats = &rpi->phase_reqs[phase_no];
++    const uint64_t t_exit = tus64();
++    const uint64_t elapsed = t_exit - stats->phase_time;
++    // Stash first: the bin lookups below read the stash including this sample
++    stats->time_stash[stats->i3] = elapsed;
++    stats->phase_time = t_exit;
++    stats->time_in_phase += elapsed;
++    if (stats->max_phase_time < elapsed) {
++        stats->max_time_decode_order = stats->last_order;
++        stats->max_phase_time = elapsed;
++    }
++    stats->time_bins[tavg_bin_phase(stats, 1)] += 1;
++    stats->time_bins3[tavg_bin_phase(stats, 3)] += 1;
++    stats->time_bins5[tavg_bin_phase(stats, 5)] += 1;
++    // Step the stash index, wrapping modulo 16
++    stats->i3 = (stats->i3 + 1) & 15;
++#endif
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Start frame: fetch this thread's decode env, give the frame its decode
++// order and reset the per-frame bit/cmd list lengths.  Returns 0 or -1.
++static int rpi_hevc_start_frame(
++    AVCodecContext * avctx,
++    const uint8_t *buffer,
++    uint32_t size) {
++    RPI_T * const rpi = avctx->internal->hwaccel_priv_data;
++    dec_env_t * const de = dec_env_get(avctx, rpi);
++    const HEVCContext * const s = avctx->priv_data;
++    const HEVCSPS * const sps = s->ps.sps;
++    const unsigned int CtbSizeY = 1U << sps->log2_ctb_size;
++
++#if TRACE_ENTRY
++    printf("<<< %s[%p]\n", __func__, de);
++#endif
++
++    if (de == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Cannot find find context for thread\n", __func__);
++        return -1;
++    }
++
++    de->phase_no = RPIVID_PHASE_START;
++    de->decode_order = ++rpi->decode_order;  // *** atomic?
++
++    ff_thread_finish_setup(avctx); // Allow next thread to enter rpi_hevc_start_frame
++
++    if (de->state != RPIVID_DECODE_NEW && de->state != RPIVID_DECODE_END) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Unexpected state transition: %d\n", __func__, de->state);
++        dec_env_release(rpi, de);  // Balance dec_env_get before bailing out
++        return -1;
++    }
++    de->state = RPIVID_DECODE_START;
++    // Picture size in CTBs, rounded up
++    de->PicWidthInCtbsY  = (sps->width + CtbSizeY - 1) / CtbSizeY;  //7-15
++    de->PicHeightInCtbsY = (sps->height + CtbSizeY - 1) / CtbSizeY;  //7-17
++    de->bit_len = 0;
++    de->cmd_len = 0;
++
++#if TRACE_ENTRY
++    printf(">>> %s[%p]\n", __func__, de);
++#endif
++
++    dec_env_release(rpi, de);
++    return 0;
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Slice messages: append one uint16_t message to the env's pending list (no bounds check is made here)
++static void msg_slice(dec_env_t * const de, const uint16_t msg) {
++    de->slice_msgs[de->num_slice_msgs] = msg;
++    de->num_slice_msgs += 1;
++}
++
++static void program_slicecmds(dec_env_t * const de, const int sliceid) {
++    // Count word first (message count + (slice id << 8)), then each queued
++    // message masked to 16 bits, at 4 byte steps from 0x4000
++    p1_apb_write(de, RPI_SLICECMDS, (sliceid << 8) + de->num_slice_msgs);
++    for (int msg_no = 0; msg_no < de->num_slice_msgs; ++msg_no)
++        p1_apb_write(de, 0x4000 + 4 * msg_no, de->slice_msgs[msg_no] & 0xffff);
++}
++
++static void pre_slice_decode(dec_env_t * const de, const HEVCContext * const s) {
++    const HEVCSPS * const sps = s->ps.sps;
++    const HEVCPPS * const pps = s->ps.pps;
++    const SliceHeader *sh = &s->sh;
++    // Rebuild de->slice_msgs for the current slice header: slice cmd word, ref pic descriptions, deblock & QP offset words
++    int weightedPredFlag, i, rIdx;
++    uint16_t cmd_slice;
++    unsigned int collocated_from_l0_flag;
++    // Start from an empty message list; cmd_slice low bits hold the slice type code (I=1, P=2, B=3)
++    de->num_slice_msgs=0;
++    de->dpbno_col = 0;
++    cmd_slice = 0;
++    if (sh->slice_type==HEVC_SLICE_I) cmd_slice = 1;
++    if (sh->slice_type==HEVC_SLICE_P) cmd_slice = 2;
++    if (sh->slice_type==HEVC_SLICE_B) cmd_slice = 3;
++    // Reference counts (non-I slices only): L0 from bit 2, L1 from bit 6
++    if (sh->slice_type!=HEVC_SLICE_I) {
++        cmd_slice += sh->nb_refs[L0]<<2;
++        cmd_slice += sh->nb_refs[L1]<<6;
++    }
++    // Merge candidate count from bit 11 (P & B slices only)
++    if (sh->slice_type==HEVC_SLICE_P ||  sh->slice_type==HEVC_SLICE_B)
++        cmd_slice |= sh->max_num_merge_cand<<11;
++    // Bit 14: 0 without temporal MVP; for B slices whether the collocated list is L0; otherwise 1 for P slices
++    collocated_from_l0_flag =
++        !sh->slice_temporal_mvp_enabled_flag ?
++            0 :
++        sh->slice_type == HEVC_SLICE_B ?
++            (sh->collocated_list == L0) :
++            (sh->slice_type==HEVC_SLICE_P);
++    cmd_slice |= collocated_from_l0_flag<<14;
++
++    if (sh->slice_type==HEVC_SLICE_P || sh->slice_type==HEVC_SLICE_B) {
++
++        int NoBackwardPredFlag = 1; // Flag to say all reference pictures are from the past
++        for(i=L0; i<=L1; i++) {
++            for(rIdx=0; rIdx <sh->nb_refs[i]; rIdx++) {
++                HEVCFrame *f = s->ref->refPicList[i].ref[rIdx];
++                HEVCFrame *c = s->ref; // CurrentPicture
++                if (c->poc < f->poc) NoBackwardPredFlag = 0;
++            }
++        }
++        // Note the DPB index of the collocated picture (read later when the frame is ended)
++        if (sps->sps_temporal_mvp_enabled_flag)
++        {
++            const RefPicList *rpl = (sh->slice_type != HEVC_SLICE_B || collocated_from_l0_flag) ?
++                s->ref->refPicList + 0 :
++                s->ref->refPicList + 1;
++            de->dpbno_col = rpl->ref[sh->collocated_ref_idx] - s->DPB;
++        }
++        // NoBackwardPredFlag goes in bit 10; the slice cmd word is the first message
++        cmd_slice += NoBackwardPredFlag<<10;
++        msg_slice(de, cmd_slice);
++
++        // Write reference picture descriptions
++        weightedPredFlag = sh->slice_type==HEVC_SLICE_P? pps->weighted_pred_flag : pps->weighted_bipred_flag;
++        // Per ref: index word, POC, then (if weighted) luma, Cb & Cr weight/offset pairs
++        for(i=L0; i<=L1; i++)
++            for(rIdx=0; rIdx <sh->nb_refs[i]; rIdx++) {
++                HEVCFrame *f = s->ref->refPicList[i].ref[rIdx];
++                HEVCFrame *c = s->ref; // CurrentPicture
++                int pic = f - s->DPB;
++                // Make sure pictures are in range 0 to 15
++                int adjusted_pic = f<c? pic : pic-1;
++                int lt = s->ref->refPicList[i].isLongTerm[rIdx];
++                msg_slice(de, adjusted_pic+(lt<<4)+(weightedPredFlag<<5)+(weightedPredFlag<<6));
++                msg_slice(de, f->poc);
++                if (weightedPredFlag) {
++                    msg_slice(de,   s->sh.luma_log2_weight_denom+(((i?s->  sh.luma_weight_l1:  s->sh.luma_weight_l0)[rIdx]   &0x1ff)<<3));
++                    msg_slice(de,                                  (i?s->  sh.luma_offset_l1:  s->sh.luma_offset_l0)[rIdx]   & 0xff);
++                    msg_slice(de, s->sh.chroma_log2_weight_denom+(((i?s->sh.chroma_weight_l1:s->sh.chroma_weight_l0)[rIdx][0]&0x1ff)<<3));
++                    msg_slice(de,                                  (i?s->sh.chroma_offset_l1:s->sh.chroma_offset_l0)[rIdx][0]& 0xff);
++                    msg_slice(de, s->sh.chroma_log2_weight_denom+(((i?s->sh.chroma_weight_l1:s->sh.chroma_weight_l0)[rIdx][1]&0x1ff)<<3));
++                    msg_slice(de,                                  (i?s->sh.chroma_offset_l1:s->sh.chroma_offset_l0)[rIdx][1]& 0xff);
++                }
++            }
++    }
++    else
++        msg_slice(de, cmd_slice);
++    // Deblocking word: halved beta & tc offsets (4 bits each) followed by three filter flags
++    msg_slice(de, ((sh->beta_offset/2)&15)
++        + (((sh->tc_offset/2)&15)                           <<  4)
++        + (sh->disable_deblocking_filter_flag               <<  8)
++        + (sh->slice_loop_filter_across_slices_enabled_flag <<  9)
++        + (pps->loop_filter_across_tiles_enabled_flag       << 10)); // CMD_DEBLOCK
++    // QP offset word: Cb in bits 0-4, Cr in bits 5-9
++    msg_slice(de, ((sh->slice_cr_qp_offset&31)<<5) + (sh->slice_cb_qp_offset&31)); // CMD_QPOFF
++}
++
++
++//////////////////////////////////////////////////////////////////////////////
++
++static void rpi_hevc_abort_frame(AVCodecContext * const avctx) {
++    RPI_T * const rpi = avctx->internal->hwaccel_priv_data;
++    dec_env_t * const de = dec_env_get(avctx,  rpi);
++    // Abandon the current frame: pass through any phases not yet done & put the env back in the NEW state
++#if TRACE_ENTRY
++    printf("<<< %s[%p]\n", __func__, de);
++#endif
++
++    if (de == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Cannot find find context for thread\n", __func__);
++        return;
++    }
++    // The state is only used to pick the log message - cleanup below is the same in every case
++    switch (de->state) {
++        case RPIVID_DECODE_NEW:
++        case RPIVID_DECODE_END:
++            // Expected transition
++            break;
++
++        case RPIVID_DECODE_SLICE:
++            // Error transition
++            av_log(avctx, AV_LOG_INFO, "Error in decode - aborting\n");
++            break;
++
++        case RPIVID_DECODE_START:
++        default:
++            av_log(avctx, AV_LOG_ERROR, "%s: Unexpected state transition: %d\n", __func__, de->state);
++            break;
++    }
++
++    abort_phases(rpi, de);
++    de->state = RPIVID_DECODE_NEW;
++
++    dec_env_release(rpi, de);
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// End frame: finish the command list, then run hardware phases 0, 1 & 2 in decode order.
++// Returns 0 on success, otherwise an error code (the frame is marked broken on the fail path).
++static int rpi_hevc_end_frame(AVCodecContext * const avctx) {
++    RPI_T * const rpi = avctx->internal->hwaccel_priv_data;
++    const HEVCContext * const s = avctx->priv_data;
++    const HEVCPPS * const pps = s->ps.pps;
++    const HEVCSPS * const sps = s->ps.sps;
++    dec_env_t * const de = dec_env_get(avctx,  rpi);
++    AVFrame * const f = s->ref->frame;
++    const unsigned int dpbno_cur = s->ref - s->DPB;
++    vid_vc_addr_t cmds_vc;
++    vid_vc_addr_t pu_base_vc;
++    unsigned int pu_stride;
++    vid_vc_addr_t coeff_base_vc;
++    unsigned int coeff_stride;
++    unsigned int i;
++    int rv = 0;
++    int status = 0;
++    int coeffbuf_sem_claimed = 0;
++
++#if TRACE_ENTRY
++    printf("<<< %s[%p]\n", __func__, de);
++#endif
++
++    if (de == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Cannot find find context for thread\n", __func__);
++        return AVERROR_BUG;  // Should never happen
++    }
++
++    if (de->state != RPIVID_DECODE_SLICE) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Unexpected state: %d\n", __func__, de->state);
++        rv = AVERROR_UNKNOWN;
++        goto fail;
++    }
++    de->state = RPIVID_DECODE_END;
++
++    // End of command compilation
++    {
++        const unsigned int last_x = pps->col_bd[pps->num_tile_columns]-1;
++        const unsigned int last_y = pps->row_bd[pps->num_tile_rows]-1;
++        if (pps->entropy_coding_sync_enabled_flag) {
++            if (de->wpp_entry_x<2 && de->PicWidthInCtbsY>2)
++                wpp_pause(de, last_y);
++        }
++        p1_apb_write(de, RPI_STATUS, 1 + (last_x<<5) + (last_y<<18));
++    }
++
++    // Phase 0 ---------------------------------------------------------------
++    // Take our turn in phase 0 and claim a bit buffer
++    wait_phase(rpi, de, 0);
++    rpi_sem_wait(&rpi->bitbuf_sem);
++    tstart_phase(rpi, 0);
++
++    // Copy cmds & bits into gpu side buffer
++    // Layout: CMDS, BITS
++    {
++        uint8_t * const armbase = rpi->gbitbufs[rpi->bitbuf_no].arm;
++        vid_vc_addr_t vcbase = rpi->gbitbufs[rpi->bitbuf_no].vc;
++        unsigned int cmd_bytes = de->cmd_len * sizeof(struct RPI_CMD);
++
++        uint8_t * p = armbase + rnd64(cmd_bytes);
++        uint8_t * const eobits = armbase + rpi->gbitbufs[rpi->bitbuf_no].numbytes;
++
++        cmds_vc = vcbase;
++
++        // Copy all the bits & update bitstream cmds to point at the right bits
++        for (i = 0; i < de->bit_len; ++i)
++        {
++            const unsigned int seg_len = de->bit_fifo[i].len;
++
++            if (p + seg_len > eobits) {
++                status = -1;
++                break;
++            }
++
++            memcpy(p, de->bit_fifo[i].ptr, seg_len);
++            de->cmd_fifo[de->bit_fifo[i].cmd].data = MANGLE64((p - armbase) + vcbase);
++
++            p += rnd64(seg_len);
++        }
++
++        memcpy(armbase, de->cmd_fifo, cmd_bytes);
++    }
++
++    if (status == 0)
++    {
++        if (++rpi->bitbuf_no >= RPIVID_BITBUFS)
++            rpi->bitbuf_no = 0;
++    }
++    else
++    {
++        sem_post(&rpi->bitbuf_sem);
++        av_log(avctx, AV_LOG_ERROR, "Out of HEVC bit/cmd memory\n");
++        rv = AVERROR_BUFFER_TOO_SMALL;
++    }
++
++    tend_phase(rpi, 0);
++    post_phase(rpi, de, 0);
++
++    if (status < 0)
++        goto fail;
++
++    // Phase 1 ---------------------------------------------------------------
++    // Take our turn in phase 1 and claim a coeff buffer
++    wait_phase(rpi, de, 1);
++    rpi_sem_wait(&rpi->coeffbuf_sem);
++    coeffbuf_sem_claimed = 1;
++    tstart_phase(rpi, 1);
++
++    status = 0;
++    for (;;)
++    {
++        // (Re-)allocate PU/COEFF stream space
++        const unsigned int total_size = rpi->gcoeffbufs[rpi->coeffbuf_no].numbytes;
++        unsigned int pu_size;
++
++        pu_base_vc = rpi->gcoeffbufs[rpi->coeffbuf_no].vc;
++        pu_stride = rnd64(rpi->max_pu_msgs * 2 * de->PicWidthInCtbsY);
++        pu_size = pu_stride * de->PicHeightInCtbsY;
++
++        if (pu_size >= total_size || status == -1) {
++            GPU_MEM_PTR_T newbuf;
++
++            if (gpu_malloc_uncached(round_up_size(total_size + 1), &newbuf) != 0)
++            {
++                av_log(avctx, AV_LOG_ERROR, "Failed to reallocate coeffbuf\n");
++                status = -1;
++                break;
++            }
++            gpu_free(rpi->gcoeffbufs + rpi->coeffbuf_no);
++            rpi->gcoeffbufs[rpi->coeffbuf_no] = newbuf;
++            status = 0;
++            continue;
++        }
++
++        // Allocate all remaining space to coeff
++        coeff_base_vc = pu_base_vc + pu_size;
++        coeff_stride = ((total_size - pu_size) / de->PicHeightInCtbsY) & ~63;  // Round down to multiple of 64
++
++        apb_write_vc_addr(rpi, RPI_PUWBASE, pu_base_vc);
++        apb_write_vc_len(rpi, RPI_PUWSTRIDE, pu_stride);
++        apb_write_vc_addr(rpi, RPI_COEFFWBASE, coeff_base_vc);
++        apb_write_vc_len(rpi, RPI_COEFFWSTRIDE, coeff_stride);
++
++        // Trigger command FIFO
++        apb_write(rpi, RPI_CFNUM, de->cmd_len);
++#if TRACE_DEV && 0
++        apb_dump_regs(rpi, 0x0, 32);
++        apb_dump_regs(rpi, 0x8000, 24);
++        axi_dump(de, ((uint64_t)a64)<<6, de->cmd_len * sizeof(struct RPI_CMD));
++#endif
++        apb_write_vc_addr(rpi, RPI_CFBASE, cmds_vc);
++
++        int_wait(rpi, 1);
++
++        status = check_status(rpi, de);
++
++        if (status == -1)
++            continue;
++        else if (status != 1)
++            break;
++
++        // Status 1 means out of PU space so try again with more
++        // If we ran out of Coeff space then we are out of memory - we could possibly realloc?
++        rpi->max_pu_msgs += rpi->max_pu_msgs / 2;
++    }
++
++    // Inc inside the phase 1 lock, but only inc if we succeeded otherwise we
++    // may reuse a live buffer when we kick the coeff sem
++    if (status == 0)
++    {
++        if (++rpi->coeffbuf_no >= RPIVID_COEFFBUFS)
++            rpi->coeffbuf_no = 0;
++    }
++    else
++    {
++        if (status == -1)
++        {
++            av_log(avctx, AV_LOG_ERROR, "Out of pu + coeff intermediate memory: pus=%d\n", rpi->max_pu_msgs);
++            rv = AVERROR_BUFFER_TOO_SMALL;
++        }
++        else
++        {
++            av_log(avctx, AV_LOG_WARNING, "Phase 1 decode error\n");
++            rv = AVERROR_INVALIDDATA;
++        }
++    }
++
++    tend_phase(rpi, 1);
++    sem_post(&rpi->bitbuf_sem);
++    post_phase(rpi, de, 1);
++
++    if (status != 0)
++        goto fail;
++
++    // Phase 2 ---------------------------------------------------------------
++    // Take our turn in phase 2; the output frame is only allocated now
++    wait_phase(rpi, de, 2);
++
++    if ((rv = av_rpi_zc_resolve_frame(f, ZC_RESOLVE_ALLOC)) != 0)
++    {
++        // As we are in phase 2 already here we don't need to worry about
++        // ceoffbuf_no despite the early exit
++        post_phase(rpi, de, 2);
++        av_log(avctx, AV_LOG_ERROR, "Failed to allocate output frame\n");
++        goto fail;
++    }
++
++    tstart_phase(rpi, 2);
++
++    apb_write_vc_addr(rpi, RPI_PURBASE, pu_base_vc);
++    apb_write_vc_len(rpi, RPI_PURSTRIDE, pu_stride);
++    apb_write_vc_addr(rpi, RPI_COEFFRBASE, coeff_base_vc);
++    apb_write_vc_len(rpi, RPI_COEFFRSTRIDE, coeff_stride);
++
++    apb_write_vc_addr(rpi, RPI_OUTYBASE, get_vc_address_y(f));
++    apb_write_vc_addr(rpi, RPI_OUTCBASE, get_vc_address_u(f));
++    apb_write_vc_len(rpi, RPI_OUTYSTRIDE, f->linesize[3] * 128);
++    apb_write_vc_len(rpi, RPI_OUTCSTRIDE, f->linesize[3] * 128);
++
++    // Keep the last thing we resolved as fallback for any ref we fail to
++    // resolve.  As a final fallback use our current frame.  The pels might
++    // not be there yet but at least the memory is valid.
++    //
++    // Attempt to resolve the entire DPB - we could note what we have used
++    // in ref lists but probably simpler and more reliable to set the whole thing
++    {
++        AVFrame * fallback_frame = f;
++        for (i = 0; i != 16; ++i) {
++            // Avoid current frame
++            const HEVCFrame * hevc_fr = (s->DPB + i >= s->ref) ? s->DPB + i + 1 : s->DPB + i;
++            AVFrame * fr = hevc_fr->frame;
++
++            if (fr != NULL &&
++                av_rpi_zc_resolve_frame(fr, ZC_RESOLVE_FAIL) == 0)
++            {
++                fallback_frame = fr;
++            }
++            else
++            {
++                fr = fallback_frame;
++            }
++
++            apb_write_vc_addr(rpi, 0x9000+16*i, get_vc_address_y(fr));
++            apb_write(rpi, 0x9004+16*i, 0);
++            apb_write_vc_addr(rpi, 0x9008+16*i, get_vc_address_u(fr));
++            apb_write(rpi, 0x900C+16*i, 0);
++        }
++    }
++
++    apb_write(rpi, RPI_CONFIG2,
++          (sps->bit_depth                             << 0) // BitDepthY
++        + (sps->bit_depth                             << 4) // BitDepthC
++       + ((sps->bit_depth>8)                          << 8) // BitDepthY
++       + ((sps->bit_depth>8)                          << 9) // BitDepthC
++        + (sps->log2_ctb_size                         <<10)
++        + (pps->constrained_intra_pred_flag           <<13)
++        + (sps->sps_strong_intra_smoothing_enable_flag<<14)
++        + (sps->sps_temporal_mvp_enabled_flag         <<15)
++        + (pps->log2_parallel_merge_level             <<16)
++        + (s->sh.slice_temporal_mvp_enabled_flag      <<19)
++        + (sps->pcm.loop_filter_disable_flag          <<20)
++       + ((pps->cb_qp_offset&31)                      <<21)
++       + ((pps->cr_qp_offset&31)                      <<26));
++
++    apb_write(rpi, RPI_FRAMESIZE, (sps->height<<16) + sps->width);
++    apb_write(rpi, RPI_CURRPOC, s->poc);
++
++    // collocated reads/writes
++    if (sps->sps_temporal_mvp_enabled_flag) {
++        av_assert0(de->dpbno_col < RPIVID_COL_PICS);
++        av_assert0(dpbno_cur < RPIVID_COL_PICS);
++
++        apb_write_vc_len(rpi, RPI_COLSTRIDE, rpi->col_stride);
++        apb_write_vc_len(rpi, RPI_MVSTRIDE,  rpi->col_stride);
++        apb_write_vc_addr(rpi, RPI_MVBASE,  rpi->gcolbuf.vc + dpbno_cur * rpi->col_picsize);
++        apb_write_vc_addr(rpi, RPI_COLBASE, rpi->gcolbuf.vc + de->dpbno_col * rpi->col_picsize);
++    }
++
++#if TRACE_DEV && 0
++    apb_dump_regs(rpi, 0x0, 32);
++    apb_dump_regs(rpi, 0x8000, 24);
++#endif
++
++    apb_write(rpi, RPI_NUMROWS, de->PicHeightInCtbsY);
++    apb_read(rpi, RPI_NUMROWS); // Read back to confirm write has reached block
++
++    int_wait(rpi, 2);
++
++    tend_phase(rpi, 2);
++    coeffbuf_sem_claimed = 0;
++    sem_post(&rpi->coeffbuf_sem);
++    // Set valid here to avoid race in resolving in any pending phase 2
++    av_rpi_zc_set_valid_frame(f);
++
++    post_phase(rpi, de, 2);
++
++    // Flush frame for CPU access
++    // Arguably the best place would be at the start of phase 2 but here
++    // will overlap with the wait
++    //
++    // * Even better would be to have better lock/unlock control in ZC for external access
++    if (rpi->gpu_init_type == GPU_INIT_GPU)  // * CMA is currently always uncached
++    {
++        rpi_cache_buf_t cbuf;
++        rpi_cache_flush_env_t * const fe = rpi_cache_flush_init(&cbuf);
++        rpi_cache_flush_add_frame(fe, f, RPI_CACHE_FLUSH_MODE_INVALIDATE);
++        rpi_cache_flush_finish(fe);
++    }
++
++#if TRACE_ENTRY
++    printf(">>> %s[%p] OK\n", __func__, de);
++#endif
++
++    dec_env_release(rpi, de);
++    return 0;
++
++fail:
++    av_rpi_zc_set_broken_frame(f);
++    if (coeffbuf_sem_claimed)
++        sem_post(&rpi->coeffbuf_sem);
++    abort_phases(rpi, de);  // Dummy any unresolved phases
++
++#if TRACE_ENTRY
++    printf(">>> %s[%p] FAIL\n", __func__, de);
++#endif
++
++    dec_env_release(rpi, de);
++    return rv;
++}
++
++//////////////////////////////////////////////////////////////////////////////
++
++
++#if TRACE_DEV
++// Debug hex dump of len bytes at p: 16 bytes per row, each row prefixed with its offset, '-' before the 9th byte
++static void dump_data(const uint8_t * p, size_t len)
++{
++    size_t i;
++    for (i = 0; i < len; i += 16) {
++        printf("%04zx", i);  // size_t needs the z length modifier
++        for (size_t j = 0; j != 16 && i + j < len; ++j) {  // Stop at len so a short final row is not over-read
++            printf("%c%02x", j == 8 ? '-' : ' ', p[i+j]);  // Separator depends on the column, not the row offset
++        }
++        printf("\n");
++    }
++}
++#endif
++
++#if OPT_EMU
++// Advance b by idx bytes, additionally skipping any 0x03 that directly follows two or more zero bytes
++static const uint8_t * ptr_from_index(const uint8_t * b, unsigned int idx)
++{
++    unsigned int zero_run = 0;
++    for (; idx != 0; --idx) {
++        const uint8_t cur = *b++;
++        if (cur != 0) {
++            zero_run = 0;
++            continue;
++        }
++        if (++zero_run >= 2 && *b == 3) {
++            ++b;
++            zero_run = 0;
++        }
++    }
++    return b;
++}
++#endif
++
++static void WriteBitstream(dec_env_t * const de, const HEVCContext * const s) {
++    const int rpi_use_emu = OPT_EMU; // FFmpeg removes emulation prevention bytes
++    const int offset = 0; // Always 64-byte aligned in sim, need not be on real hardware
++    const GetBitContext *gb = &s->HEVClc->gb;
++    // Queue the rest of the slice data as a bitstream segment plus the BF* register writes that describe it
++#if OPT_EMU
++    const uint8_t *ptr = ptr_from_index(de->nal_buffer, gb->index/8 + 1);
++    const int len = de->nal_size - (ptr - de->nal_buffer);
++#else
++    const int len = 1 + gb->size_in_bits/8 - gb->index/8;
++    const void *ptr = &gb->buffer[gb->index/8];
++#endif
++
++#if TRACE_DEV
++    printf("Index=%d, /8=%#x\n", gb->index, gb->index/8);
++    dump_data(de->nal_buffer, 128);
++#endif
++    // The result of the RPI_BFBASE write is handed to p1_axi_write along with the data pointer & length
++    p1_axi_write(de, len, ptr, p1_apb_write(de, RPI_BFBASE, 0)); // BFBASE set later
++    p1_apb_write(de, RPI_BFNUM, len);
++    p1_apb_write(de, RPI_BFCONTROL, offset + (1<<7)); // Stop
++    p1_apb_write(de, RPI_BFCONTROL, offset + (rpi_use_emu<<6));
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Wavefront mode
++
++static void wpp_decode_slice(dec_env_t * const de, const HEVCContext * const s, int ctb_addr_ts)
++{
++    const HEVCPPS * const pps = s->ps.pps;
++    // Queue the commands for one slice segment in wavefront mode, starting at CTB address ctb_addr_ts
++    int i, resetQPY=1;
++    int indep = !s->sh.dependent_slice_segment_flag;
++    int ctb_col = s->sh.slice_ctb_addr_rs % de->PicWidthInCtbsY;
++    // A non-zero start address means an earlier slice has to be closed off first
++    if (ctb_addr_ts)
++        wpp_end_previous_slice(de, s, ctb_addr_ts);
++    pre_slice_decode(de, s);
++    WriteBitstream(de, s);
++    if (ctb_addr_ts==0 || indep || de->PicWidthInCtbsY==1)
++        WriteProb(de, s);
++    else if (ctb_col==0)
++        p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
++    else
++        resetQPY=0;  // Dependent segment starting mid-row: neither probabilities nor QPY are reset
++    program_slicecmds(de, s->slice_idx);
++    new_slice_segment(de, s);
++    wpp_entry_point(de, s, indep, resetQPY, ctb_addr_ts);
++    for (i=0; i<s->sh.num_entry_point_offsets; i++) {  // One pass per further entry point in the slice
++        int ctb_addr_rs = pps->ctb_addr_ts_to_rs[ctb_addr_ts];
++        int ctb_row = ctb_addr_rs / de->PicWidthInCtbsY;
++        int last_x = de->PicWidthInCtbsY-1;
++        if (de->PicWidthInCtbsY>2)
++            wpp_pause(de, ctb_row);
++        p1_apb_write(de, RPI_STATUS, (ctb_row<<18) + (last_x<<5) + 2);
++        if (de->PicWidthInCtbsY==2)
++            p1_apb_write(de, RPI_TRANSFER, PROB_BACKUP);
++        if (de->PicWidthInCtbsY==1)
++            WriteProb(de, s);
++        else
++            p1_apb_write(de, RPI_TRANSFER, PROB_RELOAD);
++        ctb_addr_ts += pps->column_width[0];  // Step the address on by column_width[0] CTBs
++        wpp_entry_point(de, s, 0, 1, ctb_addr_ts);
++    }
++}
++
++//////////////////////////////////////////////////////////////////////////////
++// Tiles mode
++
++static void decode_slice(dec_env_t * const de, const HEVCContext * const s, int ctb_addr_ts) {
++    const HEVCPPS * const pps = s->ps.pps;
++    int i, resetQPY;
++    // Queue the commands for one slice segment in tiles (non-WPP) mode, starting at CTB address ctb_addr_ts
++    if (ctb_addr_ts) end_previous_slice(de, s, ctb_addr_ts);
++    pre_slice_decode(de, s);
++    WriteBitstream(de, s);
++    resetQPY = ctb_addr_ts==0  // Reset at picture start, on a tile change or for an independent segment
++            || pps->tile_id[ctb_addr_ts]!=pps->tile_id[ctb_addr_ts-1]
++            || !s->sh.dependent_slice_segment_flag;
++    if (resetQPY) WriteProb(de, s);
++    program_slicecmds(de, s->slice_idx);
++    new_slice_segment(de, s);
++    new_entry_point(de, s, !s->sh.dependent_slice_segment_flag, resetQPY, ctb_addr_ts);
++    for (i=0; i<s->sh.num_entry_point_offsets; i++) {  // One pass per further entry point in the slice
++        int ctb_addr_rs = pps->ctb_addr_ts_to_rs[ctb_addr_ts];
++        int ctb_col = ctb_addr_rs % de->PicWidthInCtbsY;
++        int ctb_row = ctb_addr_rs / de->PicWidthInCtbsY;
++        int tile_x = ctb_to_tile (ctb_col, pps->col_bd, pps->num_tile_columns);
++        int tile_y = ctb_to_tile (ctb_row, pps->row_bd, pps->num_tile_rows);
++        int last_x = pps->col_bd[tile_x+1]-1;  // Last CTB column & row of the current tile
++        int last_y = pps->row_bd[tile_y+1]-1;
++        p1_apb_write(de, RPI_STATUS, 2 + (last_x<<5) + (last_y<<18));
++        WriteProb(de, s);
++        ctb_addr_ts += pps->column_width[tile_x] * pps->row_height[tile_y];  // Skip the whole tile
++        new_entry_point(de, s, 0, 1, ctb_addr_ts);
++    }
++}
++
++//////////////////////////////////////////////////////////////////////////////
++
++static int cabac_start_align(HEVCContext *s)
++{
++    // Drop one bit, byte-align the reader, then (re)start the CABAC decoder
++    // on the bytes that remain.  Returns ff_init_cabac_decoder's result.
++    GetBitContext * const reader = &s->HEVClc->gb;
++    skip_bits(reader, 1);
++    align_get_bits(reader);
++    return ff_init_cabac_decoder(&s->HEVClc->cc, reader->buffer + get_bits_count(reader) / 8,
++                                 (get_bits_left(reader) + 7) / 8);  // Should look at getting rid of this
++}
++
++static int rpi_hevc_decode_slice(
++    AVCodecContext *avctx,
++    const uint8_t *buffer,
++    uint32_t size)
++{
++    RPI_T * const rpi = avctx->internal->hwaccel_priv_data;
++    HEVCContext * const s = avctx->priv_data;
++    dec_env_t * const de = dec_env_get(avctx, rpi);
++    const HEVCPPS *pps = s->ps.pps;
++    int ctb_addr_ts = pps->ctb_addr_rs_to_ts[s->sh.slice_ctb_addr_rs];
++    // Queue the commands for one slice NAL (buffer, size) on this thread's env.  Returns 0 or -1.
++#if TRACE_ENTRY
++    printf("<<< %s[%p]\n", __func__, de);
++#endif
++    if (de == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Cannot find find context for thread\n", __func__);
++        return -1;
++    }
++
++    if (de->state != RPIVID_DECODE_START && de->state != RPIVID_DECODE_SLICE) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Unexpected state: %d\n", __func__, de->state);
++        dec_env_release(rpi, de);  // Balance dec_env_get before bailing out
++        return -1;
++    }
++    de->state = RPIVID_DECODE_SLICE;
++
++    de->nal_buffer = buffer;
++    de->nal_size   = size;
++
++#if !OPT_EMU
++    cabac_start_align(s);
++#endif
++    if (s->ps.sps->scaling_list_enable_flag)
++        populate_scaling_factors(de, s);
++    pps->entropy_coding_sync_enabled_flag? wpp_decode_slice(de, s, ctb_addr_ts)
++                                             : decode_slice(de, s, ctb_addr_ts);
++#if TRACE_ENTRY
++    printf(">>> %s[%p]\n", __func__, de);
++#endif
++    dec_env_release(rpi, de);
++    return 0;
++}
++
++//////////////////////////////////////////////////////////////////////////////
++
++static int rpivid_retrieve_data(void *logctx, AVFrame *frame)
++{
++    // Resolve the frame with ZC_RESOLVE_WAIT_VALID; a non-zero result is logged and handed back unchanged
++    const int resolve_err = av_rpi_zc_resolve_frame(frame, ZC_RESOLVE_WAIT_VALID);
++    if (resolve_err != 0) av_log(logctx, AV_LOG_ERROR, "Unable to resolve output frame\n");
++    return resolve_err;
++}
++
++static int rpivid_hevc_alloc_frame(AVCodecContext * avctx, AVFrame *frame)
++{
++    RPI_T * const rpi = avctx->internal->hwaccel_priv_data;
++    HEVCContext * const s = avctx->priv_data;
++    // Frame buffering + 1 output.  Would need thread_count extra but we now
++    // alloc at the start of phase 2 so that is the only thread we need the
++    // extra buffer for.
++    const unsigned int pool_req = s->ps.sps->temporal_layer[s->ps.sps->max_sub_layers - 1].max_dec_pic_buffering + 1;
++    int rv;
++    // Get a ZC buffer for frame (from the user's ZC env if in use, else a lazily created local one) & hook up post-processing
++    if (av_rpi_zc_in_use(avctx))
++    {
++        const AVZcEnvPtr zc = avctx->opaque;
++        av_rpi_zc_set_decoder_pool_size(zc, pool_req);
++        rv = av_rpi_zc_get_buffer(zc, frame);   // get_buffer2 would alloc
++    }
++    else
++    {
++        if (rpi->zc == NULL) {
++            pthread_mutex_lock(&rpi->phase_lock); // Abuse - not worth creating a lock just for this
++            // Alloc inside lock to make sure we only ever alloc one
++            if (rpi->zc == NULL) {
++                rpi->zc = av_rpi_zc_int_env_alloc(s);
++            }
++            pthread_mutex_unlock(&rpi->phase_lock);
++        }
++        // NOTE(review): rpi->zc is passed on before the NULL check below - confirm the callee tolerates NULL
++        av_rpi_zc_set_decoder_pool_size(rpi->zc, pool_req); // Ignored by local allocator, but set anyway :-)
++        rv = (rpi->zc == NULL) ? AVERROR(ENOMEM) :
++            av_rpi_zc_get_buffer(rpi->zc, frame);
++    }
++    // If attaching decode data fails the buffer just obtained is dropped again
++    if (rv == 0 &&
++        (rv = ff_attach_decode_data(frame)) < 0)
++    {
++        av_frame_unref(frame);
++    }
++    // On success register rpivid_retrieve_data as the frame's post_process callback
++    if (rv == 0)
++    {
++        FrameDecodeData *fdd = (FrameDecodeData*)frame->private_ref->data;
++        fdd->post_process = rpivid_retrieve_data;
++    }
++
++    return rv;
++}
++
++#if OPT_PHASE_TIMING
++// Log one row of nine timing histogram bins at INFO level
++static void log_bin_phase(AVCodecContext * const avctx, const unsigned int * const bins)
++{
++    av_log(avctx, AV_LOG_INFO, "%7u %7u %7u %7u %7u %7u %7u %7u %7u\n",  // %u: the bins are unsigned
++           bins[0], bins[1], bins[2], bins[3], bins[4], bins[5], bins[6], bins[7], bins[8]);
++}
++#endif
++
++//////////////////////////////////////////////////////////////////////////////
++
++static int rpi_hevc_free(AVCodecContext *avctx) {
++    RPI_T * const rpi = avctx->internal->hwaccel_priv_data;
++    // Tear down the hwaccel: wait for outstanding users, then free envs, GPU buffers, device maps, mailbox & semaphores
++#if TRACE_ENTRY
++    printf("<<< %s\n", __func__);
++#endif
++    // Presumably drops the reference set up in init (ref_count starts at 1) - confirm against dec_env_release
++    dec_env_release(rpi, NULL);
++
++    // Wait for everything else to stop
++    {
++        struct timespec tt;
++        clock_gettime(CLOCK_REALTIME, &tt);
++        tt.tv_sec += 2;
++        while (sem_timedwait(&rpi->ref_zero, &tt) == -1) {
++            const int err = errno;
++            if (err == ETIMEDOUT) {
++                av_log(avctx, AV_LOG_FATAL, "Rpivid worker threads still running\n");
++                return -1;  // NOTE: nothing is freed on this path
++            }
++            if (err != EINTR) {
++                av_log(avctx, AV_LOG_ERROR, "Unexpected error %d waiting for work thread to stop\n", err);
++                break;
++            }
++        }
++    }
++
++#if OPT_PHASE_TIMING
++    {
++        unsigned int i;
++        for (i = 0; i != RPIVID_PHASES; ++i) {
++            const phase_wait_env_t * const p = rpi->phase_reqs + i;
++            av_log(avctx, AV_LOG_INFO, "Phase %u: In %3u.%06u, Out %3u.%06u\n", i,
++                   (unsigned int)(p->time_in_phase / 1000000), (unsigned int)(p->time_in_phase % 1000000),
++                   (unsigned int)(p->time_out_phase / 1000000), (unsigned int)(p->time_out_phase % 1000000));
++            av_log(avctx, AV_LOG_INFO, "%7d %7d %7d %7d %7d %7d %7d %7d        >\n",
++                   time_thresholds[0], time_thresholds[1], time_thresholds[2], time_thresholds[3],
++                   time_thresholds[4], time_thresholds[5], time_thresholds[6], time_thresholds[7]);
++            log_bin_phase(avctx, p->time_bins);
++            log_bin_phase(avctx, p->time_bins3);
++            log_bin_phase(avctx, p->time_bins5);
++            av_log(avctx, AV_LOG_INFO, "Longest duraction: %ums @ frame %u\n",
++                   (unsigned int)(p->max_phase_time / 1000),
++                   p->max_time_decode_order);
++        }
++        av_log(avctx, AV_LOG_INFO, "PU max=%d\n", rpi->max_pu_msgs);
++    }
++#endif
++
++    if (rpi->dec_envs != NULL)
++    {
++        for (int i = 0; i < avctx->thread_count && rpi->dec_envs[i] != NULL; ++i) {  // Index must start at 0; stops at the first unused slot
++            dec_env_delete(rpi->dec_envs[i]);
++        }
++        av_freep(&rpi->dec_envs);
++    }
++
++    av_rpi_zc_int_env_freep(&rpi->zc);
++
++    gpu_free(&rpi->gcolbuf);
++
++    for (unsigned int i = 0; i != RPIVID_BITBUFS; ++i) {
++        gpu_free(rpi->gbitbufs + i);
++    }
++    for (unsigned int i = 0; i != RPIVID_COEFFBUFS; ++i) {
++        gpu_free(rpi->gcoeffbufs + i);
++    }
++
++    unmap_devp(&rpi->regs, REGS_SIZE);
++    unmap_devp(&rpi->ints, INTS_SIZE);
++
++    if (rpi->gpu_init_type > 0)
++        rpi_mem_gpu_uninit();
++
++    if (rpi->mbox_fd >= 0) {
++        mbox_release_clock(rpi->mbox_fd);
++        mbox_close(rpi->mbox_fd);
++    }
++
++    sem_destroy(&rpi->ref_zero);
++    sem_destroy(&rpi->coeffbuf_sem);
++    sem_destroy(&rpi->bitbuf_sem);
++
++#if TRACE_ENTRY
++    printf(">>> %s\n", __func__);
++#endif
++    return 0;
++}
++
++//////////////////////////////////////////////////////////////////////////////
++
++static int rpi_hevc_init(AVCodecContext *avctx) {
++    RPI_T * const rpi = avctx->internal->hwaccel_priv_data;
++    // hwaccel .init hook. Returns 0, AVERROR(ENOTSUP) for oversize pictures, else AVERROR_EXTERNAL
++
++#if TRACE_ENTRY
++    printf("<<< %s\n", __func__);
++#endif
++
++    if (avctx->width>4096 || avctx->height>4096) {
++        av_log(NULL, AV_LOG_FATAL, "Picture size %dx%d exceeds 4096x4096 maximum for HWAccel\n", avctx->width, avctx->height);
++        return AVERROR(ENOTSUP);  // nothing has been set up yet, so no cleanup is needed
++    }
++
++    memset(rpi, 0, sizeof(*rpi));  // clean slate: rpi_hevc_free tests dec_envs and gpu_init_type before releasing
++
++    rpi->mbox_fd = -1;  // "not open": rpi_hevc_free only releases the mailbox when mbox_fd >= 0
++    rpi->decode_order = 0;
++
++    // Initial PU/COEFF stream buffer split chosen as worst case seen so far
++    rpi->max_pu_msgs = 768; // 7.2 says at most 1611 messages per CTU
++
++
++    atomic_store(&rpi->ref_count, 1);
++    sem_init(&rpi->ref_zero, 0, 0);
++    // Buffer semaphores start at the number of bit/coeff buffers allocated further down
++    sem_init(&rpi->bitbuf_sem,   0, RPIVID_BITBUFS);
++    sem_init(&rpi->coeffbuf_sem, 0, RPIVID_COEFFBUFS);
++
++    pthread_mutex_init(&rpi->phase_lock, NULL);
++
++    if ((rpi->mbox_fd = mbox_open()) < 0)
++    {
++        av_log(avctx, AV_LOG_ERROR, "Failed to open mailbox\n");
++        goto fail;
++    }
++    mbox_request_clock(rpi->mbox_fd);
++
++    if ((rpi->regs = map_dev(avctx, REGS_NAME, REGS_SIZE)) == NULL ||
++        (rpi->ints = map_dev(avctx, INTS_NAME, INTS_SIZE)) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to open rpivid devices\n");
++        goto fail;
++    }
++
++    if ((rpi->gpu_init_type = rpi_mem_gpu_init(0)) < 0) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to init GPU\n");
++        goto fail;
++    }
++
++    if ((rpi->dec_envs = av_mallocz(sizeof(dec_env_t *) * avctx->thread_count)) == NULL) {  // one zeroed slot per thread
++        av_log(avctx, AV_LOG_ERROR, "Failed to alloc %d dec envs\n", avctx->thread_count);
++        goto fail;
++    }
++
++    rpi->col_stride = rnd64(avctx->width);
++    rpi->col_picsize = rpi->col_stride * (((avctx->height + 63) & ~63) >> 4);  // height rounded up to a multiple of 64, then /16
++    if (gpu_malloc_uncached(rpi->col_picsize * RPIVID_COL_PICS, &rpi->gcolbuf) != 0)
++    {
++        av_log(avctx, AV_LOG_ERROR, "Failed to allocate col mv buffer\n");
++        goto fail;
++    }
++
++    for (unsigned int i = 0; i != RPIVID_BITBUFS; ++i) {
++        if (gpu_malloc_uncached(RPIVID_BITBUF_SIZE, rpi->gbitbufs + i) != 0)
++        {
++            av_log(avctx, AV_LOG_ERROR, "Failed to allocate bitbuf %d\n", i);
++            goto fail;
++        }
++    }
++
++    for (unsigned int i = 0; i != RPIVID_COEFFBUFS; ++i) {
++        if (gpu_malloc_uncached(RPIVID_COEFFBUF_SIZE, rpi->gcoeffbufs + i) != 0)
++        {
++            av_log(avctx, AV_LOG_ERROR, "Failed to allocate coeffbuf %d\n", i);
++            goto fail;
++        }
++    }
++
++    av_log(avctx, AV_LOG_INFO, "RPI HEVC h/w accel init OK\n");
++
++    return 0;
++
++fail:
++    rpi_hevc_free(avctx);  // shared teardown; the same function is registered as the .uninit hook
++    return AVERROR_EXTERNAL;
++}
++
++//////////////////////////////////////////////////////////////////////////////
++
++const AVHWAccel ff_hevc_rpi4_8_hwaccel = {  // HEVC hwaccel descriptor wiring up the rpi_hevc_* callbacks
++    .name           = "hevc_rpi4_8",
++    .type           = AVMEDIA_TYPE_VIDEO,
++    .id             = AV_CODEC_ID_HEVC,
++    .pix_fmt        = AV_PIX_FMT_RPI4_8,  // only .name and .pix_fmt differ from ff_hevc_rpi4_10_hwaccel
++    .alloc_frame    = rpivid_hevc_alloc_frame,
++    .start_frame    = rpi_hevc_start_frame,
++    .end_frame      = rpi_hevc_end_frame,
++    .abort_frame    = rpi_hevc_abort_frame,
++    .decode_slice   = rpi_hevc_decode_slice,
++    .init           = rpi_hevc_init,
++    .uninit         = rpi_hevc_free,  // also invoked by rpi_hevc_init on its failure path
++    .priv_data_size = sizeof(RPI_T),
++    .caps_internal  = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_MT_SAFE,
++};
++
++const AVHWAccel ff_hevc_rpi4_10_hwaccel = {  // HEVC hwaccel descriptor wiring up the rpi_hevc_* callbacks
++    .name           = "hevc_rpi4_10",
++    .type           = AVMEDIA_TYPE_VIDEO,
++    .id             = AV_CODEC_ID_HEVC,
++    .pix_fmt        = AV_PIX_FMT_RPI4_10,  // only .name and .pix_fmt differ from ff_hevc_rpi4_8_hwaccel
++    .alloc_frame    = rpivid_hevc_alloc_frame,
++    .start_frame    = rpi_hevc_start_frame,
++    .end_frame      = rpi_hevc_end_frame,
++    .abort_frame    = rpi_hevc_abort_frame,
++    .decode_slice   = rpi_hevc_decode_slice,
++    .init           = rpi_hevc_init,
++    .uninit         = rpi_hevc_free,  // also invoked by rpi_hevc_init on its failure path
++    .priv_data_size = sizeof(RPI_T),
++    .caps_internal  = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_MT_SAFE,
++};
++
+--- a/libavcodec/v4l2_buffers.c
++++ b/libavcodec/v4l2_buffers.c
+@@ -21,6 +21,7 @@
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+
++#include <drm_fourcc.h>
+ #include <linux/videodev2.h>
+ #include <sys/ioctl.h>
+ #include <sys/mman.h>
+@@ -30,12 +31,14 @@
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/internal.h"
+ #include "libavutil/pixdesc.h"
++#include "libavutil/hwcontext.h"
+ #include "v4l2_context.h"
+ #include "v4l2_buffers.h"
+ #include "v4l2_m2m.h"
++#include "weak_link.h"
+
+ #define USEC_PER_SEC 1000000
+-static AVRational v4l2_timebase = { 1, USEC_PER_SEC };
++static const AVRational v4l2_timebase = { 1, USEC_PER_SEC };
+
+ static inline V4L2m2mContext *buf_to_m2mctx(V4L2Buffer *buf)
+ {
+@@ -52,34 +55,44 @@ static inline AVCodecContext *logger(V4L
+ static inline AVRational v4l2_get_timebase(V4L2Buffer *avbuf)
+ {
+     V4L2m2mContext *s = buf_to_m2mctx(avbuf);
+-
+-    if (s->avctx->pkt_timebase.num)
+-        return s->avctx->pkt_timebase;
+-    return s->avctx->time_base;
++    const AVRational tb = s->avctx->pkt_timebase.num ?
++        s->avctx->pkt_timebase :
++        s->avctx->time_base;
++    return tb.num && tb.den ? tb : v4l2_timebase;
+ }
+
+-static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts)
++static inline void v4l2_set_pts(V4L2Buffer *out, int64_t pts, int no_rescale)
+ {
+-    int64_t v4l2_pts;
+-
+-    if (pts == AV_NOPTS_VALUE)
+-        pts = 0;
+-
+     /* convert pts to v4l2 timebase */
+-    v4l2_pts = av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
++    const int64_t v4l2_pts =
++        no_rescale ? pts :
++        pts == AV_NOPTS_VALUE ? 0 :
++            av_rescale_q(pts, v4l2_get_timebase(out), v4l2_timebase);
+     out->buf.timestamp.tv_usec = v4l2_pts % USEC_PER_SEC;
+     out->buf.timestamp.tv_sec = v4l2_pts / USEC_PER_SEC;
+ }
+
+-static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf)
++static inline int64_t v4l2_get_pts(V4L2Buffer *avbuf, int no_rescale)
+ {
+-    int64_t v4l2_pts;
+-
+     /* convert pts back to encoder timebase */
+-    v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC +
++    const int64_t v4l2_pts = (int64_t)avbuf->buf.timestamp.tv_sec * USEC_PER_SEC +
+                         avbuf->buf.timestamp.tv_usec;
+
+-    return av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
++    return
++        no_rescale ? v4l2_pts :
++        v4l2_pts == 0 ? AV_NOPTS_VALUE :
++            av_rescale_q(v4l2_pts, v4l2_timebase, v4l2_get_timebase(avbuf));
++}
++
++static void set_buf_length(V4L2Buffer *out, unsigned int plane, uint32_t bytesused, uint32_t length)
++{
++    if (!V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {  // single-planar: sizes live in out->buf itself
++        out->buf.bytesused = bytesused;
++        out->buf.length = length;
++        return;
++    }
++    out->planes[plane].bytesused = bytesused;        // multi-planar: sizes are recorded per plane
++    out->planes[plane].length = length;
+ }
+
+ static enum AVColorPrimaries v4l2_get_color_primaries(V4L2Buffer *buf)
+@@ -116,6 +129,105 @@ static enum AVColorPrimaries v4l2_get_co
+     return AVCOL_PRI_UNSPECIFIED;
+ }
+
++static void v4l2_set_color(V4L2Buffer *buf,
++                           const enum AVColorPrimaries avcp,               // colour primaries
++                           const enum AVColorSpace avcs,                   // matrix coefficients
++                           const enum AVColorTransferCharacteristic avxc)  // transfer characteristic
++{
++    enum v4l2_ycbcr_encoding ycbcr = V4L2_YCBCR_ENC_DEFAULT;
++    enum v4l2_colorspace cs = V4L2_COLORSPACE_DEFAULT;
++    enum v4l2_xfer_func xfer = V4L2_XFER_FUNC_DEFAULT;
++    /* Translate the libav colour description to V4L2; unrecognised values keep the *_DEFAULT initialisers */
++    switch (avcp) {
++    case AVCOL_PRI_BT709:
++        cs = V4L2_COLORSPACE_REC709;
++        ycbcr = V4L2_YCBCR_ENC_709;
++        break;
++    case AVCOL_PRI_BT470M:
++        cs = V4L2_COLORSPACE_470_SYSTEM_M;
++        ycbcr = V4L2_YCBCR_ENC_601;
++        break;
++    case AVCOL_PRI_BT470BG:
++        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
++        break;
++    case AVCOL_PRI_SMPTE170M:
++        cs = V4L2_COLORSPACE_SMPTE170M;
++        break;
++    case AVCOL_PRI_SMPTE240M:
++        cs = V4L2_COLORSPACE_SMPTE240M;
++        break;
++    case AVCOL_PRI_BT2020:
++        cs = V4L2_COLORSPACE_BT2020;
++        break;
++    case AVCOL_PRI_SMPTE428:
++    case AVCOL_PRI_SMPTE431:
++    case AVCOL_PRI_SMPTE432:
++    case AVCOL_PRI_EBU3213:
++    case AVCOL_PRI_RESERVED:
++    case AVCOL_PRI_FILM:
++    case AVCOL_PRI_UNSPECIFIED:
++    default:
++        break;
++    }
++    /* A recognised matrix-coefficient value overrides the colourspace picked from the primaries */
++    switch (avcs) {
++    case AVCOL_SPC_RGB:
++        cs = V4L2_COLORSPACE_SRGB;
++        break;
++    case AVCOL_SPC_BT709:
++        cs = V4L2_COLORSPACE_REC709;
++        break;
++    case AVCOL_SPC_FCC:
++        cs = V4L2_COLORSPACE_470_SYSTEM_M;
++        break;
++    case AVCOL_SPC_BT470BG:
++        cs = V4L2_COLORSPACE_470_SYSTEM_BG;
++        break;
++    case AVCOL_SPC_SMPTE170M:
++        cs = V4L2_COLORSPACE_SMPTE170M;
++        break;
++    case AVCOL_SPC_SMPTE240M:
++        cs = V4L2_COLORSPACE_SMPTE240M;
++        break;
++    case AVCOL_SPC_BT2020_CL:
++        cs = V4L2_COLORSPACE_BT2020;
++        ycbcr = V4L2_YCBCR_ENC_BT2020_CONST_LUM;
++        break;
++    case AVCOL_SPC_BT2020_NCL:
++        cs = V4L2_COLORSPACE_BT2020;
++        break;
++    default:
++        break;
++    }
++    /* Switch on the avxc argument; testing the xfer local here would always see its DEFAULT initialiser */
++    switch (avxc) {
++    case AVCOL_TRC_BT709:
++        xfer = V4L2_XFER_FUNC_709;
++        break;
++    case AVCOL_TRC_IEC61966_2_1:
++        xfer = V4L2_XFER_FUNC_SRGB;
++        break;
++    case AVCOL_TRC_SMPTE240M:
++        xfer = V4L2_XFER_FUNC_SMPTE240M;
++        break;
++    case AVCOL_TRC_SMPTE2084:
++        xfer = V4L2_XFER_FUNC_SMPTE2084;
++        break;
++    default:
++        break;
++    }
++    /* The result is stored in the owning context's format, not in the v4l2 buffer struct */
++    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type)) {
++        buf->context->format.fmt.pix_mp.colorspace = cs;
++        buf->context->format.fmt.pix_mp.ycbcr_enc = ycbcr;
++        buf->context->format.fmt.pix_mp.xfer_func = xfer;
++    } else {
++        buf->context->format.fmt.pix.colorspace = cs;
++        buf->context->format.fmt.pix.ycbcr_enc = ycbcr;
++        buf->context->format.fmt.pix.xfer_func = xfer;
++    }
++}
++
+ static enum AVColorRange v4l2_get_color_range(V4L2Buffer *buf)
+ {
+     enum v4l2_quantization qt;
+@@ -134,6 +246,20 @@ static enum AVColorRange v4l2_get_color_
+      return AVCOL_RANGE_UNSPECIFIED;
+ }
+
++static void v4l2_set_color_range(V4L2Buffer *buf, const enum AVColorRange avcr)
++{
++    struct v4l2_format * const fmt = &buf->context->format;    // range is stored in the context format
++    enum v4l2_quantization quant = V4L2_QUANTIZATION_DEFAULT;  // kept for any other range value
++    if (avcr == AVCOL_RANGE_MPEG)
++        quant = V4L2_QUANTIZATION_LIM_RANGE;
++    else if (avcr == AVCOL_RANGE_JPEG)
++        quant = V4L2_QUANTIZATION_FULL_RANGE;
++    if (V4L2_TYPE_IS_MULTIPLANAR(buf->buf.type))
++        fmt->fmt.pix_mp.quantization = quant;
++    else
++        fmt->fmt.pix.quantization = quant;
++}
++
+ static enum AVColorSpace v4l2_get_color_space(V4L2Buffer *buf)
+ {
+     enum v4l2_ycbcr_encoding ycbcr;
+@@ -210,73 +336,165 @@ static enum AVColorTransferCharacteristi
+     return AVCOL_TRC_UNSPECIFIED;
+ }
+
+-static void v4l2_free_buffer(void *opaque, uint8_t *unused)
++static int v4l2_buf_is_interlaced(const V4L2Buffer * const buf)
+ {
+-    V4L2Buffer* avbuf = opaque;
+-    V4L2m2mContext *s = buf_to_m2mctx(avbuf);
+-
+-    if (atomic_fetch_sub(&avbuf->context_refcount, 1) == 1) {
+-        atomic_fetch_sub_explicit(&s->refcount, 1, memory_order_acq_rel);
++    return V4L2_FIELD_IS_INTERLACED(buf->buf.field);
++}
+
+-        if (s->reinit) {
+-            if (!atomic_load(&s->refcount))
+-                sem_post(&s->refsync);
+-        } else {
+-            if (s->draining && V4L2_TYPE_IS_OUTPUT(avbuf->context->type)) {
+-                /* no need to queue more buffers to the driver */
+-                avbuf->status = V4L2BUF_AVAILABLE;
+-            }
+-            else if (avbuf->context->streamon)
+-                ff_v4l2_buffer_enqueue(avbuf);
+-        }
++static int v4l2_buf_is_top_first(const V4L2Buffer * const buf)
++{
++    return buf->buf.field == V4L2_FIELD_INTERLACED_TB;
++}
+
+-        av_buffer_unref(&avbuf->context_ref);
+-    }
++static void v4l2_set_interlace(V4L2Buffer * const buf, const int is_interlaced, const int is_tff)
++{
++    buf->buf.field = !is_interlaced ? V4L2_FIELD_NONE :
++        is_tff ? V4L2_FIELD_INTERLACED_TB : V4L2_FIELD_INTERLACED_BT;
+ }
+
+-static int v4l2_buf_increase_ref(V4L2Buffer *in)
++static uint8_t * v4l2_get_drm_frame(V4L2Buffer *avbuf)
+ {
+-    V4L2m2mContext *s = buf_to_m2mctx(in);
++    AVDRMFrameDescriptor *drm_desc = &avbuf->drm_frame;
++    AVDRMLayerDescriptor *layer;
+
+-    if (in->context_ref)
+-        atomic_fetch_add(&in->context_refcount, 1);
+-    else {
+-        in->context_ref = av_buffer_ref(s->self_ref);
+-        if (!in->context_ref)
+-            return AVERROR(ENOMEM);
++    /* fill the DRM frame descriptor */
++    drm_desc->nb_objects = avbuf->num_planes;
++    drm_desc->nb_layers = 1;
+
+-        in->context_refcount = 1;
++    layer = &drm_desc->layers[0];
++    layer->nb_planes = avbuf->num_planes;
++
++    for (int i = 0; i < avbuf->num_planes; i++) {
++        layer->planes[i].object_index = i;
++        layer->planes[i].offset = 0;
++        layer->planes[i].pitch = avbuf->plane_info[i].bytesperline;
+     }
+
+-    in->status = V4L2BUF_RET_USER;
+-    atomic_fetch_add_explicit(&s->refcount, 1, memory_order_relaxed);
++    switch (avbuf->context->av_pix_fmt) {
++    case AV_PIX_FMT_YUYV422:
++
++        layer->format = DRM_FORMAT_YUYV;
++        layer->nb_planes = 1;
+
+-    return 0;
++        break;
++
++    case AV_PIX_FMT_NV12:
++    case AV_PIX_FMT_NV21:
++
++        layer->format = avbuf->context->av_pix_fmt == AV_PIX_FMT_NV12 ?
++            DRM_FORMAT_NV12 : DRM_FORMAT_NV21;
++
++        if (avbuf->num_planes > 1)
++            break;
++
++        layer->nb_planes = 2;
++
++        layer->planes[1].object_index = 0;
++        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
++            avbuf->context->format.fmt.pix.height;
++        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline;
++        break;
++
++    case AV_PIX_FMT_YUV420P:
++
++        layer->format = DRM_FORMAT_YUV420;
++
++        if (avbuf->num_planes > 1)
++            break;
++
++        layer->nb_planes = 3;
++
++        layer->planes[1].object_index = 0;
++        layer->planes[1].offset = avbuf->plane_info[0].bytesperline *
++            avbuf->context->format.fmt.pix.height;
++        layer->planes[1].pitch = avbuf->plane_info[0].bytesperline >> 1;
++
++        layer->planes[2].object_index = 0;
++        layer->planes[2].offset = layer->planes[1].offset +
++            ((avbuf->plane_info[0].bytesperline *
++              avbuf->context->format.fmt.pix.height) >> 2);
++        layer->planes[2].pitch = avbuf->plane_info[0].bytesperline >> 1;
++        break;
++
++    default:
++        drm_desc->nb_layers = 0;
++        break;
++    }
++
++    return (uint8_t *) drm_desc;
+ }
+
+-static int v4l2_buf_to_bufref(V4L2Buffer *in, int plane, AVBufferRef **buf)
++static void v4l2_free_bufref(void *opaque, uint8_t *data)
+ {
+-    int ret;
++    AVBufferRef * bufref = (AVBufferRef *)data;
++    V4L2Buffer *avbuf = (V4L2Buffer *)bufref->data;
++    struct V4L2Context *ctx = ff_weak_link_lock(&avbuf->context_wl);
+
+-    if (plane >= in->num_planes)
+-        return AVERROR(EINVAL);
++    if (ctx != NULL) {
++        // Buffer still attached to context
++        V4L2m2mContext *s = buf_to_m2mctx(avbuf);
+
+-    /* even though most encoders return 0 in data_offset encoding vp8 does require this value */
+-    *buf = av_buffer_create((char *)in->plane_info[plane].mm_addr + in->planes[plane].data_offset,
+-                            in->plane_info[plane].length, v4l2_free_buffer, in, 0);
+-    if (!*buf)
+-        return AVERROR(ENOMEM);
++        ff_mutex_lock(&ctx->lock);
+
+-    ret = v4l2_buf_increase_ref(in);
+-    if (ret)
+-        av_buffer_unref(buf);
++        avbuf->status = V4L2BUF_AVAILABLE;
+
+-    return ret;
++        if (s->draining && V4L2_TYPE_IS_OUTPUT(ctx->type)) {
++            av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer avail\n", ctx->name);
++            /* no need to queue more buffers to the driver */
++        }
++        else if (ctx->streamon) {
++            av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer requeue\n", ctx->name);
++            avbuf->buf.timestamp.tv_sec = 0;
++            avbuf->buf.timestamp.tv_usec = 0;
++            ff_v4l2_buffer_enqueue(avbuf);  // will set to IN_DRIVER
++        }
++        else {
++            av_log(logger(avbuf), AV_LOG_DEBUG, "%s: Buffer freed but streamoff\n", ctx->name);
++        }
++
++        ff_mutex_unlock(&ctx->lock);
++    }
++
++    ff_weak_link_unlock(avbuf->context_wl);
++    av_buffer_unref(&bufref);
++}
++
++static int v4l2_buffer_export_drm(V4L2Buffer* avbuf)
++{
++    // Export each plane with VIDIOC_EXPBUF and record the returned fd in the
++    // buffer's DRM frame descriptor. Returns 0, or AVERROR(errno) as soon as
++    // an ioctl fails (fds already stored stay in drm_frame).
++    const int multiplanar = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type);
++    int plane;
++
++    for (plane = 0; plane < avbuf->num_planes; plane++) {
++        // Multi-planar buffers fill one object per plane, otherwise object 0
++        const int obj = multiplanar ? plane : 0;
++        struct v4l2_exportbuffer req;
++
++        memset(&req, 0, sizeof(req));
++        req.index = avbuf->buf.index;
++        req.type = avbuf->buf.type;
++        req.plane = plane;
++
++        if (ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_EXPBUF, &req) < 0)
++            return AVERROR(errno);
++
++        // Describe the exported fd as a plain linear allocation
++        avbuf->drm_frame.objects[obj].size = multiplanar ?
++            avbuf->buf.m.planes[plane].length : avbuf->buf.length;
++        avbuf->drm_frame.objects[obj].fd = req.fd;
++        avbuf->drm_frame.objects[obj].format_modifier = DRM_FORMAT_MOD_LINEAR;
++    }
++
++
++    return 0;
+ }
+
+-static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset, AVBufferRef* bref)
++static int v4l2_bufref_to_buf(V4L2Buffer *out, int plane, const uint8_t* data, int size, int offset)
+ {
+     unsigned int bytesused, length;
++    int rv = 0;
+
+     if (plane >= out->num_planes)
+         return AVERROR(EINVAL);
+@@ -284,32 +502,57 @@ static int v4l2_bufref_to_buf(V4L2Buffer
+     length = out->plane_info[plane].length;
+     bytesused = FFMIN(size+offset, length);
+
+-    memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, FFMIN(size, length-offset));
+-
+-    if (V4L2_TYPE_IS_MULTIPLANAR(out->buf.type)) {
+-        out->planes[plane].bytesused = bytesused;
+-        out->planes[plane].length = length;
+-    } else {
+-        out->buf.bytesused = bytesused;
+-        out->buf.length = length;
++    if (size > length - offset) {
++        size = length - offset;
++        rv = AVERROR(ENOMEM);
+     }
+
+-    return 0;
++    memcpy((uint8_t*)out->plane_info[plane].mm_addr+offset, data, size);
++
++    set_buf_length(out, plane, bytesused, length);
++
++    return rv;
++}
++
++static AVBufferRef * wrap_avbuf(V4L2Buffer * const avbuf)
++{
++    AVBufferRef * bufref = av_buffer_ref(avbuf->context->bufrefs[avbuf->buf.index]);  // extra ref on this buffer's entry in the context table
++    AVBufferRef * newbuf;
++    // Returns a wrapper AVBufferRef whose data is the inner bufref pointer (not pixel data), or NULL on failure
++    if (!bufref)
++        return NULL;
++    // v4l2_free_bufref runs when the wrapper is freed: it marks the buffer available (may requeue it) and drops the inner ref
++    newbuf = av_buffer_create((uint8_t *)bufref, sizeof(*bufref), v4l2_free_bufref, NULL, 0);
++    if (newbuf == NULL)
++        av_buffer_unref(&bufref);
++    // NOTE(review): status becomes RET_USER even when av_buffer_create failed and NULL is returned - confirm intended
++    avbuf->status = V4L2BUF_RET_USER;
++    return newbuf;
+ }
+
+ static int v4l2_buffer_buf_to_swframe(AVFrame *frame, V4L2Buffer *avbuf)
+ {
+-    int i, ret;
++    int i;
+
+     frame->format = avbuf->context->av_pix_fmt;
+
+-    for (i = 0; i < avbuf->num_planes; i++) {
+-        ret = v4l2_buf_to_bufref(avbuf, i, &frame->buf[i]);
+-        if (ret)
+-            return ret;
++    frame->buf[0] = wrap_avbuf(avbuf);
++    if (frame->buf[0] == NULL)
++        return AVERROR(ENOMEM);
+
++    if (buf_to_m2mctx(avbuf)->output_drm) {
++        /* 1. get references to the actual data */
++        frame->data[0] = (uint8_t *) v4l2_get_drm_frame(avbuf);
++        frame->format = AV_PIX_FMT_DRM_PRIME;
++        frame->hw_frames_ctx = av_buffer_ref(avbuf->context->frames_ref);
++        return 0;
++    }
++
++
++    /* 1. get references to the actual data */
++    for (i = 0; i < avbuf->num_planes; i++) {
++        frame->data[i] = (uint8_t *)avbuf->plane_info[i].mm_addr + avbuf->planes[i].data_offset;
+         frame->linesize[i] = avbuf->plane_info[i].bytesperline;
+-        frame->data[i] = frame->buf[i]->data;
+     }
+
+     /* fixup special cases */
+@@ -318,17 +561,17 @@ static int v4l2_buffer_buf_to_swframe(AV
+     case AV_PIX_FMT_NV21:
+         if (avbuf->num_planes > 1)
+             break;
+-        frame->linesize[1] = avbuf->plane_info[0].bytesperline;
+-        frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height;
++        frame->linesize[1] = frame->linesize[0];
++        frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format);
+         break;
+
+     case AV_PIX_FMT_YUV420P:
+         if (avbuf->num_planes > 1)
+             break;
+-        frame->linesize[1] = avbuf->plane_info[0].bytesperline >> 1;
+-        frame->linesize[2] = avbuf->plane_info[0].bytesperline >> 1;
+-        frame->data[1] = frame->buf[0]->data + avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height;
+-        frame->data[2] = frame->data[1] + ((avbuf->plane_info[0].bytesperline * avbuf->context->format.fmt.pix_mp.height) >> 2);
++        frame->linesize[1] = frame->linesize[0] / 2;
++        frame->linesize[2] = frame->linesize[1];
++        frame->data[1] = frame->data[0] + frame->linesize[0] * ff_v4l2_get_format_height(&avbuf->context->format);
++        frame->data[2] = frame->data[1] + frame->linesize[1] * ff_v4l2_get_format_height(&avbuf->context->format) / 2;
+         break;
+
+     default:
+@@ -338,68 +581,95 @@ static int v4l2_buffer_buf_to_swframe(AV
+     return 0;
+ }
+
++static void cpy_2d(uint8_t * dst, int dst_stride, const uint8_t * src, int src_stride, int w, int h)
++{
++    int row;
++
++    // Equal strides with at most 32 bytes of row padding: one bulk copy of all rows
++    if (src_stride == dst_stride && dst_stride <= w + 32) {
++        memcpy(dst, src, dst_stride * h);
++        return;
++    }
++    // Otherwise copy w bytes per row, stepping each pointer by its own stride
++    for (row = 0; row < h; ++row, dst += dst_stride, src += src_stride)
++        memcpy(dst, src, w);
++}
++
++static int is_chroma(const AVPixFmtDescriptor *desc, int i, int num_planes)
++{   // Plane 0 is never chroma; the last plane is excluded when the format carries the alpha flag
++    return i != 0 && (i != num_planes - 1 || !(desc->flags & AV_PIX_FMT_FLAG_ALPHA));
++}
++
+ static int v4l2_buffer_swframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
+ {
+-    int i, ret;
+-    struct v4l2_format fmt = out->context->format;
+-    int pixel_format = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ?
+-                       fmt.fmt.pix_mp.pixelformat : fmt.fmt.pix.pixelformat;
+-    int height       = V4L2_TYPE_IS_MULTIPLANAR(fmt.type) ?
+-                       fmt.fmt.pix_mp.height : fmt.fmt.pix.height;
+-    int is_planar_format = 0;
+-
+-    switch (pixel_format) {
+-    case V4L2_PIX_FMT_YUV420M:
+-    case V4L2_PIX_FMT_YVU420M:
+-#ifdef V4L2_PIX_FMT_YUV422M
+-    case V4L2_PIX_FMT_YUV422M:
+-#endif
+-#ifdef V4L2_PIX_FMT_YVU422M
+-    case V4L2_PIX_FMT_YVU422M:
+-#endif
+-#ifdef V4L2_PIX_FMT_YUV444M
+-    case V4L2_PIX_FMT_YUV444M:
+-#endif
+-#ifdef V4L2_PIX_FMT_YVU444M
+-    case V4L2_PIX_FMT_YVU444M:
+-#endif
+-    case V4L2_PIX_FMT_NV12M:
+-    case V4L2_PIX_FMT_NV21M:
+-    case V4L2_PIX_FMT_NV12MT_16X16:
+-    case V4L2_PIX_FMT_NV12MT:
+-    case V4L2_PIX_FMT_NV16M:
+-    case V4L2_PIX_FMT_NV61M:
+-        is_planar_format = 1;
+-    }
+-
+-    if (!is_planar_format) {
+-        const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
+-        int planes_nb = 0;
+-        int offset = 0;
+-
+-        for (i = 0; i < desc->nb_components; i++)
+-            planes_nb = FFMAX(planes_nb, desc->comp[i].plane + 1);
+-
+-        for (i = 0; i < planes_nb; i++) {
+-            int size, h = height;
+-            if (i == 1 || i == 2) {
++    int i;
++    int num_planes = 0;
++    int pel_strides[4] = {0};
++
++    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
++
++    if ((desc->flags & AV_PIX_FMT_FLAG_HWACCEL) != 0) {
++        av_log(NULL, AV_LOG_ERROR, "%s: HWACCEL cannot be copied\n", __func__);
++        return -1;
++    }
++
++    for (i = 0; i != desc->nb_components; ++i) {
++        if (desc->comp[i].plane >= num_planes)
++            num_planes = desc->comp[i].plane + 1;
++        pel_strides[desc->comp[i].plane] = desc->comp[i].step;
++    }
++
++    if (out->num_planes > 1) {
++        if (num_planes != out->num_planes) {
++            av_log(NULL, AV_LOG_ERROR, "%s: Num planes mismatch: %d != %d\n", __func__, num_planes, out->num_planes);
++            return -1;
++        }
++        for (i = 0; i != num_planes; ++i) {
++            int w = frame->width;
++            int h = frame->height;
++            if (is_chroma(desc, i, num_planes)) {
++                w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
+                 h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
+             }
+-            size = frame->linesize[i] * h;
+-            ret = v4l2_bufref_to_buf(out, 0, frame->data[i], size, offset, frame->buf[i]);
+-            if (ret)
+-                return ret;
+-            offset += size;
++
++            cpy_2d(out->plane_info[i].mm_addr, out->plane_info[i].bytesperline,
++                   frame->data[i], frame->linesize[i],
++                   w * pel_strides[i], h);
++            set_buf_length(out, i, out->plane_info[i].bytesperline * h, out->plane_info[i].length);
+         }
+-        return 0;
+     }
++    else
++    {
++        unsigned int offset = 0;
++
++        for (i = 0; i != num_planes; ++i) {
++            int w = frame->width;
++            int h = frame->height;
++            int dst_stride = out->plane_info[0].bytesperline;
++            uint8_t * const dst = (uint8_t *)out->plane_info[0].mm_addr + offset;
++
++            if (is_chroma(desc, i, num_planes)) {
++                // Is chroma
++                dst_stride >>= desc->log2_chroma_w;
++                offset += dst_stride * (out->context->height >> desc->log2_chroma_h);
++                w = AV_CEIL_RSHIFT(w, desc->log2_chroma_w);
++                h = AV_CEIL_RSHIFT(h, desc->log2_chroma_h);
++            }
++            else {
++                // Is luma or alpha
++                offset += dst_stride * out->context->height;
++            }
++            if (offset > out->plane_info[0].length) {
++                av_log(NULL, AV_LOG_ERROR, "%s: Plane total %u > buffer size %zu\n", __func__, offset, out->plane_info[0].length);
++                return -1;
++            }
+
+-    for (i = 0; i < out->num_planes; i++) {
+-        ret = v4l2_bufref_to_buf(out, i, frame->buf[i]->data, frame->buf[i]->size, 0, frame->buf[i]);
+-        if (ret)
+-            return ret;
++            cpy_2d(dst, dst_stride,
++                   frame->data[i], frame->linesize[i],
++                   w * pel_strides[i], h);
++        }
++        set_buf_length(out, 0, offset, out->plane_info[0].length);
+     }
+-
+     return 0;
+ }
+
+@@ -411,14 +681,22 @@ static int v4l2_buffer_swframe_to_buf(co
+
+ int ff_v4l2_buffer_avframe_to_buf(const AVFrame *frame, V4L2Buffer *out)
+ {
+-    v4l2_set_pts(out, frame->pts);
++    // Mirror the frame's keyframe state into the buffer flags: set for key frames, cleared otherwise
++    out->buf.flags = frame->key_frame ? (out->buf.flags | V4L2_BUF_FLAG_KEYFRAME) : (out->buf.flags & ~V4L2_BUF_FLAG_KEYFRAME);
++    // Beware: colour info is held in the context format rather than the v4l2 buffer struct, so it is not per-buffer
++    v4l2_set_color(out, frame->color_primaries, frame->colorspace, frame->color_trc);
++    v4l2_set_color_range(out, frame->color_range);
++    // PTS & interlace are buffer vars
++    v4l2_set_pts(out, frame->pts, 0);
++    v4l2_set_interlace(out, frame->interlaced_frame, frame->top_field_first);
+
+     return v4l2_buffer_swframe_to_buf(frame, out);
+ }
+
+-int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf)
++int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *avbuf, int no_rescale_pts)
+ {
+     int ret;
++    V4L2Context * const ctx = avbuf->context;
+
+     av_frame_unref(frame);
+
+@@ -433,13 +711,24 @@ int ff_v4l2_buffer_buf_to_avframe(AVFram
+     frame->colorspace = v4l2_get_color_space(avbuf);
+     frame->color_range = v4l2_get_color_range(avbuf);
+     frame->color_trc = v4l2_get_color_trc(avbuf);
+-    frame->pts = v4l2_get_pts(avbuf);
++    frame->pts = v4l2_get_pts(avbuf, no_rescale_pts);
+     frame->pkt_dts = AV_NOPTS_VALUE;
++    frame->interlaced_frame = v4l2_buf_is_interlaced(avbuf);
++    frame->top_field_first = v4l2_buf_is_top_first(avbuf);
+
+     /* these values are updated also during re-init in v4l2_process_driver_event */
+-    frame->height = avbuf->context->height;
+-    frame->width = avbuf->context->width;
+-    frame->sample_aspect_ratio = avbuf->context->sample_aspect_ratio;
++    frame->height = ctx->height;
++    frame->width = ctx->width;
++    frame->sample_aspect_ratio = ctx->sample_aspect_ratio;
++
++    if (ctx->selection.height && ctx->selection.width) {  // non-empty selection: convert it to crop margins
++        frame->crop_left = ctx->selection.left < frame->width ? ctx->selection.left : 0;
++        frame->crop_top  = ctx->selection.top < frame->height ? ctx->selection.top  : 0;
++        frame->crop_right = ctx->selection.left + ctx->selection.width < frame->width ?
++            frame->width - (ctx->selection.left + ctx->selection.width) : 0;
++        frame->crop_bottom = ctx->selection.top + ctx->selection.height < frame->height ?
++            frame->height - (ctx->selection.top + ctx->selection.height) : 0;  // vertical margin is measured against height
++    }
+
+     /* 3. report errors upstream */
+     if (avbuf->buf.flags & V4L2_BUF_FLAG_ERROR) {
+@@ -452,15 +741,16 @@ int ff_v4l2_buffer_buf_to_avframe(AVFram
+
+ int ff_v4l2_buffer_buf_to_avpkt(AVPacket *pkt, V4L2Buffer *avbuf)
+ {
+-    int ret;
++    av_log(logger(avbuf), AV_LOG_INFO, "%s\n", __func__);
+
+     av_packet_unref(pkt);
+-    ret = v4l2_buf_to_bufref(avbuf, 0, &pkt->buf);
+-    if (ret)
+-        return ret;
++
++    pkt->buf = wrap_avbuf(avbuf);
++    if (pkt->buf == NULL)
++        return AVERROR(ENOMEM);
+
+     pkt->size = V4L2_TYPE_IS_MULTIPLANAR(avbuf->buf.type) ? avbuf->buf.m.planes[0].bytesused : avbuf->buf.bytesused;
+-    pkt->data = pkt->buf->data;
++    pkt->data = (uint8_t*)avbuf->plane_info[0].mm_addr + avbuf->planes[0].data_offset;
+
+     if (avbuf->buf.flags & V4L2_BUF_FLAG_KEYFRAME)
+         pkt->flags |= AV_PKT_FLAG_KEY;
+@@ -470,36 +760,89 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket
+         pkt->flags |= AV_PKT_FLAG_CORRUPT;
+     }
+
+-    pkt->dts = pkt->pts = v4l2_get_pts(avbuf);
++    pkt->dts = pkt->pts = v4l2_get_pts(avbuf, 0);
+
+     return 0;
+ }
+
+-int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
++                                    const void *extdata, size_t extlen, int no_rescale_pts)
+ {
+     int ret;
+
+-    ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, 0, pkt->buf);
+-    if (ret)
++    if (extlen) {
++        ret = v4l2_bufref_to_buf(out, 0, extdata, extlen, 0);
++        if (ret)
++            return ret;
++    }
++
++    ret = v4l2_bufref_to_buf(out, 0, pkt->data, pkt->size, extlen);
++    if (ret && ret != AVERROR(ENOMEM))
+         return ret;
+
+-    v4l2_set_pts(out, pkt->pts);
++    v4l2_set_pts(out, pkt->pts, no_rescale_pts);
+
+     if (pkt->flags & AV_PKT_FLAG_KEY)
+         out->flags = V4L2_BUF_FLAG_KEYFRAME;
+
+-    return 0;
++    return ret;
+ }
+
+-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index)
++int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out)
++{
++    return ff_v4l2_buffer_avpkt_to_buf_ext(pkt, out, NULL, 0, 0); /* no extradata, no_rescale_pts = 0 */
++}
++
++
++static void v4l2_buffer_buffer_free(void *opaque, uint8_t *data) /* AVBuffer free callback; opaque is not used */
++{
++    V4L2Buffer * const avbuf = (V4L2Buffer *)data;
++    int i;
++    /* Unmap every plane that has a mapping; a NULL mm_addr means there is nothing to unmap */
++    for (i = 0; i != FF_ARRAY_ELEMS(avbuf->plane_info); ++i) {
++        struct V4L2Plane_info *p = avbuf->plane_info + i;
++        if (p->mm_addr != NULL)
++            munmap(p->mm_addr, p->length);
++    }
++    /* Close the DRM object fds; -1 marks a slot that holds no fd */
++    for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) {
++        if (avbuf->drm_frame.objects[i].fd != -1)
++            close(avbuf->drm_frame.objects[i].fd);
++    }
++    /* Release this buffer's weak-link reference to its context */
++    ff_weak_link_unref(&avbuf->context_wl);
++    /* data is the V4L2Buffer allocation itself, so free it last */
++    av_free(avbuf);
++}
++
++
++int ff_v4l2_buffer_initialize(AVBufferRef ** pbufref, int index, V4L2Context *ctx)
+ {
+-    V4L2Context *ctx = avbuf->context;
+     int ret, i;
++    V4L2Buffer * const avbuf = av_mallocz(sizeof(*avbuf));
++    AVBufferRef * bufref;
++
++    *pbufref = NULL;
++    if (avbuf == NULL)
++        return AVERROR(ENOMEM);
+
++    bufref = av_buffer_create((uint8_t*)avbuf, sizeof(*avbuf), v4l2_buffer_buffer_free, NULL, 0);
++    if (bufref == NULL) {
++        av_free(avbuf);
++        return AVERROR(ENOMEM);
++    }
++
++    avbuf->context = ctx;
+     avbuf->buf.memory = V4L2_MEMORY_MMAP;
+     avbuf->buf.type = ctx->type;
+     avbuf->buf.index = index;
+
++    for (i = 0; i != FF_ARRAY_ELEMS(avbuf->drm_frame.objects); ++i) {
++        avbuf->drm_frame.objects[i].fd = -1;
++    }
++
++    avbuf->context_wl = ff_weak_link_ref(ctx->wl_master);
++
+     if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+         avbuf->buf.length = VIDEO_MAX_PLANES;
+         avbuf->buf.m.planes = avbuf->planes;
+@@ -507,7 +850,7 @@ int ff_v4l2_buffer_initialize(V4L2Buffer
+
+     ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QUERYBUF, &avbuf->buf);
+     if (ret < 0)
+-        return AVERROR(errno);
++        { ret = AVERROR(errno); goto fail; } /* ioctl only returns -1: report the real errno */
+
+     if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+         avbuf->num_planes = 0;
+@@ -527,25 +870,33 @@ int ff_v4l2_buffer_initialize(V4L2Buffer
+
+         if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+             avbuf->plane_info[i].length = avbuf->buf.m.planes[i].length;
+-            avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length,
+-                                           PROT_READ | PROT_WRITE, MAP_SHARED,
+-                                           buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
++
++            if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) ||
++                !buf_to_m2mctx(avbuf)->output_drm) {
++                avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.m.planes[i].length,
++                                               PROT_READ | PROT_WRITE, MAP_SHARED,
++                                               buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.planes[i].m.mem_offset);
++            }
+         } else {
+             avbuf->plane_info[i].length = avbuf->buf.length;
+-            avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
+-                                          PROT_READ | PROT_WRITE, MAP_SHARED,
+-                                          buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset);
++
++            if ((V4L2_TYPE_IS_OUTPUT(ctx->type) && buf_to_m2mctx(avbuf)->output_drm) ||
++                !buf_to_m2mctx(avbuf)->output_drm) {
++                avbuf->plane_info[i].mm_addr = mmap(NULL, avbuf->buf.length,
++                                               PROT_READ | PROT_WRITE, MAP_SHARED,
++                                               buf_to_m2mctx(avbuf)->fd, avbuf->buf.m.offset);
++            }
+         }
+
+-        if (avbuf->plane_info[i].mm_addr == MAP_FAILED)
+-            return AVERROR(ENOMEM);
++        if (avbuf->plane_info[i].mm_addr == MAP_FAILED) {
++            avbuf->plane_info[i].mm_addr = NULL;
++            ret = AVERROR(ENOMEM);
++            goto fail;
++        }
+     }
+
+     avbuf->status = V4L2BUF_AVAILABLE;
+
+-    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
+-        return 0;
+-
+     if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+         avbuf->buf.m.planes = avbuf->planes;
+         avbuf->buf.length   = avbuf->num_planes;
+@@ -555,7 +906,20 @@ int ff_v4l2_buffer_initialize(V4L2Buffer
+         avbuf->buf.length    = avbuf->planes[0].length;
+     }
+
+-    return ff_v4l2_buffer_enqueue(avbuf);
++    if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) {
++        if (buf_to_m2mctx(avbuf)->output_drm) {
++            ret = v4l2_buffer_export_drm(avbuf);
++            if (ret)
++                    goto fail;
++        }
++    }
++
++    *pbufref = bufref;
++    return 0;
++
++fail:
++    av_buffer_unref(&bufref);
++    return ret;
+ }
+
+ int ff_v4l2_buffer_enqueue(V4L2Buffer* avbuf)
+@@ -564,9 +928,27 @@ int ff_v4l2_buffer_enqueue(V4L2Buffer* a
+
+     avbuf->buf.flags = avbuf->flags;
+
++    if (avbuf->buf.timestamp.tv_sec || avbuf->buf.timestamp.tv_usec) {
++        av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s pre VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n",
++               avbuf->context->name, avbuf->buf.index,
++               avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec,
++               avbuf->context->q_count);
++    }
++
+     ret = ioctl(buf_to_m2mctx(avbuf)->fd, VIDIOC_QBUF, &avbuf->buf);
+-    if (ret < 0)
+-        return AVERROR(errno);
++    if (ret < 0) {
++        int err = errno;
++        av_log(logger(avbuf), AV_LOG_ERROR, "--- %s VIDIOC_QBUF: index %d FAIL err %d (%s)\n",
++               avbuf->context->name, avbuf->buf.index,
++               err, strerror(err));
++        return AVERROR(err);
++    }
++
++    ++avbuf->context->q_count;
++    av_log(logger(avbuf), AV_LOG_DEBUG, "--- %s VIDIOC_QBUF: index %d, ts=%ld.%06ld count=%d\n",
++           avbuf->context->name, avbuf->buf.index,
++           avbuf->buf.timestamp.tv_sec, avbuf->buf.timestamp.tv_usec,
++           avbuf->context->q_count);
+
+     avbuf->status = V4L2BUF_IN_DRIVER;
+
+--- a/libavcodec/v4l2_buffers.h
++++ b/libavcodec/v4l2_buffers.h
+@@ -27,25 +27,34 @@
+ #include <stdatomic.h>
+ #include <linux/videodev2.h>
+
++#include "libavutil/hwcontext_drm.h"
+ #include "avcodec.h"
+
+ enum V4L2Buffer_status {
+     V4L2BUF_AVAILABLE,
+     V4L2BUF_IN_DRIVER,
++    V4L2BUF_IN_USE,
+     V4L2BUF_RET_USER,
+ };
+
+ /**
+  * V4L2Buffer (wrapper for v4l2_buffer management)
+  */
++struct V4L2Context;
++struct ff_weak_link_client;
++
+ typedef struct V4L2Buffer {
+-    /* each buffer needs to have a reference to its context */
++    /* each buffer needs to have a reference to its context
++     * The pointer is good enough for most operation but once the buffer has
++     * been passed to the user the buffer may become orphaned so for free ops
++     * the weak link must be used to ensure that the context is actually
++     * there
++     */
+     struct V4L2Context *context;
++    struct ff_weak_link_client *context_wl;
+
+-    /* This object is refcounted per-plane, so we need to keep track
+-     * of how many context-refs we are holding. */
+-    AVBufferRef *context_ref;
+-    atomic_uint context_refcount;
++    /* DRM descriptor */
++    AVDRMFrameDescriptor drm_frame;
+
+     /* keep track of the mmap address and mmap length */
+     struct V4L2Plane_info {
+@@ -70,11 +79,12 @@ typedef struct V4L2Buffer {
+  *
+  * @param[in] frame The AVFRame to push the information to
+  * @param[in] buf The V4L2Buffer to get the information from
++ * @param[in] no_rescale_pts If non-zero do not rescale PTS
+  *
+  * @returns 0 in case of success, AVERROR(EINVAL) if the number of planes is incorrect,
+  * AVERROR(ENOMEM) if the AVBufferRef can't be created.
+  */
+-int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *buf);
++int ff_v4l2_buffer_buf_to_avframe(AVFrame *frame, V4L2Buffer *buf, int no_rescale_pts);
+
+ /**
+  * Extracts the data from a V4L2Buffer to an AVPacket
+@@ -98,6 +108,9 @@ int ff_v4l2_buffer_buf_to_avpkt(AVPacket
+  */
+ int ff_v4l2_buffer_avpkt_to_buf(const AVPacket *pkt, V4L2Buffer *out);
+
++int ff_v4l2_buffer_avpkt_to_buf_ext(const AVPacket *pkt, V4L2Buffer *out,
++                                    const void *extdata, size_t extlen, int no_rescale_pts);
++
+ /**
+  * Extracts the data from an AVFrame to a V4L2Buffer
+  *
+@@ -116,7 +129,7 @@ int ff_v4l2_buffer_avframe_to_buf(const
+  *
+  * @returns 0 in case of success, a negative AVERROR code otherwise
+  */
+-int ff_v4l2_buffer_initialize(V4L2Buffer* avbuf, int index);
++int ff_v4l2_buffer_initialize(AVBufferRef **avbuf, int index, struct V4L2Context *ctx);
+
+ /**
+  * Enqueues a V4L2Buffer
+--- a/libavcodec/v4l2_context.c
++++ b/libavcodec/v4l2_context.c
+@@ -27,11 +27,13 @@
+ #include <unistd.h>
+ #include <fcntl.h>
+ #include <poll.h>
++#include "libavutil/avassert.h"
+ #include "libavcodec/avcodec.h"
+ #include "libavcodec/internal.h"
+ #include "v4l2_buffers.h"
+ #include "v4l2_fmt.h"
+ #include "v4l2_m2m.h"
++#include "weak_link.h"
+
+ struct v4l2_format_update {
+     uint32_t v4l2_fmt;
+@@ -53,16 +55,6 @@ static inline AVCodecContext *logger(V4L
+     return ctx_to_m2mctx(ctx)->avctx;
+ }
+
+-static inline unsigned int v4l2_get_width(struct v4l2_format *fmt)
+-{
+-    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
+-}
+-
+-static inline unsigned int v4l2_get_height(struct v4l2_format *fmt)
+-{
+-    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
+-}
+-
+ static AVRational v4l2_get_sar(V4L2Context *ctx)
+ {
+     struct AVRational sar = { 0, 1 };
+@@ -94,8 +86,8 @@ static inline unsigned int v4l2_resoluti
+     if (ret)
+         av_log(logger(ctx), AV_LOG_DEBUG, "%s changed (%dx%d) -> (%dx%d)\n",
+             ctx->name,
+-            v4l2_get_width(fmt1), v4l2_get_height(fmt1),
+-            v4l2_get_width(fmt2), v4l2_get_height(fmt2));
++            ff_v4l2_get_format_width(fmt1), ff_v4l2_get_format_height(fmt1),
++            ff_v4l2_get_format_width(fmt2), ff_v4l2_get_format_height(fmt2));
+
+     return ret;
+ }
+@@ -153,58 +145,67 @@ static inline void v4l2_save_to_context(
+     }
+ }
+
+-/**
+- * handle resolution change event and end of stream event
+- * returns 1 if reinit was successful, negative if it failed
+- * returns 0 if reinit was not executed
+- */
+-static int v4l2_handle_event(V4L2Context *ctx)
++static int get_default_selection(V4L2Context * const ctx, struct v4l2_rect *r)
+ {
+-    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
+-    struct v4l2_format cap_fmt = s->capture.format;
+-    struct v4l2_format out_fmt = s->output.format;
+-    struct v4l2_event evt = { 0 };
+-    int full_reinit, reinit, ret;
++    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
++    struct v4l2_selection selection = {
++        .type = V4L2_BUF_TYPE_VIDEO_CAPTURE,
++        .target = V4L2_SEL_TGT_COMPOSE
++    };
+
+-    ret = ioctl(s->fd, VIDIOC_DQEVENT, &evt);
+-    if (ret < 0) {
+-        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_DQEVENT\n", ctx->name);
+-        return 0;
+-    }
++    memset(r, 0, sizeof(*r));
++    if (ioctl(s->fd, VIDIOC_G_SELECTION, &selection))
++        return AVERROR(errno);
+
+-    if (evt.type == V4L2_EVENT_EOS) {
+-        ctx->done = 1;
+-        return 0;
+-    }
++    *r = selection.r;
++    return 0;
++}
+
+-    if (evt.type != V4L2_EVENT_SOURCE_CHANGE)
+-        return 0;
++static int do_source_change(V4L2m2mContext * const s)
++{
++    AVCodecContext *const avctx = s->avctx;
++
++    int ret;
++    int reinit;
++    int full_reinit;
++    struct v4l2_format cap_fmt = s->capture.format;
++    struct v4l2_format out_fmt = s->output.format;
++
++    s->resize_pending = 0;
++    s->capture.done = 0;
+
+     ret = ioctl(s->fd, VIDIOC_G_FMT, &out_fmt);
+     if (ret) {
+-        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT\n", s->output.name);
++        av_log(avctx, AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", s->output.name);
+         return 0;
+     }
+
+     ret = ioctl(s->fd, VIDIOC_G_FMT, &cap_fmt);
+     if (ret) {
+-        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT\n", s->capture.name);
++        av_log(avctx, AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", s->capture.name);
+         return 0;
+     }
+
+     full_reinit = v4l2_resolution_changed(&s->output, &out_fmt);
+     if (full_reinit) {
+-        s->output.height = v4l2_get_height(&out_fmt);
+-        s->output.width = v4l2_get_width(&out_fmt);
+-        s->output.sample_aspect_ratio = v4l2_get_sar(&s->output);
++        s->output.height = ff_v4l2_get_format_height(&out_fmt);
++        s->output.width = ff_v4l2_get_format_width(&out_fmt);
+     }
++    s->output.sample_aspect_ratio = v4l2_get_sar(&s->output);
++
++    get_default_selection(&s->capture, &s->capture.selection);
+
+     reinit = v4l2_resolution_changed(&s->capture, &cap_fmt);
+     if (reinit) {
+-        s->capture.height = v4l2_get_height(&cap_fmt);
+-        s->capture.width = v4l2_get_width(&cap_fmt);
+-        s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
++        s->capture.height = ff_v4l2_get_format_height(&cap_fmt);
++        s->capture.width = ff_v4l2_get_format_width(&cap_fmt);
+     }
++    s->capture.sample_aspect_ratio = v4l2_get_sar(&s->capture);
++
++    av_log(avctx, AV_LOG_DEBUG, "Source change: SAR: %d/%d, crop %dx%d @ %d,%d\n",
++           s->capture.sample_aspect_ratio.num, s->capture.sample_aspect_ratio.den,
++           s->capture.selection.width, s->capture.selection.height,
++           s->capture.selection.left, s->capture.selection.top);
+
+     if (full_reinit || reinit)
+         s->reinit = 1;
+@@ -212,34 +213,88 @@ static int v4l2_handle_event(V4L2Context
+     if (full_reinit) {
+         ret = ff_v4l2_m2m_codec_full_reinit(s);
+         if (ret) {
+-            av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_full_reinit\n");
++            av_log(avctx, AV_LOG_ERROR, "v4l2_m2m_codec_full_reinit failed\n");
+             return AVERROR(EINVAL);
+         }
+         goto reinit_run;
+     }
+
+     if (reinit) {
+-        if (s->avctx)
++        if (avctx)
+             ret = ff_set_dimensions(s->avctx, s->capture.width, s->capture.height);
+         if (ret < 0)
+-            av_log(logger(ctx), AV_LOG_WARNING, "update avcodec height and width\n");
++            av_log(avctx, AV_LOG_WARNING, "update avcodec height and width failed\n");
+
+         ret = ff_v4l2_m2m_codec_reinit(s);
+         if (ret) {
+-            av_log(logger(ctx), AV_LOG_ERROR, "v4l2_m2m_codec_reinit\n");
++            av_log(avctx, AV_LOG_ERROR, "v4l2_m2m_codec_reinit failed\n");
+             return AVERROR(EINVAL);
+         }
+         goto reinit_run;
+     }
+
+-    /* dummy event received */
+-    return 0;
++    /* Buffers are OK so just stream off to ack */
++    av_log(avctx, AV_LOG_DEBUG, "%s: Parameters only\n", __func__);
++
++    ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
++    if (ret)
++        av_log(avctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF failed\n");
++    s->draining = 0;
+
+     /* reinit executed */
+ reinit_run:
++    ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMON);
+     return 1;
+ }
+
++static int ctx_done(V4L2Context * const ctx)
++{
++    int rv = 0;
++    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
++    /* Mark this context as finished */
++    ctx->done = 1;
++    /* A pending resize is applied now, but only when ctx is the capture side */
++    if (s->resize_pending && !V4L2_TYPE_IS_OUTPUT(ctx->type))
++        rv = do_source_change(s);
++
++    return rv; /* 0 unless do_source_change() ran, then its result */
++}
++
++/**
++ * Dequeue one V4L2 event. EOS is only logged. SOURCE_CHANGE sets
++ * resize_pending and, if ctx is already done, runs do_source_change() now.
++ * returns do_source_change()'s result when it is run, 0 in every other case
++ */
++static int v4l2_handle_event(V4L2Context *ctx)
++{
++    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
++    struct v4l2_event evt = { 0 };
++    int ret;
++
++    ret = ioctl(s->fd, VIDIOC_DQEVENT, &evt);
++    if (ret < 0) {
++        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_DQEVENT\n", ctx->name);
++        return 0; /* a failed dequeue is reported as "no reinit" */
++    }
++
++    av_log(logger(ctx), AV_LOG_INFO, "Dq event %d\n", evt.type);
++
++    if (evt.type == V4L2_EVENT_EOS) {
++        // EOS does not mark the context done here (the old "ctx->done = 1" is disabled)
++        av_log(logger(ctx), AV_LOG_TRACE, "%s VIDIOC_EVENT_EOS\n", ctx->name);
++        return 0;
++    }
++
++    if (evt.type != V4L2_EVENT_SOURCE_CHANGE)
++        return 0;
++
++    s->resize_pending = 1; /* remembered so ctx_done() can apply the change later */
++    if (!ctx->done)
++        return 0;
++
++    return do_source_change(s);
++}
++
+ static int v4l2_stop_decode(V4L2Context *ctx)
+ {
+     struct v4l2_decoder_cmd cmd = {
+@@ -280,8 +335,26 @@ static int v4l2_stop_encode(V4L2Context
+     return 0;
+ }
+
++static int count_in_driver(const V4L2Context * const ctx) /* used by the poll error/timeout log messages */
++{
++    int i;
++    int n = 0;
++    /* -1 means "no buffer array", which is distinct from 0 buffers in the driver */
++    if (!ctx->bufrefs)
++        return -1;
++    /* Count the buffers whose status is V4L2BUF_IN_DRIVER */
++    for (i = 0; i < ctx->num_buffers; ++i) {
++        V4L2Buffer *const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
++        if (avbuf->status == V4L2BUF_IN_DRIVER)
++            ++n;
++    }
++    return n;
++}
++
+ static V4L2Buffer* v4l2_dequeue_v4l2buf(V4L2Context *ctx, int timeout)
+ {
++    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
++    const int is_capture = !V4L2_TYPE_IS_OUTPUT(ctx->type);
+     struct v4l2_plane planes[VIDEO_MAX_PLANES];
+     struct v4l2_buffer buf = { 0 };
+     V4L2Buffer *avbuf;
+@@ -290,50 +363,84 @@ static V4L2Buffer* v4l2_dequeue_v4l2buf(
+         .fd = ctx_to_m2mctx(ctx)->fd,
+     };
+     int i, ret;
++    int no_rx_means_done = 0;
+
+-    if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx->buffers) {
++    if (is_capture && ctx->bufrefs) {
+         for (i = 0; i < ctx->num_buffers; i++) {
+-            if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER)
++            avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
++            if (avbuf->status == V4L2BUF_IN_DRIVER)
+                 break;
+         }
+         if (i == ctx->num_buffers)
+-            av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers returned to "
++            av_log(logger(ctx), AV_LOG_WARNING, "All capture buffers (%d) returned to "
+                                                 "userspace. Increase num_capture_buffers "
+                                                 "to prevent device deadlock or dropped "
+-                                                "packets/frames.\n");
++                                                "packets/frames.\n", i);
+     }
+
++#if 0
++    // I think this is true but pointless
++    // we will get some other form of EOF signal
++
+     /* if we are draining and there are no more capture buffers queued in the driver we are done */
+-    if (!V4L2_TYPE_IS_OUTPUT(ctx->type) && ctx_to_m2mctx(ctx)->draining) {
++    if (is_capture && ctx_to_m2mctx(ctx)->draining) {
+         for (i = 0; i < ctx->num_buffers; i++) {
+             /* capture buffer initialization happens during decode hence
+              * detection happens at runtime
+              */
+-            if (!ctx->buffers)
++            if (!ctx->bufrefs)
+                 break;
+
+-            if (ctx->buffers[i].status == V4L2BUF_IN_DRIVER)
++            avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
++            if (avbuf->status == V4L2BUF_IN_DRIVER)
+                 goto start;
+         }
+         ctx->done = 1;
+         return NULL;
+     }
++#endif
+
+ start:
+-    if (V4L2_TYPE_IS_OUTPUT(ctx->type))
+-        pfd.events =  POLLOUT | POLLWRNORM;
+-    else {
++    if (is_capture) {
+         /* no need to listen to requests for more input while draining */
+         if (ctx_to_m2mctx(ctx)->draining)
+             pfd.events =  POLLIN | POLLRDNORM | POLLPRI;
++    } else {
++        pfd.events =  POLLOUT | POLLWRNORM;
+     }
++    no_rx_means_done = s->resize_pending && is_capture;
+
+     for (;;) {
+-        ret = poll(&pfd, 1, timeout);
++        // If we have a resize pending then all buffers should be Qed
++        // With a resize pending we should be in drain but evidence suggests
++        // that not all decoders do this so poll to clear
++        int t2 = no_rx_means_done ? 0 : timeout < 0 ? 3000 : timeout;
++        const int e = pfd.events;
++
++        ret = poll(&pfd, 1, t2);
++
+         if (ret > 0)
+             break;
+-        if (errno == EINTR)
+-            continue;
++
++        if (ret < 0) {
++            int err = errno;
++            if (err == EINTR)
++                continue;
++            av_log(logger(ctx), AV_LOG_ERROR, "=== poll error %d (%s): events=%#x, cap buffers=%d\n",
++                   err, strerror(err),
++                   e, count_in_driver(ctx));
++            return NULL;
++        }
++
++        // ret == 0 (timeout)
++        if (no_rx_means_done) {
++            av_log(logger(ctx), AV_LOG_DEBUG, "Ctx done on timeout\n");
++            ret = ctx_done(ctx);
++            if (ret > 0)
++                goto start;
++        }
++        if (timeout == -1)
++            av_log(logger(ctx), AV_LOG_ERROR, "=== poll unexpected TIMEOUT: events=%#x, cap buffers=%d\n", e, count_in_driver(ctx));;
+         return NULL;
+     }
+
+@@ -343,7 +450,8 @@ start:
+            no need to raise a warning */
+         if (timeout == 0) {
+             for (i = 0; i < ctx->num_buffers; i++) {
+-                if (ctx->buffers[i].status != V4L2BUF_AVAILABLE)
++                avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
++                if (avbuf->status != V4L2BUF_AVAILABLE)
+                     av_log(logger(ctx), AV_LOG_WARNING, "%s POLLERR\n", ctx->name);
+             }
+         }
+@@ -361,22 +469,25 @@ start:
+             ctx->done = 1;
+             return NULL;
+         }
+-        if (ret) {
+-            /* if re-init was successful drop the buffer (if there was one)
+-             * since we had to reconfigure capture (unmap all buffers)
+-             */
+-            return NULL;
+-        }
++        if (ret > 0)
++            goto start;
+     }
+
+     /* 2. dequeue the buffer */
+     if (pfd.revents & (POLLIN | POLLRDNORM | POLLOUT | POLLWRNORM)) {
+
+-        if (!V4L2_TYPE_IS_OUTPUT(ctx->type)) {
++        if (is_capture) {
+             /* there is a capture buffer ready */
+             if (pfd.revents & (POLLIN | POLLRDNORM))
+                 goto dequeue;
+
++            // CAPTURE Q drained
++            if (no_rx_means_done) {
++                if (ctx_done(ctx) > 0)
++                    goto start;
++                return NULL;
++            }
++
+             /* the driver is ready to accept more input; instead of waiting for the capture
+              * buffer to complete we return NULL so input can proceed (we are single threaded)
+              */
+@@ -394,37 +505,58 @@ dequeue:
+             buf.m.planes = planes;
+         }
+
+-        ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf);
+-        if (ret) {
+-            if (errno != EAGAIN) {
+-                ctx->done = 1;
+-                if (errno != EPIPE)
++        while ((ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_DQBUF, &buf)) == -1) {
++            const int err = errno;
++            if (err == EINTR)
++                continue;
++            if (err != EAGAIN) {
++                // EPIPE on CAPTURE can be used instead of BUF_FLAG_LAST
++                if (err != EPIPE || !is_capture)
+                     av_log(logger(ctx), AV_LOG_DEBUG, "%s VIDIOC_DQBUF, errno (%s)\n",
+-                        ctx->name, av_err2str(AVERROR(errno)));
++                        ctx->name, av_err2str(AVERROR(err)));
++                if (ctx_done(ctx) > 0)
++                    goto start;
+             }
+             return NULL;
+         }
++        --ctx->q_count;
++        av_log(logger(ctx), AV_LOG_DEBUG, "--- %s VIDIOC_DQBUF OK: index=%d, ts=%ld.%06ld, count=%d, dq=%d\n",
++               ctx->name, buf.index,
++               buf.timestamp.tv_sec, buf.timestamp.tv_usec,
++               ctx->q_count, ++ctx->dq_count);
++
++        avbuf = (V4L2Buffer *)ctx->bufrefs[buf.index]->data;
++        avbuf->status = V4L2BUF_AVAILABLE;
++        avbuf->buf = buf;
++        if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
++            memcpy(avbuf->planes, planes, sizeof(planes));
++            avbuf->buf.m.planes = avbuf->planes;
++        }
+
+-        if (ctx_to_m2mctx(ctx)->draining && !V4L2_TYPE_IS_OUTPUT(ctx->type)) {
++        if (ctx_to_m2mctx(ctx)->draining && is_capture) {
+             int bytesused = V4L2_TYPE_IS_MULTIPLANAR(buf.type) ?
+                             buf.m.planes[0].bytesused : buf.bytesused;
+             if (bytesused == 0) {
+-                ctx->done = 1;
++                av_log(logger(ctx), AV_LOG_DEBUG, "Buffer empty - reQ\n");
++
++                // Must reQ so we don't leak
++                // May not matter if the next thing we do is release all the
++                // buffers but better to be tidy.
++                ff_v4l2_buffer_enqueue(avbuf);
++
++                if (ctx_done(ctx) > 0)
++                    goto start;
+                 return NULL;
+             }
+ #ifdef V4L2_BUF_FLAG_LAST
+-            if (buf.flags & V4L2_BUF_FLAG_LAST)
+-                ctx->done = 1;
++            if (buf.flags & V4L2_BUF_FLAG_LAST) {
++                av_log(logger(ctx), AV_LOG_TRACE, "FLAG_LAST set\n");
++                avbuf->status = V4L2BUF_IN_USE;  // Avoid flushing this buffer
++                ctx_done(ctx);
++            }
+ #endif
+         }
+
+-        avbuf = &ctx->buffers[buf.index];
+-        avbuf->status = V4L2BUF_AVAILABLE;
+-        avbuf->buf = buf;
+-        if (V4L2_TYPE_IS_MULTIPLANAR(ctx->type)) {
+-            memcpy(avbuf->planes, planes, sizeof(planes));
+-            avbuf->buf.m.planes = avbuf->planes;
+-        }
+         return avbuf;
+     }
+
+@@ -443,8 +575,9 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(
+     }
+
+     for (i = 0; i < ctx->num_buffers; i++) {
+-        if (ctx->buffers[i].status == V4L2BUF_AVAILABLE)
+-            return &ctx->buffers[i];
++        V4L2Buffer * const avbuf = (V4L2Buffer *)ctx->bufrefs[i]->data;
++        if (avbuf->status == V4L2BUF_AVAILABLE)
++            return avbuf;
+     }
+
+     return NULL;
+@@ -452,25 +585,45 @@ static V4L2Buffer* v4l2_getfree_v4l2buf(
+
+ static int v4l2_release_buffers(V4L2Context* ctx)
+ {
+-    struct v4l2_requestbuffers req = {
+-        .memory = V4L2_MEMORY_MMAP,
+-        .type = ctx->type,
+-        .count = 0, /* 0 -> unmaps buffers from the driver */
+-    };
+-    int i, j;
++    int i;
++    int ret = 0;
++    const int fd = ctx_to_m2mctx(ctx)->fd;
+
+-    for (i = 0; i < ctx->num_buffers; i++) {
+-        V4L2Buffer *buffer = &ctx->buffers[i];
++    // Orphan any buffers in the wild
++    ff_weak_link_break(&ctx->wl_master);
+
+-        for (j = 0; j < buffer->num_planes; j++) {
+-            struct V4L2Plane_info *p = &buffer->plane_info[j];
+-            if (p->mm_addr && p->length)
+-                if (munmap(p->mm_addr, p->length) < 0)
+-                    av_log(logger(ctx), AV_LOG_ERROR, "%s unmap plane (%s))\n", ctx->name, av_err2str(AVERROR(errno)));
++    if (ctx->bufrefs) {
++        for (i = 0; i < ctx->num_buffers; i++)
++            av_buffer_unref(ctx->bufrefs + i);
++    }
++
++    if (fd != -1) {
++        struct v4l2_requestbuffers req = {
++            .memory = V4L2_MEMORY_MMAP,
++            .type = ctx->type,
++            .count = 0, /* 0 -> unmap all buffers from the driver */
++        };
++        /* Retry only on EINTR; any other failure is logged once and returned */
++        while ((ret = ioctl(fd, VIDIOC_REQBUFS, &req)) == -1) {
++            if (errno == EINTR)
++                continue;
++
++            ret = AVERROR(errno);
++            av_log(logger(ctx), AV_LOG_ERROR, "release all %s buffers (%s)\n",
++                ctx->name, av_err2str(ret));
++
++            if (ctx_to_m2mctx(ctx)->output_drm)
++                av_log(logger(ctx), AV_LOG_ERROR,
++                    "Make sure the DRM client releases all FB/GEM objects before closing the codec (ie):\n"
++                    "for all buffers: \n"
++                    "  1. drmModeRmFB(..)\n"
++                    "  2. drmIoctl(.., DRM_IOCTL_GEM_CLOSE,... )\n");
++            break; /* without this a persistent error (e.g. EBUSY) would loop forever */
+         }
+     }
++    ctx->q_count = 0;
+
+-    return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_REQBUFS, &req);
++    return ret;
+ }
+
+ static inline int v4l2_try_raw_format(V4L2Context* ctx, enum AVPixelFormat pixfmt)
+@@ -499,6 +652,8 @@ static inline int v4l2_try_raw_format(V4
+
+ static int v4l2_get_raw_format(V4L2Context* ctx, enum AVPixelFormat *p)
+ {
++    V4L2m2mContext* s = ctx_to_m2mctx(ctx);
++    V4L2m2mPriv *priv = s->avctx->priv_data;
+     enum AVPixelFormat pixfmt = ctx->av_pix_fmt;
+     struct v4l2_fmtdesc fdesc;
+     int ret;
+@@ -517,6 +672,13 @@ static int v4l2_get_raw_format(V4L2Conte
+         if (ret)
+             return AVERROR(EINVAL);
+
++        if (priv->pix_fmt != AV_PIX_FMT_NONE) {
++            if (fdesc.pixelformat != ff_v4l2_format_avfmt_to_v4l2(priv->pix_fmt)) {
++                fdesc.index++;
++                continue;
++            }
++        }
++
+         pixfmt = ff_v4l2_format_v4l2_to_avfmt(fdesc.pixelformat, AV_CODEC_ID_RAWVIDEO);
+         ret = v4l2_try_raw_format(ctx, pixfmt);
+         if (ret){
+@@ -569,18 +731,77 @@ static int v4l2_get_coded_format(V4L2Con
+   *
+   *****************************************************************************/
+
++
++static void flush_all_buffers_status(V4L2Context* const ctx) /* bookkeeping reset, run after a successful STREAMOFF */
++{
++    int i;
++    /* Nothing to reset when no buffer array exists */
++    if (!ctx->bufrefs)
++        return;
++    /* Only IN_DRIVER buffers change state; every other status is left untouched */
++    for (i = 0; i < ctx->num_buffers; ++i) {
++        struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
++        if (buf->status == V4L2BUF_IN_DRIVER)
++            buf->status = V4L2BUF_AVAILABLE;
++    }
++    ctx->q_count = 0; /* the queued-buffer counter restarts from zero */
++}
++
++static int stuff_all_buffers(AVCodecContext * avctx, V4L2Context* ctx) /* avctx is only used for logging */
++{
++    int i;
++    int rv;
++    /* Create the buffers first if the context has none yet */
++    if (!ctx->bufrefs) {
++        rv = ff_v4l2_context_init(ctx);
++        if (rv) {
++            av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n");
++            return rv;
++        }
++    }
++    /* Enqueue every buffer that is currently AVAILABLE; stop at the first failure */
++    for (i = 0; i < ctx->num_buffers; ++i) {
++        struct V4L2Buffer * const buf = (struct V4L2Buffer *)ctx->bufrefs[i]->data;
++        if (buf->status == V4L2BUF_AVAILABLE) {
++            rv = ff_v4l2_buffer_enqueue(buf);
++            if (rv < 0)
++                return rv;
++        }
++    }
++    return 0;
++}
++
+ int ff_v4l2_context_set_status(V4L2Context* ctx, uint32_t cmd)
+ {
+     int type = ctx->type;
+     int ret;
++    AVCodecContext * const avctx = logger(ctx);
++
++    ff_mutex_lock(&ctx->lock);
++
++    if (cmd == VIDIOC_STREAMON && !V4L2_TYPE_IS_OUTPUT(ctx->type))
++        stuff_all_buffers(avctx, ctx);
+
+     ret = ioctl(ctx_to_m2mctx(ctx)->fd, cmd, &type);
+-    if (ret < 0)
+-        return AVERROR(errno);
++    if (ret < 0) {
++        const int err = errno;
++        av_log(avctx, AV_LOG_ERROR, "%s set status %d (%s) failed: err=%d\n", ctx->name,
++               cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF", err);
++        ret = AVERROR(err);
++    }
++    else
++    {
++        if (cmd == VIDIOC_STREAMOFF)
++            flush_all_buffers_status(ctx);
+
+-    ctx->streamon = (cmd == VIDIOC_STREAMON);
++        ctx->streamon = (cmd == VIDIOC_STREAMON);
++        av_log(avctx, AV_LOG_DEBUG, "%s set status %d (%s) OK\n", ctx->name,
++               cmd, (cmd == VIDIOC_STREAMON) ? "ON" : "OFF");
++    }
+
+-    return 0;
++    ff_mutex_unlock(&ctx->lock);
++
++    return ret;
+ }
+
+ int ff_v4l2_context_enqueue_frame(V4L2Context* ctx, const AVFrame* frame)
+@@ -608,7 +829,8 @@ int ff_v4l2_context_enqueue_frame(V4L2Co
+     return ff_v4l2_buffer_enqueue(avbuf);
+ }
+
+-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt)
++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt,
++                                   const void * extdata, size_t extlen, int no_rescale_pts)
+ {
+     V4L2m2mContext *s = ctx_to_m2mctx(ctx);
+     V4L2Buffer* avbuf;
+@@ -616,8 +838,9 @@ int ff_v4l2_context_enqueue_packet(V4L2C
+
+     if (!pkt->size) {
+         ret = v4l2_stop_decode(ctx);
++        // Log but otherwise ignore stop failure
+         if (ret)
+-            av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode\n", ctx->name);
++            av_log(logger(ctx), AV_LOG_ERROR, "%s stop_decode failed: err=%d\n", ctx->name, ret);
+         s->draining = 1;
+         return 0;
+     }
+@@ -626,14 +849,17 @@ int ff_v4l2_context_enqueue_packet(V4L2C
+     if (!avbuf)
+         return AVERROR(EAGAIN);
+
+-    ret = ff_v4l2_buffer_avpkt_to_buf(pkt, avbuf);
+-    if (ret)
++    ret = ff_v4l2_buffer_avpkt_to_buf_ext(pkt, avbuf, extdata, extlen, no_rescale_pts);
++    if (ret == AVERROR(ENOMEM))
++        av_log(logger(ctx), AV_LOG_ERROR, "Buffer overflow in %s: pkt->size=%d > buf->length=%d\n",
++               __func__, pkt->size, avbuf->planes[0].length);
++    else if (ret)
+         return ret;
+
+     return ff_v4l2_buffer_enqueue(avbuf);
+ }
+
+-int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout)
++int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* frame, int timeout, int no_rescale_pts)
+ {
+     V4L2Buffer *avbuf;
+
+@@ -650,7 +876,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Co
+         return AVERROR(EAGAIN);
+     }
+
+-    return ff_v4l2_buffer_buf_to_avframe(frame, avbuf);
++    return ff_v4l2_buffer_buf_to_avframe(frame, avbuf, no_rescale_pts);
+ }
+
+ int ff_v4l2_context_dequeue_packet(V4L2Context* ctx, AVPacket* pkt)
+@@ -702,78 +928,155 @@ int ff_v4l2_context_get_format(V4L2Conte
+
+ int ff_v4l2_context_set_format(V4L2Context* ctx)
+ {
+-    return ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format);
++    int ret;
++
++    ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format);
++    if (ret != 0)
++        return ret;
++
++    // Check returned size against min size and if smaller have another go
++    // Only worry about plane[0] as this is meant to enforce limits for
++    // encoded streams where we might know a bit more about the shape
++    // than the driver
++    if (V4L2_TYPE_IS_MULTIPLANAR(ctx->format.type)) {
++        if (ctx->min_buf_size <= ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage)
++            return 0;
++        ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage = ctx->min_buf_size;
++    }
++    else {
++        if (ctx->min_buf_size <= ctx->format.fmt.pix.sizeimage)
++            return 0;
++        ctx->format.fmt.pix.sizeimage = ctx->min_buf_size;
++    }
++
++    ret = ioctl(ctx_to_m2mctx(ctx)->fd, VIDIOC_S_FMT, &ctx->format);
++    return ret;
+ }
+
+ void ff_v4l2_context_release(V4L2Context* ctx)
+ {
+     int ret;
+
+-    if (!ctx->buffers)
++    if (!ctx->bufrefs)
+         return;
+
+     ret = v4l2_release_buffers(ctx);
+     if (ret)
+         av_log(logger(ctx), AV_LOG_WARNING, "V4L2 failed to unmap the %s buffers\n", ctx->name);
+
+-    av_freep(&ctx->buffers);
++    av_freep(&ctx->bufrefs);
++    av_buffer_unref(&ctx->frames_ref);
++
++    ff_mutex_destroy(&ctx->lock);
+ }
+
+-int ff_v4l2_context_init(V4L2Context* ctx)
++
++static int create_buffers(V4L2Context* const ctx, const unsigned int req_buffers)
+ {
+-    V4L2m2mContext *s = ctx_to_m2mctx(ctx);
++    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
+     struct v4l2_requestbuffers req;
+-    int ret, i;
+-
+-    if (!v4l2_type_supported(ctx)) {
+-        av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type);
+-        return AVERROR_PATCHWELCOME;
+-    }
+-
+-    ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format);
+-    if (ret)
+-        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed\n", ctx->name);
++    int ret;
++    int i;
+
+     memset(&req, 0, sizeof(req));
+-    req.count = ctx->num_buffers;
++    req.count = req_buffers;
+     req.memory = V4L2_MEMORY_MMAP;
+     req.type = ctx->type;
+-    ret = ioctl(s->fd, VIDIOC_REQBUFS, &req);
+-    if (ret < 0) {
+-        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, strerror(errno));
+-        return AVERROR(errno);
++    while ((ret = ioctl(s->fd, VIDIOC_REQBUFS, &req)) == -1) {
++        if (errno != EINTR) {
++            ret = AVERROR(errno);
++            av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_REQBUFS failed: %s\n", ctx->name, av_err2str(ret));
++            return ret;
++        }
+     }
+
+     ctx->num_buffers = req.count;
+-    ctx->buffers = av_mallocz(ctx->num_buffers * sizeof(V4L2Buffer));
+-    if (!ctx->buffers) {
++    ctx->bufrefs = av_mallocz(ctx->num_buffers * sizeof(*ctx->bufrefs));
++    if (!ctx->bufrefs) {
+         av_log(logger(ctx), AV_LOG_ERROR, "%s malloc enomem\n", ctx->name);
+-        return AVERROR(ENOMEM);
++        goto fail_release;
+     }
+
+-    for (i = 0; i < req.count; i++) {
+-        ctx->buffers[i].context = ctx;
+-        ret = ff_v4l2_buffer_initialize(&ctx->buffers[i], i);
+-        if (ret < 0) {
++    ctx->wl_master = ff_weak_link_new(ctx);
++    if (!ctx->wl_master) {
++        ret = AVERROR(ENOMEM);
++        goto fail_release;
++    }
++
++    for (i = 0; i < ctx->num_buffers; i++) {
++        ret = ff_v4l2_buffer_initialize(&ctx->bufrefs[i], i, ctx);
++        if (ret) {
+             av_log(logger(ctx), AV_LOG_ERROR, "%s buffer[%d] initialization (%s)\n", ctx->name, i, av_err2str(ret));
+-            goto error;
++            goto fail_release;
+         }
+     }
+
+     av_log(logger(ctx), AV_LOG_DEBUG, "%s: %s %02d buffers initialized: %04ux%04u, sizeimage %08u, bytesperline %08u\n", ctx->name,
+         V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? av_fourcc2str(ctx->format.fmt.pix_mp.pixelformat) : av_fourcc2str(ctx->format.fmt.pix.pixelformat),
+         req.count,
+-        v4l2_get_width(&ctx->format),
+-        v4l2_get_height(&ctx->format),
++        ff_v4l2_get_format_width(&ctx->format),
++        ff_v4l2_get_format_height(&ctx->format),
+         V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].sizeimage : ctx->format.fmt.pix.sizeimage,
+         V4L2_TYPE_IS_MULTIPLANAR(ctx->type) ? ctx->format.fmt.pix_mp.plane_fmt[0].bytesperline : ctx->format.fmt.pix.bytesperline);
+
+     return 0;
+
+-error:
++fail_release:
+     v4l2_release_buffers(ctx);
++    av_freep(&ctx->bufrefs);
++    return ret;
++}
++
++int ff_v4l2_context_init(V4L2Context* ctx)
++{
++    V4L2m2mContext * const s = ctx_to_m2mctx(ctx);
++    int ret;
++
++    // It is not valid to reinit a context without a previous release
++    av_assert0(ctx->bufrefs == NULL);
++
++    if (!v4l2_type_supported(ctx)) {
++        av_log(logger(ctx), AV_LOG_ERROR, "type %i not supported\n", ctx->type);
++        return AVERROR_PATCHWELCOME;
++    }
+
+-    av_freep(&ctx->buffers);
++    ff_mutex_init(&ctx->lock, NULL);
++
++    if (s->output_drm) {
++        AVHWFramesContext *hwframes;
++
++        ctx->frames_ref = av_hwframe_ctx_alloc(s->device_ref);
++        if (!ctx->frames_ref) {
++            ret = AVERROR(ENOMEM);
++            goto fail_unlock;
++        }
++
++        hwframes = (AVHWFramesContext*)ctx->frames_ref->data;
++        hwframes->format = AV_PIX_FMT_DRM_PRIME;
++        hwframes->sw_format = ctx->av_pix_fmt;
++        hwframes->width = ctx->width;
++        hwframes->height = ctx->height;
++        ret = av_hwframe_ctx_init(ctx->frames_ref);
++        if (ret < 0)
++            goto fail_unref_hwframes;
++    }
++
++    ret = ioctl(s->fd, VIDIOC_G_FMT, &ctx->format);
++    if (ret) {
++        ret = AVERROR(errno);
++        av_log(logger(ctx), AV_LOG_ERROR, "%s VIDIOC_G_FMT failed: %s\n", ctx->name, av_err2str(ret));
++        goto fail_unref_hwframes;
++    }
++
++    ret = create_buffers(ctx, ctx->num_buffers);
++    if (ret < 0)
++        goto fail_unref_hwframes;
++
++    return 0;
+
++fail_unref_hwframes:
++    av_buffer_unref(&ctx->frames_ref);
++fail_unlock:
++    ff_mutex_destroy(&ctx->lock);
+     return ret;
+ }
+--- a/libavcodec/v4l2_context.h
++++ b/libavcodec/v4l2_context.h
+@@ -31,6 +31,7 @@
+ #include "libavutil/pixfmt.h"
+ #include "libavutil/frame.h"
+ #include "libavutil/buffer.h"
++#include "libavutil/thread.h"
+ #include "v4l2_buffers.h"
+
+ typedef struct V4L2Context {
+@@ -70,11 +71,18 @@ typedef struct V4L2Context {
+      */
+     int width, height;
+     AVRational sample_aspect_ratio;
++    struct v4l2_rect selection;
+
+     /**
+-     * Indexed array of V4L2Buffers
++     * If the default size of buffer is less than this then try to
++     * set to this.
+      */
+-    V4L2Buffer *buffers;
++    uint32_t min_buf_size;
++
++    /**
++     * Indexed array of pointers to V4L2Buffers
++     */
++    AVBufferRef **bufrefs;
+
+     /**
+      * Readonly after init.
+@@ -92,6 +100,12 @@ typedef struct V4L2Context {
+      */
+     int done;
+
++    AVBufferRef *frames_ref;
++    int q_count;
++    int dq_count;
++    struct ff_weak_link_master *wl_master;
++
++    AVMutex lock;
+ } V4L2Context;
+
+ /**
+@@ -156,9 +170,12 @@ int ff_v4l2_context_dequeue_packet(V4L2C
+  * @param[in] ctx The V4L2Context to dequeue from.
+  * @param[inout] f The AVFrame to dequeue to.
+  * @param[in] timeout The timeout for dequeue (-1 to block, 0 to return immediately, or milliseconds)
++ * @param[in] no_rescale_pts (0 rescale pts, 1 use pts as
++ *       timestamp directly)
++ *
+  * @return 0 in case of success, AVERROR(EAGAIN) if no buffer was ready, another negative error in case of error.
+  */
+-int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout);
++int ff_v4l2_context_dequeue_frame(V4L2Context* ctx, AVFrame* f, int timeout, int no_rescale_pts);
+
+ /**
+  * Enqueues a buffer to a V4L2Context from an AVPacket
+@@ -170,7 +187,7 @@ int ff_v4l2_context_dequeue_frame(V4L2Co
+  * @param[in] pkt A pointer to an AVPacket.
+  * @return 0 in case of success, a negative error otherwise.
+  */
+-int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt);
++int ff_v4l2_context_enqueue_packet(V4L2Context* ctx, const AVPacket* pkt, const void * ext_data, size_t ext_size, int no_rescale_pts);
+
+ /**
+  * Enqueues a buffer to a V4L2Context from an AVFrame
+--- a/libavcodec/v4l2_m2m.c
++++ b/libavcodec/v4l2_m2m.c
+@@ -215,13 +215,7 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont
+         av_log(log_ctx, AV_LOG_ERROR, "capture VIDIOC_STREAMOFF\n");
+
+     /* 2. unmap the capture buffers (v4l2 and ffmpeg):
+-     *    we must wait for all references to be released before being allowed
+-     *    to queue new buffers.
+      */
+-    av_log(log_ctx, AV_LOG_DEBUG, "waiting for user to release AVBufferRefs\n");
+-    if (atomic_load(&s->refcount))
+-        while(sem_wait(&s->refsync) == -1 && errno == EINTR);
+-
+     ff_v4l2_context_release(&s->capture);
+
+     /* 3. get the new capture format */
+@@ -328,7 +322,10 @@ static void v4l2_m2m_destroy_context(voi
+     ff_v4l2_context_release(&s->capture);
+     sem_destroy(&s->refsync);
+
+-    close(s->fd);
++    if (s->fd != -1)
++        close(s->fd);
++
++    av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Context destroyed\n");
+
+     av_free(s);
+ }
+@@ -338,17 +335,34 @@ int ff_v4l2_m2m_codec_end(V4L2m2mPriv *p
+     V4L2m2mContext *s = priv->context;
+     int ret;
+
+-    ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF);
+-    if (ret)
+-        av_log(s->avctx, AV_LOG_ERROR, "VIDIOC_STREAMOFF %s\n", s->output.name);
++    if (!s)
++        return 0;
+
+-    ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
+-    if (ret)
+-        av_log(s->avctx, AV_LOG_ERROR, "VIDIOC_STREAMOFF %s\n", s->capture.name);
++    av_log(s->avctx, AV_LOG_DEBUG, "V4L2 Codec end\n");
++
++    if (av_codec_is_decoder(s->avctx->codec))
++        av_packet_unref(&s->buf_pkt);
++
++    if (s->fd >= 0) {
++        ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMOFF);
++        if (ret)
++            av_log(s->avctx, AV_LOG_ERROR, "VIDIOC_STREAMOFF %s\n", s->output.name);
++
++        ret = ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
++        if (ret)
++            av_log(s->avctx, AV_LOG_ERROR, "VIDIOC_STREAMOFF %s\n", s->capture.name);
++    }
+
+     ff_v4l2_context_release(&s->output);
+
++    close(s->fd);
++    s->fd = -1;
++
+     s->self_ref = NULL;
++    // This is only called on avctx close so after this point we don't have that
++    // Crash sooner if we find we are using it (can still log with avctx = NULL)
++    s->avctx = NULL;
++    priv->context = NULL;
+     av_buffer_unref(&priv->context_ref);
+
+     return 0;
+--- a/libavcodec/v4l2_m2m.h
++++ b/libavcodec/v4l2_m2m.h
+@@ -30,6 +30,7 @@
+ #include <linux/videodev2.h>
+
+ #include "libavcodec/avcodec.h"
++#include "libavutil/pixfmt.h"
+ #include "v4l2_context.h"
+
+ #define container_of(ptr, type, member) ({ \
+@@ -38,7 +39,18 @@
+
+ #define V4L_M2M_DEFAULT_OPTS \
+     { "num_output_buffers", "Number of buffers in the output context",\
+-        OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 6, INT_MAX, FLAGS }
++        OFFSET(num_output_buffers), AV_OPT_TYPE_INT, { .i64 = 16 }, 2, INT_MAX, FLAGS }
++
++#define FF_V4L2_M2M_TRACK_SIZE 128
++typedef struct V4L2m2mTrackEl {
++    int     discard;   // If we see this buffer its been flushed, so discard
++    int     pkt_size;
++    int64_t pts;
++    int64_t reordered_opaque;
++    int64_t pkt_pos;
++    int64_t pkt_duration;
++    int64_t track_pts;
++} V4L2m2mTrackEl;
+
+ typedef struct V4L2m2mContext {
+     char devname[PATH_MAX];
+@@ -53,6 +65,7 @@ typedef struct V4L2m2mContext {
+     sem_t refsync;
+     atomic_uint refcount;
+     int reinit;
++    int resize_pending;
+
+     /* null frame/packet received */
+     int draining;
+@@ -63,6 +76,23 @@ typedef struct V4L2m2mContext {
+
+     /* reference back to V4L2m2mPriv */
+     void *priv;
++
++    AVBufferRef *device_ref;
++
++    /* generate DRM frames */
++    int output_drm;
++
++    /* Frame tracking */
++    int64_t last_pkt_dts;
++    int64_t last_opaque;
++    unsigned int track_no;
++    V4L2m2mTrackEl track_els[FF_V4L2_M2M_TRACK_SIZE];
++
++    /* req pkt */
++    int req_pkt;
++
++    /* Ext data sent */
++    int extdata_sent;
+ } V4L2m2mContext;
+
+ typedef struct V4L2m2mPriv {
+@@ -73,6 +103,7 @@ typedef struct V4L2m2mPriv {
+
+     int num_output_buffers;
+     int num_capture_buffers;
++    enum AVPixelFormat pix_fmt;
+ } V4L2m2mPriv;
+
+ /**
+@@ -126,4 +157,16 @@ int ff_v4l2_m2m_codec_reinit(V4L2m2mCont
+  */
+ int ff_v4l2_m2m_codec_full_reinit(V4L2m2mContext *ctx);
+
++
++static inline unsigned int ff_v4l2_get_format_width(struct v4l2_format *fmt)
++{
++    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
++}
++
++static inline unsigned int ff_v4l2_get_format_height(struct v4l2_format *fmt)
++{
++    return V4L2_TYPE_IS_MULTIPLANAR(fmt->type) ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
++}
++
++
+ #endif /* AVCODEC_V4L2_M2M_H */
+--- a/libavcodec/v4l2_m2m_dec.c
++++ b/libavcodec/v4l2_m2m_dec.c
+@@ -23,6 +23,10 @@
+
+ #include <linux/videodev2.h>
+ #include <sys/ioctl.h>
++
++#include "libavutil/avassert.h"
++#include "libavutil/hwcontext.h"
++#include "libavutil/hwcontext_drm.h"
+ #include "libavutil/pixfmt.h"
+ #include "libavutil/pixdesc.h"
+ #include "libavutil/opt.h"
+@@ -30,26 +34,51 @@
+ #include "libavcodec/decode.h"
+ #include "libavcodec/internal.h"
+
++#include "libavcodec/hwaccels.h"
++#include "libavcodec/internal.h"
++#include "libavcodec/hwconfig.h"
++
+ #include "v4l2_context.h"
+ #include "v4l2_m2m.h"
+ #include "v4l2_fmt.h"
+
++static int check_output_streamon(AVCodecContext *const avctx, V4L2m2mContext *const s)
++{
++    int ret;
++    struct v4l2_decoder_cmd cmd = {
++        .cmd = V4L2_DEC_CMD_START,
++        .flags = 0,
++    };
++
++    if (s->output.streamon)
++        return 0;
++
++    ret = ff_v4l2_context_set_status(&s->output, VIDIOC_STREAMON);
++    if (ret < 0)
++        av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMON on output context\n");
++
++    if (!s->capture.streamon || ret < 0)
++        return ret;
++
++    ret = ioctl(s->fd, VIDIOC_DECODER_CMD, &cmd);
++    if (ret < 0)
++        av_log(avctx, AV_LOG_ERROR, "VIDIOC_DECODER_CMD start error: %d\n", errno);
++    else
++        av_log(avctx, AV_LOG_DEBUG, "VIDIOC_DECODER_CMD start OK\n");
++
++    return ret;
++}
++
+ static int v4l2_try_start(AVCodecContext *avctx)
+ {
+     V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
+     V4L2Context *const capture = &s->capture;
+-    V4L2Context *const output = &s->output;
+     struct v4l2_selection selection = { 0 };
+     int ret;
+
+     /* 1. start the output process */
+-    if (!output->streamon) {
+-        ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMON);
+-        if (ret < 0) {
+-            av_log(avctx, AV_LOG_DEBUG, "VIDIOC_STREAMON on output context\n");
+-            return ret;
+-        }
+-    }
++    if ((ret = check_output_streamon(avctx, s)) != 0)
++        return ret;
+
+     if (capture->streamon)
+         return 0;
+@@ -63,15 +92,29 @@ static int v4l2_try_start(AVCodecContext
+     }
+
+     /* 2.1 update the AVCodecContext */
+-    avctx->pix_fmt = ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO);
+-    capture->av_pix_fmt = avctx->pix_fmt;
++    capture->av_pix_fmt =
++        ff_v4l2_format_v4l2_to_avfmt(capture->format.fmt.pix_mp.pixelformat, AV_CODEC_ID_RAWVIDEO);
++    if (s->output_drm) {
++        avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
++        avctx->sw_pix_fmt = capture->av_pix_fmt;
++    }
++    else
++        avctx->pix_fmt = capture->av_pix_fmt;
+
+     /* 3. set the crop parameters */
++#if 1
++    selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
++    selection.target = V4L2_SEL_TGT_CROP_DEFAULT;
++    ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection);
++    av_log(avctx, AV_LOG_INFO, "Post G selection ret=%d, err=%d %dx%d\n", ret, errno, selection.r.width, selection.r.height);
++#else
+     selection.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
+     selection.r.height = avctx->coded_height;
+     selection.r.width = avctx->coded_width;
++    av_log(avctx, AV_LOG_INFO, "Try selection %dx%d\n", avctx->coded_width, avctx->coded_height);
+     ret = ioctl(s->fd, VIDIOC_S_SELECTION, &selection);
+-    if (!ret) {
++    av_log(avctx, AV_LOG_INFO, "Post S selection ret=%d, err=%d %dx%d\n", ret, errno, selection.r.width, selection.r.height);
++    if (1) {
+         ret = ioctl(s->fd, VIDIOC_G_SELECTION, &selection);
+         if (ret) {
+             av_log(avctx, AV_LOG_WARNING, "VIDIOC_G_SELECTION ioctl\n");
+@@ -82,15 +125,7 @@ static int v4l2_try_start(AVCodecContext
+             capture->width  = selection.r.width;
+         }
+     }
+-
+-    /* 4. init the capture context now that we have the capture format */
+-    if (!capture->buffers) {
+-        ret = ff_v4l2_context_init(capture);
+-        if (ret) {
+-            av_log(avctx, AV_LOG_ERROR, "can't request capture buffers\n");
+-            return AVERROR(ENOMEM);
+-        }
+-    }
++#endif
+
+     /* 5. start the capture process */
+     ret = ff_v4l2_context_set_status(capture, VIDIOC_STREAMON);
+@@ -133,52 +168,312 @@ static int v4l2_prepare_decoder(V4L2m2mC
+     return 0;
+ }
+
+-static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
++static inline int64_t track_to_pts(AVCodecContext *avctx, unsigned int n)
++{
++    return (int64_t)n;
++}
++
++static inline unsigned int pts_to_track(AVCodecContext *avctx, const int64_t pts)
++{
++    return (unsigned int)pts;
++}
++
++// FFmpeg requires us to propagate a number of vars from the coded pkt into
++// the decoded frame. The only thing that tracks like that in V4L2 stateful
++// is timestamp. PTS maps to timestamp for this decode. FFmpeg makes no
++// guarantees about PTS being unique or specified for every frame so replace
++// the supplied PTS with a simple incrementing number and keep a circular
++// buffer of all the things we want preserved (including the original PTS)
++// indexed by the tracking no.
++static void
++xlat_pts_in(AVCodecContext *const avctx, V4L2m2mContext *const s, AVPacket *const avpkt)
++{
++    int64_t track_pts;
++
++    // Avoid 0
++    if (++s->track_no == 0)
++        s->track_no = 1;
++
++    track_pts = track_to_pts(avctx, s->track_no);
++
++    av_log(avctx, AV_LOG_TRACE, "In PTS=%" PRId64 ", DTS=%" PRId64 ", track=%" PRId64 ", n=%u\n", avpkt->pts, avpkt->dts, track_pts, s->track_no);
++    s->last_pkt_dts = avpkt->dts;
++    s->track_els[s->track_no  % FF_V4L2_M2M_TRACK_SIZE] = (V4L2m2mTrackEl){
++        .discard          = 0,
++        .pkt_size         = avpkt->size,
++        .pts              = avpkt->pts,
++        .reordered_opaque = avctx->reordered_opaque,
++        .pkt_pos          = avpkt->pos,
++        .pkt_duration     = avpkt->duration,
++        .track_pts        = track_pts
++    };
++    avpkt->pts = track_pts;
++}
++
++// Returns -1 if we should discard the frame
++static int
++xlat_pts_out(AVCodecContext *const avctx, V4L2m2mContext *const s, AVFrame *const frame)
++{
++    unsigned int n = pts_to_track(avctx, frame->pts) % FF_V4L2_M2M_TRACK_SIZE;
++    const V4L2m2mTrackEl *const t = s->track_els + n;
++    if (frame->pts == AV_NOPTS_VALUE || frame->pts != t->track_pts)
++    {
++        av_log(avctx, AV_LOG_INFO, "Tracking failure: pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
++        frame->pts              = AV_NOPTS_VALUE;
++        frame->pkt_dts          = s->last_pkt_dts;
++        frame->reordered_opaque = s->last_opaque;
++        frame->pkt_pos          = -1;
++        frame->pkt_duration     = 0;
++        frame->pkt_size         = -1;
++    }
++    else if (!t->discard)
++    {
++        frame->pts              = t->pts;
++        frame->pkt_dts          = s->last_pkt_dts;
++        frame->reordered_opaque = t->reordered_opaque;
++        frame->pkt_pos          = t->pkt_pos;
++        frame->pkt_duration     = t->pkt_duration;
++        frame->pkt_size         = t->pkt_size;
++
++        s->last_opaque = s->track_els[n].reordered_opaque;
++        s->track_els[n].pts = AV_NOPTS_VALUE;  // If we hit this again deny accurate knowledge of PTS
++    }
++    else
++    {
++        av_log(avctx, AV_LOG_DEBUG, "Discard frame (flushed): pts=%" PRId64 ", track[%d]=%" PRId64 "\n", frame->pts, n, t->track_pts);
++        return -1;
++    }
++
++#if FF_API_PKT_PTS
++FF_DISABLE_DEPRECATION_WARNINGS
++    frame->pkt_pts = frame->pts;
++FF_ENABLE_DEPRECATION_WARNINGS
++#endif
++    frame->best_effort_timestamp = frame->pts;
++    frame->pkt_dts               = frame->pts;  // We can't emulate what s/w does in a useful manner?
++    av_log(avctx, AV_LOG_TRACE, "Out PTS=%" PRId64 ", DTS=%" PRId64 "\n", frame->pts, frame->pkt_dts);
++    return 0;
++}
++
++static inline int stream_started(const V4L2m2mContext * const s) {
++    return s->capture.streamon && s->output.streamon;
++}
++
++#define NQ_OK        0
++#define NQ_Q_FULL    1
++#define NQ_SRC_EMPTY 2
++#define NQ_DRAINING  3
++#define NQ_DEAD      4
++
++#define TRY_DQ(nq_status) ((nq_status) >= NQ_OK && (nq_status) <= NQ_DRAINING)
++
++// AVERROR_EOF     Flushing an already flushed stream
++// -ve             Error (all errors except EOF are unexpected)
++// NQ_OK (0)       OK
++// NQ_Q_FULL       Dst full (retry if we think V4L2 Q has space now)
++// NQ_SRC_EMPTY    Src empty (do not retry)
++// NQ_DRAINING     At EOS, dQ dest until EOS there too
++// NQ_DEAD         Not running (do not retry, do not attempt capture dQ)
++
++static int try_enqueue_src(AVCodecContext * const avctx, V4L2m2mContext * const s)
+ {
+-    V4L2m2mContext *s = ((V4L2m2mPriv*)avctx->priv_data)->context;
+-    V4L2Context *const capture = &s->capture;
+-    V4L2Context *const output = &s->output;
+-    AVPacket avpkt = {0};
+     int ret;
+
+-    if (s->buf_pkt.size) {
+-        avpkt = s->buf_pkt;
+-        memset(&s->buf_pkt, 0, sizeof(AVPacket));
+-    } else {
+-        ret = ff_decode_get_packet(avctx, &avpkt);
+-        if (ret < 0 && ret != AVERROR_EOF)
++    // If we don't already have a coded packet - get a new one
++    // We will already have a coded pkt if the output Q was full last time we
++    // tried to Q it
++    if (!s->buf_pkt.size) {
++        ret = ff_decode_get_packet(avctx, &s->buf_pkt);
++
++        if (ret == AVERROR(EAGAIN)) {
++            if (!stream_started(s)) {
++                av_log(avctx, AV_LOG_TRACE, "%s: receive_frame before 1st coded packet\n", __func__);
++                return NQ_DEAD;
++            }
++            return NQ_SRC_EMPTY;
++        }
++
++        if (ret == AVERROR_EOF) {
++            // EOF - enter drain mode
++            av_log(avctx, AV_LOG_TRACE, "--- EOS req: ret=%d, size=%d, started=%d, drain=%d\n",
++                   ret, s->buf_pkt.size, stream_started(s), s->draining);
++            if (!stream_started(s)) {
++                av_log(avctx, AV_LOG_DEBUG, "EOS on flushed stream\n");
++                s->draining = 1;
++                s->capture.done = 1;
++                return AVERROR_EOF;
++            }
++
++            if (!s->draining) {
++                // Calling enqueue with an empty pkt starts drain
++                av_assert0(s->buf_pkt.size == 0);
++                ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt, NULL, 0, 1);
++                if (ret) {
++                    av_log(avctx, AV_LOG_ERROR, "Failed to start drain: ret=%d\n", ret);
++                    return ret;
++                }
++            }
++            return NQ_DRAINING;
++        }
++
++        if (ret < 0) {
++            av_log(avctx, AV_LOG_ERROR, "Failed to get coded packet: err=%d\n", ret);
+             return ret;
++        }
++
++        xlat_pts_in(avctx, s, &s->buf_pkt);
+     }
+
+-    if (s->draining)
+-        goto dequeue;
++    if ((ret = check_output_streamon(avctx, s)) != 0)
++        return ret;
+
+-    ret = ff_v4l2_context_enqueue_packet(output, &avpkt);
+-    if (ret < 0) {
+-        if (ret != AVERROR(EAGAIN))
+-           return ret;
++    ret = ff_v4l2_context_enqueue_packet(&s->output, &s->buf_pkt,
++                                         avctx->extradata, s->extdata_sent ? 0 : avctx->extradata_size,
++                                         1);
++
++    if (ret == AVERROR(EAGAIN)) {
++        // Out of input buffers - keep packet
++        ret = NQ_Q_FULL;
++    }
++    else {
++        // In all other cases we are done with this packet
++        av_packet_unref(&s->buf_pkt);
++        s->extdata_sent = 1;
+
+-        s->buf_pkt = avpkt;
+-        /* no input buffers available, continue dequeing */
++        if (ret) {
++            av_log(avctx, AV_LOG_ERROR, "Packet enqueue failure: err=%d\n", ret);
++            return ret;
++        }
+     }
+
+-    if (avpkt.size) {
+-        ret = v4l2_try_start(avctx);
+-        if (ret) {
+-            av_packet_unref(&avpkt);
++    // Start if we haven't
++    {
++        const int ret2 = v4l2_try_start(avctx);
++        if (ret2) {
++            av_log(avctx, AV_LOG_DEBUG, "Start failure: err=%d\n", ret2);
++            ret = (ret2 == AVERROR(ENOMEM)) ? ret2 : NQ_DEAD;
++        }
++    }
+
+-            /* cant recover */
+-            if (ret == AVERROR(ENOMEM))
+-                return ret;
++    return ret;
++}
+
+-            return 0;
++static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
++{
++    V4L2m2mContext *const s = ((V4L2m2mPriv*)avctx->priv_data)->context;
++    int src_rv;
++    int dst_rv = 1;  // Non-zero (done), non-negative (error) number
++
++    do {
++        src_rv = try_enqueue_src(avctx, s);
++
++        // If we got a frame last time and we have nothing to enqueue then
++        // return now. rv will be AVERROR(EAGAIN) indicating that we want more input
++        // This should mean that once decode starts we enter a stable state where
++        // we alternately ask for input and produce output
++        if (s->req_pkt && src_rv == NQ_SRC_EMPTY)
++            break;
++
++        if (src_rv == NQ_Q_FULL && dst_rv == AVERROR(EAGAIN)) {
++            av_log(avctx, AV_LOG_WARNING, "Poll says src Q has space but enqueue fail");
++            src_rv = NQ_SRC_EMPTY;  // If we can't enqueue pretend that there is nothing to enqueue
++        }
++
++        // Try to get a new frame if
++        // (a) we haven't already got one AND
++        // (b) enqueue returned a status indicating that decode should be attempted
++        if (dst_rv != 0 && TRY_DQ(src_rv)) {
++            do {
++                // Dequeue frame will unref any previous contents of frame
++                // if it returns success so we don't need an explicit unref
++                // when discarding
++                // This returns AVERROR(EAGAIN) if there isn't a frame ready yet
++                // but there is room in the input Q
++                dst_rv = ff_v4l2_context_dequeue_frame(&s->capture, frame, -1, 1);
++
++                if (dst_rv == AVERROR_EOF && (s->draining || s->capture.done))
++                    av_log(avctx, AV_LOG_DEBUG, "Dequeue EOF: draining=%d, cap.done=%d\n",
++                           s->draining, s->capture.done);
++                else if (dst_rv && dst_rv != AVERROR(EAGAIN))
++                    av_log(avctx, AV_LOG_ERROR, "Packet dequeue failure: draining=%d, cap.done=%d, err=%d\n",
++                           s->draining, s->capture.done, dst_rv);
++
++                // Go again if we got a frame that we need to discard
++            } while (dst_rv == 0 && xlat_pts_out(avctx, s, frame));
++        }
++
++        // Continue trying to enqueue packets if either
++        // (a) we succeeded last time OR
++        // (b) enqueue failed due to input Q full AND there is now room
++    } while (src_rv == NQ_OK || (src_rv == NQ_Q_FULL && dst_rv == AVERROR(EAGAIN)) );
++
++    // Ensure that the frame contains nothing if we aren't returning a frame
++    // (might happen when discarding)
++    if (dst_rv)
++        av_frame_unref(frame);
++
++    // If we got a frame this time ask for a pkt next time
++    s->req_pkt = (dst_rv == 0);
++
++#if 0
++    if (dst_rv == 0)
++    {
++        static int z = 0;
++        if (++z > 50) {
++            av_log(avctx, AV_LOG_ERROR, "Streamoff and die?\n");
++            ff_v4l2_context_set_status(&s->capture, VIDIOC_STREAMOFF);
++            return -1;
+         }
+     }
++#endif
++
++    return dst_rv == 0 ? 0 :
++        src_rv < 0 ? src_rv :
++        dst_rv < 0 ? dst_rv :
++            AVERROR(EAGAIN);
++}
++
++#if 0
++#include <time.h>
++static int64_t us_time(void)
++{
++    struct timespec ts;
++    clock_gettime(CLOCK_MONOTONIC, &ts);
++    return (int64_t)ts.tv_sec * 1000000 + ts.tv_nsec / 1000;
++}
++
++static int v4l2_receive_frame(AVCodecContext *avctx, AVFrame *frame)
++{
++    int ret;
++    const int64_t now = us_time();
++    int64_t done;
++    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++    ret = v4l2_receive_frame2(avctx, frame);
++    done = us_time();
++    av_log(avctx, AV_LOG_TRACE, ">>> %s: rx time=%" PRId64 ", rv=%d\n", __func__, done - now, ret);
++    return ret;
++}
++#endif
++
++static uint32_t max_coded_size(const AVCodecContext * const avctx)
++{
++    uint32_t wxh = avctx->coded_width * avctx->coded_height;
++    uint32_t size;
+
+-dequeue:
+-    if (!s->buf_pkt.size)
+-        av_packet_unref(&avpkt);
+-    return ff_v4l2_context_dequeue_frame(capture, frame, -1);
++    // Currently the only thing we try to set our own limits for is H264
++    if (avctx->codec_id != AV_CODEC_ID_H264)
++        return 0;
++
++    size = wxh * 3 / 2;
++    // H.264 Annex A table A-1 gives minCR which is either 2 or 4
++    // unfortunately that doesn't yield an actually useful limit
++    // and it should be noted that frame 0 is special cased to allow
++    // a bigger number which really isn't helpful for us. So just pick
++    // frame_size / 2
++    size /= 2;
++    // Add 64k to allow for any overheads and/or encoder hopefulness
++    // with small WxH
++    return size + (1 << 16);
+ }
+
+ static av_cold int v4l2_decode_init(AVCodecContext *avctx)
+@@ -186,8 +481,12 @@ static av_cold int v4l2_decode_init(AVCo
+     V4L2Context *capture, *output;
+     V4L2m2mContext *s;
+     V4L2m2mPriv *priv = avctx->priv_data;
++    int gf_pix_fmt;
+     int ret;
+
++    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++
++    av_log(avctx, AV_LOG_INFO, "level=%d\n", avctx->level);
+     ret = ff_v4l2_m2m_create_context(priv, &s);
+     if (ret < 0)
+         return ret;
+@@ -204,17 +503,43 @@ static av_cold int v4l2_decode_init(AVCo
+
+     output->av_codec_id = avctx->codec_id;
+     output->av_pix_fmt  = AV_PIX_FMT_NONE;
++    output->min_buf_size = max_coded_size(avctx);
+
+     capture->av_codec_id = AV_CODEC_ID_RAWVIDEO;
+     capture->av_pix_fmt = avctx->pix_fmt;
++    capture->min_buf_size = 0;
++
++    /* the client requests the codec to generate DRM frames:
++     *   - data[0] will therefore point to the returned AVDRMFrameDescriptor
++     *       check the ff_v4l2_buffer_to_avframe conversion function.
++     *   - the DRM frame format is passed in the DRM frame descriptor layer.
++     *       check the v4l2_get_drm_frame function.
++     */
++
++    gf_pix_fmt = ff_get_format(avctx, avctx->codec->pix_fmts);
++    av_log(avctx, AV_LOG_DEBUG, "avctx requested=%d (%s); get_format requested=%d (%s)\n",
++           avctx->pix_fmt, av_get_pix_fmt_name(avctx->pix_fmt), gf_pix_fmt, av_get_pix_fmt_name(gf_pix_fmt));
++
++    s->output_drm = 0;
++    if (gf_pix_fmt == AV_PIX_FMT_DRM_PRIME || avctx->pix_fmt == AV_PIX_FMT_DRM_PRIME) {
++        avctx->pix_fmt = AV_PIX_FMT_DRM_PRIME;
++        s->output_drm = 1;
++    }
++
++    s->device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DRM);
++    if (!s->device_ref) {
++        ret = AVERROR(ENOMEM);
++        return ret;
++    }
++
++    ret = av_hwdevice_ctx_init(s->device_ref);
++    if (ret < 0)
++        return ret;
+
+     s->avctx = avctx;
+     ret = ff_v4l2_m2m_codec_init(priv);
+     if (ret) {
+         av_log(avctx, AV_LOG_ERROR, "can't configure decoder\n");
+-        s->self_ref = NULL;
+-        av_buffer_unref(&priv->context_ref);
+-
+         return ret;
+     }
+
+@@ -223,10 +548,53 @@ static av_cold int v4l2_decode_init(AVCo
+
+ static av_cold int v4l2_decode_close(AVCodecContext *avctx)
+ {
+-    V4L2m2mPriv *priv = avctx->priv_data;
+-    V4L2m2mContext *s = priv->context;
+-    av_packet_unref(&s->buf_pkt);
+-    return ff_v4l2_m2m_codec_end(priv);
++    int rv;
++    av_log(avctx, AV_LOG_TRACE, "<<< %s\n", __func__);
++    rv = ff_v4l2_m2m_codec_end(avctx->priv_data);
++    av_log(avctx, AV_LOG_TRACE, ">>> %s: rv=%d\n", __func__, rv);
++    return rv;
++}
++
++static void v4l2_decode_flush(AVCodecContext *avctx)
++{
++    // An alternative and more drastic form of flush is to simply do this:
++    //    v4l2_decode_close(avctx);
++    //    v4l2_decode_init(avctx);
++    // The downside is that this keeps a decoder open until all the frames
++    // associated with it have been returned.  This is a bit wasteful on
++    // possibly limited h/w resources and fails on a Pi for this reason unless
++    // more GPU mem is allocated than is the default.
++
++    V4L2m2mPriv * const priv = avctx->priv_data;
++    V4L2m2mContext * const s = priv->context;
++    V4L2Context * const output = &s->output;
++    V4L2Context * const capture = &s->capture;
++    int ret, i;
++
++    av_log(avctx, AV_LOG_TRACE, "<<< %s: streamon=%d\n", __func__, output->streamon);
++
++    // Reflushing everything is benign, quick and avoids having to worry about
++    // states like EOS processing so don't try to optimize out (having got it
++    // wrong once)
++
++    ret = ff_v4l2_context_set_status(output, VIDIOC_STREAMOFF);
++    if (ret < 0)
++        av_log(avctx, AV_LOG_ERROR, "VIDIOC_STREAMOFF %s error: %d\n", output->name, ret);
++
++    // V4L2 makes no guarantees about whether decoded frames are flushed or not
++    // so mark all frames we are tracking to be discarded if they appear
++    for (i = 0; i != FF_V4L2_M2M_TRACK_SIZE; ++i)
++        s->track_els[i].discard = 1;
++
++    // resend extradata
++    s->extdata_sent = 0;
++    // clear EOS status vars
++    s->draining = 0;
++    output->done = 0;
++    capture->done = 0;
++
++    // Stream on will occur when we actually submit a new frame
++    av_log(avctx, AV_LOG_TRACE, ">>> %s\n", __func__);
+ }
+
+ #define OFFSET(x) offsetof(V4L2m2mPriv, x)
+@@ -235,10 +603,16 @@ static av_cold int v4l2_decode_close(AVC
+ static const AVOption options[] = {
+     V4L_M2M_DEFAULT_OPTS,
+     { "num_capture_buffers", "Number of buffers in the capture context",
+-        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 20, INT_MAX, FLAGS },
++        OFFSET(num_capture_buffers), AV_OPT_TYPE_INT, {.i64 = 20}, 2, INT_MAX, FLAGS },
++    { "pixel_format", "Pixel format to be used by the decoder", OFFSET(pix_fmt), AV_OPT_TYPE_PIXEL_FMT, {.i64 = AV_PIX_FMT_NONE}, AV_PIX_FMT_NONE, AV_PIX_FMT_NB, FLAGS },
+     { NULL},
+ };
+
++static const AVCodecHWConfigInternal *v4l2_m2m_hw_configs[] = {
++    HW_CONFIG_INTERNAL(DRM_PRIME),
++    NULL
++};
++
+ #define M2MDEC_CLASS(NAME) \
+     static const AVClass v4l2_m2m_ ## NAME ## _dec_class = { \
+         .class_name = #NAME "_v4l2m2m_decoder", \
+@@ -259,9 +633,15 @@ static const AVOption options[] = {
+         .init           = v4l2_decode_init, \
+         .receive_frame  = v4l2_receive_frame, \
+         .close          = v4l2_decode_close, \
++        .flush          = v4l2_decode_flush, \
+         .bsfs           = bsf_name, \
+         .capabilities   = AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_DELAY | AV_CODEC_CAP_AVOID_PROBING, \
+-        .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS, \
++        .caps_internal  = FF_CODEC_CAP_SETS_PKT_DTS | FF_CODEC_CAP_INIT_CLEANUP, \
++        .pix_fmts       = (const enum AVPixelFormat[]) { AV_PIX_FMT_DRM_PRIME, \
++                                                         AV_PIX_FMT_NV12, \
++                                                         AV_PIX_FMT_YUV420P, \
++                                                         AV_PIX_FMT_NONE}, \
++        .hw_configs     = v4l2_m2m_hw_configs, \
+         .wrapper_name   = "v4l2m2m", \
+     }
+
+--- /dev/null
++++ b/libavcodec/v4l2_req_decode_q.c
+@@ -0,0 +1,84 @@
++#include <memory.h>
++#include <semaphore.h>
++#include <pthread.h>
++
++#include "v4l2_req_decode_q.h"
++
++int decode_q_in_q(const req_decode_ent * const d)
++{
++    return d->in_q;  // set to 1 by decode_q_add, cleared by decode_q_remove; read here without a lock
++}
++
++void decode_q_add(req_decode_q * const q, req_decode_ent * const d)
++{
++    // Append d at the tail of the list; every link update is made under q_lock
++    pthread_mutex_lock(&q->q_lock);
++
++    d->in_q = 1;
++    d->next = NULL;
++    d->prev = q->head ? q->tail : NULL;
++
++    if (q->head)
++        q->tail->next = d;
++    else
++        q->head = d;
++    q->tail = d;
++
++    pthread_mutex_unlock(&q->q_lock);
++}
++
++// Unlink d from q; a no-op when d is not queued. If d was the head, wake every decode_q_wait() sleeper
++void decode_q_remove(req_decode_q * const q, req_decode_ent * const d)
++{
++    int try_signal = 0;
++
++    if (!d->in_q)  // NOTE(review): tested before q_lock is taken - confirm callers never race add/remove on one entry
++        return;
++
++    pthread_mutex_lock(&q->q_lock);
++    if (d->prev)
++        d->prev->next = d->next;
++    else {
++        try_signal = 1;  // No prev means d was the head, so the head is about to change
++        q->head = d->next;
++    }
++
++    if (d->next)
++        d->next->prev = d->prev;
++    else
++        q->tail = d->prev;  // d was the tail
++
++    // Clearing the links is not strictly needed but makes debugging easier
++    d->next = NULL;
++    d->prev = NULL;
++    d->in_q = 0;
++    pthread_mutex_unlock(&q->q_lock);
++
++    if (try_signal)  // the broadcast is issued after q_lock has been released
++        pthread_cond_broadcast(&q->q_cond);
++}
++
++void decode_q_wait(req_decode_q * const q, req_decode_ent * const d)
++{
++    pthread_mutex_lock(&q->q_lock);
++    // Sleep until d is the head entry; q_cond is broadcast by decode_q_remove() when a head entry is unlinked
++    while (q->head != d)
++        pthread_cond_wait(&q->q_cond, &q->q_lock);
++    // NOTE(review): nothing here checks that d is queued - presumably callers add d first; confirm
++    pthread_mutex_unlock(&q->q_lock);
++}
++
++void decode_q_uninit(req_decode_q * const q)
++{
++    pthread_mutex_destroy(&q->q_lock);  // return values are ignored; entries still linked are not touched
++    pthread_cond_destroy(&q->q_cond);
++}
++
++void decode_q_init(req_decode_q * const q)
++{
++    memset(q, 0, sizeof(*q));  // zeroes head and tail, so the queue starts empty
++    pthread_mutex_init(&q->q_lock, NULL);  // default attributes; the init results are not checked
++    pthread_cond_init(&q->q_cond, NULL);
++}
++
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_decode_q.h
+@@ -0,0 +1,25 @@
++#ifndef AVCODEC_V4L2_REQ_DECODE_Q_H
++#define AVCODEC_V4L2_REQ_DECODE_Q_H
++#include <pthread.h>
++
++typedef struct req_decode_ent {
++    struct req_decode_ent *next, *prev;  // neighbours in the doubly linked queue (NULL at either end)
++    int in_q;                            // non-zero while the entry is linked into a queue
++} req_decode_ent;
++
++typedef struct req_decode_q {
++    pthread_mutex_t q_lock;
++    pthread_cond_t q_cond;
++    req_decode_ent * head;
++    req_decode_ent * tail;
++} req_decode_q;
++
++int decode_q_in_q(const req_decode_ent * const d);
++void decode_q_add(req_decode_q * const q, req_decode_ent * const d);
++void decode_q_remove(req_decode_q * const q, req_decode_ent * const d);
++void decode_q_wait(req_decode_q * const q, req_decode_ent * const d);
++void decode_q_uninit(req_decode_q * const q);
++void decode_q_init(req_decode_q * const q);
++
++#endif
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_devscan.c
+@@ -0,0 +1,449 @@
++#include <errno.h>
++#include <fcntl.h>
++#include <libudev.h>
++#include <stdlib.h>
++#include <string.h>
++#include <unistd.h>
++
++#include <sys/ioctl.h>
++#include <sys/sysmacros.h>
++
++#include <linux/media.h>
++#include <linux/videodev2.h>
++
++#include "v4l2_req_devscan.h"
++#include "v4l2_req_utils.h"
++
++struct decdev {
++    enum v4l2_buf_type src_type;  // V4L2 buffer type the source format was probed on
++    uint32_t src_fmt_v4l2;        // V4L2 pixelformat of that source format
++    const char * vname;           // video device node path
++    const char * mname;           // media device node path
++};
++
++struct devscan {
++    struct decdev env;       // override filled from getenv() in devscan_build(); its strings are not owned
++    unsigned int dev_size;   // entries allocated in devs
++    unsigned int dev_count;  // entries of devs in use
++    struct decdev *devs;     // realloc-grown array; each entry owns strdup'd vname/mname
++};
++
++static int video_src_pixfmt_supported(uint32_t fmt)
++{
++    return 1;  // placeholder filter: fmt is ignored and every source format is accepted
++}
++
++static void v4l2_setup_format(struct v4l2_format *format, unsigned int type,
++                  unsigned int width, unsigned int height,
++                  unsigned int pixelformat)
++{
++    // Only OUTPUT-type queues are given an explicit buffer size (4MB); others leave it 0
++    const unsigned int sizeimage = V4L2_TYPE_IS_OUTPUT(type) ? 4 * 1024 * 1024 : 0;
++
++    memset(format, 0, sizeof(*format));
++    format->type = type;
++    if (V4L2_TYPE_IS_MULTIPLANAR(type)) {
++        struct v4l2_pix_format_mplane *const mp = &format->fmt.pix_mp;
++        mp->width = width;
++        mp->height = height;
++        mp->pixelformat = pixelformat;
++        mp->plane_fmt[0].sizeimage = sizeimage;  // only plane 0 is sized
++    } else {
++        struct v4l2_pix_format *const sp = &format->fmt.pix;
++        sp->width = width;
++        sp->height = height;
++        sp->pixelformat = pixelformat;
++        sp->sizeimage = sizeimage;
++    }
++}
++
++static int v4l2_set_format(int video_fd, unsigned int type, unsigned int pixelformat,
++            unsigned int width, unsigned int height)
++{
++    struct v4l2_format fmt;
++    v4l2_setup_format(&fmt, type, width, height, pixelformat);
++    if (ioctl(video_fd, VIDIOC_S_FMT, &fmt) != 0)
++        return -errno;  // negative errno on failure; the ioctl is attempted only once
++    return 0;
++}
++
++// Query the capability flags of video_fd.
++// Returns 0 on success or -errno if VIDIOC_QUERYCAP fails.
++static int v4l2_query_capabilities(int video_fd, unsigned int *capabilities)
++{
++    struct v4l2_capability cap = { 0 };
++
++    if (ioctl(video_fd, VIDIOC_QUERYCAP, &cap) < 0)
++        return -errno;
++
++    // A NULL capabilities pointer means the caller only wanted the query itself
++    if (capabilities == NULL)
++        return 0;
++
++    // Hand back device_caps when V4L2_CAP_DEVICE_CAPS is set, else the general capabilities
++    *capabilities = (cap.capabilities & V4L2_CAP_DEVICE_CAPS) != 0 ?
++        cap.device_caps : cap.capabilities;
++    return 0;
++}
++
++static int devscan_add(struct devscan *const scan,
++                       enum v4l2_buf_type src_type,
++                       uint32_t src_fmt_v4l2,
++                       const char * vname,
++                       const char * mname)
++{
++    struct decdev *d;
++    // Append one entry to scan->devs; returns 0 or -ENOMEM. vname/mname are copied with strdup
++    if (scan->dev_size <= scan->dev_count) {
++        unsigned int n = !scan->dev_size ? 4 : scan->dev_size * 2;  // start at 4 entries, then double
++        d = realloc(scan->devs, n * sizeof(*d));
++        if (!d)
++            return -ENOMEM;  // scan->devs is not modified on this path
++        scan->devs = d;
++        scan->dev_size = n;
++    }
++
++    d = scan->devs + scan->dev_count;  // first unused slot
++    d->src_type = src_type;
++    d->src_fmt_v4l2 = src_fmt_v4l2;
++    d->vname = strdup(vname);
++    if (!d->vname)
++        return -ENOMEM;
++    d->mname = strdup(mname);
++    if (!d->mname) {
++        free((char *)d->vname);  // cast drops const: the string is our own strdup copy
++        return -ENOMEM;
++    }
++    ++scan->dev_count;  // the slot only counts as used once both copies succeeded
++    return 0;
++}
++
++void devscan_delete(struct devscan **const pScan)
++{
++    unsigned int i;
++    struct devscan * const scan = *pScan;
++
++    if (!scan)
++        return;
++    *pScan = NULL;  // the caller's pointer is cleared before anything is freed
++
++    for (i = 0; i < scan->dev_count; ++i) {
++        free((char*)scan->devs[i].mname);  // per-entry strings are the strdup copies made by devscan_add
++        free((char*)scan->devs[i].vname);
++    }
++    free(scan->devs);  // env.mname/env.vname are not freed here
++    free(scan);
++}
++
++#define REQ_BUF_CAPS (\
++    V4L2_BUF_CAP_SUPPORTS_DMABUF |\
++    V4L2_BUF_CAP_SUPPORTS_REQUESTS |\
++    V4L2_BUF_CAP_SUPPORTS_M2M_HOLD_CAPTURE_BUF)
++
++// Probe each format of buffer type type_v4l2 on fd (S_FMT at a nominal 720x480, then REQBUFS) and add to scan
++// those whose queue reports all of REQ_BUF_CAPS. Returns when VIDIOC_ENUM_FMT fails; EINVAL is not logged.
++static void probe_formats(void * const dc, struct devscan *const scan, const int fd,
++              const unsigned int type_v4l2,
++              const char *const mpath, const char *const vpath)
++{
++    unsigned int i;
++    for (i = 0;; ++i) {
++        struct v4l2_fmtdesc fmtdesc = {
++            .index = i,
++            .type = type_v4l2
++        };
++        struct v4l2_requestbuffers rbufs = {
++            .type = type_v4l2,  // .count is left at its zero default
++            .memory = V4L2_MEMORY_MMAP
++        };
++        int rv;
++        while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) {
++            if (errno == EINTR)
++                continue;
++            if (errno != EINVAL)
++                request_err(dc, "Enum[%d] failed for type=%d\n", i, type_v4l2);
++            return;
++        }
++        if (!video_src_pixfmt_supported(fmtdesc.pixelformat))
++            continue;
++
++        if (v4l2_set_format(fd, type_v4l2, fmtdesc.pixelformat, 720, 480)) {
++            request_debug(dc, "Set failed for type=%d, pf=%.4s\n", type_v4l2, (char*)&fmtdesc.pixelformat);
++            continue;
++        }
++
++        // Retry REQBUFS only on EINTR; any other failure moves on to the next format instead of spinning
++        while ((rv = ioctl(fd, VIDIOC_REQBUFS, &rbufs)) != 0 && errno == EINTR)
++            /* retry */;
++        if (rv != 0) {
++            request_debug(dc, "%s: Reqbufs failed\n", vpath);
++            continue;
++        }
++
++        if ((rbufs.capabilities & REQ_BUF_CAPS) != REQ_BUF_CAPS) {
++            request_debug(dc, "%s: Buf caps %#x insufficient\n", vpath, rbufs.capabilities);
++            continue;
++        }
++
++        request_debug(dc, "Adding: %s,%s pix=%#x, type=%d\n", mpath, vpath, fmtdesc.pixelformat, type_v4l2);
++        if (devscan_add(scan, type_v4l2, fmtdesc.pixelformat, vpath, mpath) != 0)
++            request_err(dc, "%s: Failed to record device entry\n", vpath);
++    }
++}
++
++
++static int probe_video_device(void * const dc,
++                   struct udev_device *const device,
++                   struct devscan *const scan,
++                   const char *const mpath)
++{
++    int ret;
++    unsigned int capabilities = 0;
++    int video_fd = -1;
++    // Open the video node behind device and, if it is a streaming mem2mem device, probe its OUTPUT formats into scan
++    const char *path = udev_device_get_devnode(device);
++    if (!path) {
++        request_err(dc, "%s: get video device devnode failed\n", __func__);
++        ret = -EINVAL;
++        goto fail;
++    }
++
++    video_fd = open(path, O_RDWR, 0);
++    if (video_fd == -1) {
++        ret = -errno;
++        request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(errno), errno);
++        goto fail;
++    }
++
++    ret = v4l2_query_capabilities(video_fd, &capabilities);
++    if (ret < 0) {
++        request_err(dc, "%s: get video capability failed, %s (%d)\n", __func__, strerror(-ret), -ret);
++        goto fail;
++    }
++
++    request_debug(dc, "%s: path=%s capabilities=%#x\n", __func__, path, capabilities);
++
++    if (!(capabilities & V4L2_CAP_STREAMING)) {
++        request_debug(dc, "%s: missing required streaming capability\n", __func__);
++        ret = -EINVAL;
++        goto fail;
++    }
++
++    if (!(capabilities & (V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M))) {
++        request_debug(dc, "%s: missing required mem2mem capability\n", __func__);
++        ret = -EINVAL;
++        goto fail;
++    }
++
++    /* Should check capture formats too... */
++    if ((capabilities & V4L2_CAP_VIDEO_M2M) != 0)  // single-planar mem2mem: probe the OUTPUT queue
++        probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT, mpath, path);
++    if ((capabilities & V4L2_CAP_VIDEO_M2M_MPLANE) != 0)  // multi-planar mem2mem: probe the OUTPUT_MPLANE queue
++        probe_formats(dc, scan, video_fd, V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE, mpath, path);
++    // probe_formats returns nothing, so success here does not imply that any entry was added
++    close(video_fd);
++    return 0;
++
++fail:
++    if (video_fd >= 0)
++        close(video_fd);
++    return ret;  // negative errno-style code set at the failing step above
++}
++
++static int probe_media_device(void * const dc,
++                   struct udev_device *const device,
++                   struct devscan *const scan)
++{
++    int ret = 0;  // stays 0 when no V4L video interface is present (was returned uninitialised before)
++    int rv;
++    struct media_device_info device_info = { 0 };
++    struct media_v2_topology topology = { 0 };
++    struct media_v2_interface *interfaces = NULL;
++    struct udev *udev = udev_device_get_udev(device);
++    struct udev_device *video_device;
++    dev_t devnum;
++    int media_fd = -1;
++    // Walk the topology of one media device and probe each V4L video interface into scan. Returns 0 or negative errno
++    const char *path = udev_device_get_devnode(device);
++    if (!path) {
++        request_err(dc, "%s: get media device devnode failed\n", __func__);
++        ret = -EINVAL;
++        goto fail;
++    }
++
++    media_fd = open(path, O_RDWR, 0);
++    if (media_fd < 0) {
++        ret = -errno;
++        request_err(dc, "%s: opening %s failed, %s (%d)\n", __func__, path, strerror(-ret), -ret);
++        goto fail;
++    }
++
++    rv = ioctl(media_fd, MEDIA_IOC_DEVICE_INFO, &device_info);
++    if (rv < 0) {
++        ret = -errno;
++        request_err(dc, "%s: get media device info failed, %s (%d)\n", __func__, strerror(-ret), -ret);
++        goto fail;
++    }
++    // First topology call has no arrays attached: it is only used to learn num_interfaces
++    rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology);
++    if (rv < 0) {
++        ret = -errno;
++        request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret);
++        goto fail;
++    }
++
++    if (topology.num_interfaces <= 0) {
++        request_err(dc, "%s: media device has no interfaces\n", __func__);
++        ret = -EINVAL;
++        goto fail;
++    }
++
++    interfaces = calloc(topology.num_interfaces, sizeof(*interfaces));
++    if (!interfaces) {
++        request_err(dc, "%s: allocating media interface struct failed\n", __func__);
++        ret = -ENOMEM;
++        goto fail;
++    }
++    // Second topology call fills the interface array allocated above
++    topology.ptr_interfaces = (__u64)(uintptr_t)interfaces;
++    rv = ioctl(media_fd, MEDIA_IOC_G_TOPOLOGY, &topology);
++    if (rv < 0) {
++        ret = -errno;
++        request_err(dc, "%s: get media topology failed, %s (%d)\n", __func__, strerror(-ret), -ret);
++        goto fail;
++    }
++
++    for (int i = 0; i < topology.num_interfaces; i++) {
++        if (interfaces[i].intf_type != MEDIA_INTF_T_V4L_VIDEO)
++            continue;
++
++        devnum = makedev(interfaces[i].devnode.major, interfaces[i].devnode.minor);
++        video_device = udev_device_new_from_devnum(udev, 'c', devnum);
++        if (!video_device) {
++            ret = -errno;
++            request_err(dc, "%s: video_device[%d]=%p\n", __func__, i, video_device);
++            continue;
++        }
++
++        ret = probe_video_device(dc, video_device, scan, path);
++        udev_device_unref(video_device);
++        // The first video node that fails to probe ends the walk of this media device
++        if (ret != 0)
++            goto fail;
++    }
++    // Success falls through into the shared cleanup below
++fail:
++    free(interfaces);
++    if (media_fd != -1)
++        close(media_fd);
++    return ret;
++}
++
++const char *decdev_media_path(const struct decdev *const dev)
++{
++    return dev ? dev->mname : NULL;  // NULL-tolerant accessor
++}
++
++const char *decdev_video_path(const struct decdev *const dev)
++{
++    return dev ? dev->vname : NULL;  // NULL-tolerant accessor
++}
++
++enum v4l2_buf_type decdev_src_type(const struct decdev *const dev)
++{
++    return dev ? dev->src_type : 0;  // 0 stands in for "no device"
++}
++
++uint32_t decdev_src_pixelformat(const struct decdev *const dev)
++{
++    return dev ? dev->src_fmt_v4l2 : 0;  // 0 stands in for "no device"
++}
++
++
++const struct decdev *devscan_find(struct devscan *const scan,
++                  const uint32_t src_fmt_v4l2)
++{
++    unsigned int i;
++    // An environment override (both paths set) wins regardless of the requested format
++    if (scan->env.mname && scan->env.vname)
++        return &scan->env;
++    // A zero format selects the first scanned device, if there is one
++    if (!src_fmt_v4l2)
++        return scan->dev_count ? scan->devs + 0 : NULL;
++    // Otherwise return the first entry whose source pixelformat matches exactly
++    for (i = 0; i != scan->dev_count; ++i) {
++        if (scan->devs[i].src_fmt_v4l2 == src_fmt_v4l2)
++            return scan->devs + i;
++    }
++    return NULL;
++}
++
++int devscan_build(void * const dc, struct devscan **pscan)
++{
++    int ret;
++    struct udev *udev = NULL;  // NULL so that the fail path is safe before udev_new() has run
++    struct udev_enumerate *enumerate;
++    struct udev_list_entry *devices;
++    struct udev_list_entry *entry;
++    struct udev_device *device;
++    struct devscan * scan;
++    // Build the device list into *pscan (left NULL on failure). Returns 0 or a negative errno
++    *pscan = NULL;
++
++    scan = calloc(1, sizeof(*scan));
++    if (!scan) {
++        ret = -ENOMEM;
++        goto fail;
++    }
++    // When both environment overrides are set the udev scan is skipped entirely
++    scan->env.mname = getenv("LIBVA_V4L2_REQUEST_MEDIA_PATH");
++    scan->env.vname = getenv("LIBVA_V4L2_REQUEST_VIDEO_PATH");
++    if (scan->env.mname && scan->env.vname) {
++        request_info(dc, "Media/video device env overrides found: %s,%s\n",
++                 scan->env.mname, scan->env.vname);
++        *pscan = scan;
++        return 0;
++    }
++
++    udev = udev_new();
++    if (!udev) {
++        request_err(dc, "%s: allocating udev context failed\n", __func__);
++        ret = -ENOMEM;
++        goto fail;
++    }
++
++    enumerate = udev_enumerate_new(udev);
++    if (!enumerate) {
++        request_err(dc, "%s: allocating udev enumerator failed\n", __func__);
++        ret = -ENOMEM;
++        goto fail;
++    }
++
++    udev_enumerate_add_match_subsystem(enumerate, "media");
++    udev_enumerate_scan_devices(enumerate);
++
++    devices = udev_enumerate_get_list_entry(enumerate);
++    udev_list_entry_foreach(entry, devices) {
++        const char *path = udev_list_entry_get_name(entry);
++        if (!path)
++            continue;
++
++        device = udev_device_new_from_syspath(udev, path);
++        if (!device)
++            continue;
++
++        probe_media_device(dc, device, scan);  // best effort: a failing device does not abort the scan
++        udev_device_unref(device);
++    }
++
++    udev_enumerate_unref(enumerate);
++    udev_unref(udev);  // scan keeps only copied strings, so the udev context can be released here
++    *pscan = scan;
++    return 0;
++
++fail:
++    udev_unref(udev);
++    devscan_delete(&scan);
++    return ret;
++}
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_devscan.h
+@@ -0,0 +1,21 @@
++#ifndef _DEVSCAN_H_
++#define _DEVSCAN_H_
++
++struct devscan;
++struct decdev;
++enum v4l2_buf_type;
++
++/* These return pointers to data in the devscan structure and so are valid
++ * for the lifetime of that
++ */
++const char *decdev_media_path(const struct decdev *const dev);
++const char *decdev_video_path(const struct decdev *const dev);
++enum v4l2_buf_type decdev_src_type(const struct decdev *const dev);
++uint32_t decdev_src_pixelformat(const struct decdev *const dev);
++
++const struct decdev *devscan_find(struct devscan *const scan, const uint32_t src_fmt_v4l2);
++
++int devscan_build(void * const dc, struct devscan **pscan);
++void devscan_delete(struct devscan **const pScan);
++
++#endif
+--- /dev/null
++++ b/libavcodec/v4l2_req_dmabufs.c
+@@ -0,0 +1,266 @@
++#include <stdio.h>
++#include <stdlib.h>
++#include <unistd.h>
++#include <inttypes.h>
++#include <fcntl.h>
++#include <errno.h>
++#include <string.h>
++#include <sys/ioctl.h>
++#include <sys/mman.h>
++#include <linux/mman.h>
++#include <linux/dma-buf.h>
++#include <linux/dma-heap.h>
++
++#include "v4l2_req_dmabufs.h"
++#include "v4l2_req_utils.h"
++
++#define DMABUF_NAME1  "/dev/dma_heap/linux,cma"
++#define DMABUF_NAME2  "/dev/dma_heap/reserved"
++
++#define TRACE_ALLOC 0
++
++struct dmabufs_ctl {
++    int fd;            // open dma-heap device (DMABUF_NAME1, else DMABUF_NAME2)
++    size_t page_size;  // sysconf(_SC_PAGE_SIZE); allocation lengths are rounded up to it
++};
++
++struct dmabuf_h {
++    int fd;          // dmabuf fd owned by this handle; closed in dmabuf_free
++    size_t size;     // allocated size in bytes
++    size_t len;      // bytes in use, as set by dmabuf_len_set
++    void * mapptr;   // mmap of the buffer, or MAP_FAILED while unmapped
++};
++
++#if TRACE_ALLOC
++static unsigned int total_bufs = 0;
++static size_t total_size = 0;
++#endif
++
++struct dmabuf_h * dmabuf_import(int fd, size_t size)
++{
++    struct dmabuf_h *dh;
++    // Wrap a dup() of fd in a new handle of the given size; the fd passed in is not closed. NULL on failure
++    // size is checked first so that a zero size cannot leak the duplicated fd
++    if (size == 0 || (fd = dup(fd)) < 0)
++        return NULL;
++
++    dh = malloc(sizeof(*dh));
++    if (!dh) {
++        close(fd);
++        return NULL;
++    }
++
++    *dh = (struct dmabuf_h) {
++        .fd = fd,
++        .size = size,
++        .mapptr = MAP_FAILED
++    };
++
++#if TRACE_ALLOC
++    ++total_bufs;
++    total_size += dh->size;
++    request_log("%s: Import: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
++#endif
++
++    return dh;
++}
++
++struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h * old, size_t size)
++{
++    struct dmabuf_h * dh;
++    struct dma_heap_allocation_data data = {
++        .len = (size + dbsc->page_size - 1) & ~(dbsc->page_size - 1),  // size rounded up to a page multiple
++        .fd = 0,
++        .fd_flags = O_RDWR,
++        .heap_flags = 0
++    };
++    // Reuse old when its size already equals the rounded length; otherwise old is freed before allocating
++    if (old != NULL) {
++        if (old->size == data.len) {
++            return old;
++        }
++        dmabuf_free(old);
++    }
++    // From here on every NULL return means old (if any) has already been freed
++    if (size == 0 ||
++        (dh = malloc(sizeof(*dh))) == NULL)
++        return NULL;
++
++    while (ioctl(dbsc->fd, DMA_HEAP_IOCTL_ALLOC, &data)) {
++        int err = errno;  // saved before logging
++        request_log("Failed to alloc %" PRIu64 " from dma-heap(fd=%d): %d (%s)\n",
++                (uint64_t)data.len,
++                dbsc->fd,
++                err,
++                strerror(err));
++        if (err == EINTR)  // interrupted: retry (the failure has still been logged above)
++            continue;
++        goto fail;
++    }
++
++    *dh = (struct dmabuf_h){
++        .fd = data.fd,
++        .size = (size_t)data.len,
++        .mapptr = MAP_FAILED
++    };
++
++#if TRACE_ALLOC
++    ++total_bufs;
++    total_size += dh->size;
++    request_log("%s: Alloc: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
++#endif
++
++    return dh;
++
++fail:
++    free(dh);
++    return NULL;
++}
++
++int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags)
++{
++    struct dma_buf_sync sync_req = { .flags = flags };
++    // Re-issue the ioctl for as long as it is merely interrupted
++    for (;;) {
++        if (ioctl(dh->fd, DMA_BUF_IOCTL_SYNC, &sync_req) != -1)
++            return 0;
++        if (errno != EINTR) {
++            const int err = errno;  // saved before logging
++            request_log("%s: ioctl failed: flags=%#x\n", __func__, flags);
++            return -err;
++        }
++    }
++}
++
++int dmabuf_write_start(struct dmabuf_h * const dh)
++{
++    return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_WRITE);  // unlike dmabuf_read_start, does not map the buffer
++}
++
++int dmabuf_write_end(struct dmabuf_h * const dh)
++{
++    return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_WRITE);  // counterpart of dmabuf_write_start
++}
++
++int dmabuf_read_start(struct dmabuf_h * const dh)
++{
++    if (!dmabuf_map(dh))  // maps the buffer on first use; a NULL dh also ends up here
++        return -1;
++    return dmabuf_sync(dh, DMA_BUF_SYNC_START | DMA_BUF_SYNC_READ);
++}
++
++int dmabuf_read_end(struct dmabuf_h * const dh)
++{
++    return dmabuf_sync(dh, DMA_BUF_SYNC_END | DMA_BUF_SYNC_READ);  // counterpart of dmabuf_read_start; mapping is kept
++}
++
++
++void * dmabuf_map(struct dmabuf_h * const dh)
++{
++    // Map on first use and cache the result; MAP_FAILED doubles as the "not mapped" marker
++    if (!dh)
++        return NULL;
++    if (dh->mapptr == MAP_FAILED) {
++        void * const p = mmap(NULL, dh->size, PROT_READ | PROT_WRITE,
++                              MAP_SHARED | MAP_POPULATE, dh->fd, 0);
++        dh->mapptr = p;
++        if (p == MAP_FAILED) {
++            request_log("%s: Map failed\n", __func__);
++            return NULL;
++        }
++    }
++    return dh->mapptr;
++}
++
++int dmabuf_fd(const struct dmabuf_h * const dh)
++{
++    // A NULL handle has no descriptor, which is reported as -1
++    const int fd = dh ? dh->fd : -1;
++    return fd;
++}
++
++size_t dmabuf_size(const struct dmabuf_h * const dh)
++{
++    // Allocated size in bytes; a NULL handle reports 0
++    const size_t n = dh ? dh->size : 0;
++    return n;
++}
++
++size_t dmabuf_len(const struct dmabuf_h * const dh)
++{
++    // Bytes in use as last stored by dmabuf_len_set; a NULL handle reports 0
++    const size_t n = dh ? dh->len : 0;
++    return n;
++}
++
++void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len)
++{
++    dh->len = len;  // no NULL check (unlike the getters) and no comparison against dh->size
++}
++
++
++
++void dmabuf_free(struct dmabuf_h * dh)
++{
++    if (!dh)
++        return;
++    // Release the mapping (if any), the fd and the handle itself; a NULL dh is a no-op
++#if TRACE_ALLOC
++    --total_bufs;
++    total_size -= dh->size;
++    request_log("%s: Free: %zd, total=%zd, bufs=%d\n", __func__, dh->size, total_size, total_bufs);
++#endif
++
++    if (dh->mapptr != MAP_FAILED)
++        munmap(dh->mapptr, dh->size);
++    // Linux releases the fd even when close() reports EINTR; retrying could close an fd reused by another thread
++    close(dh->fd);
++    free(dh);
++}
++
++struct dmabufs_ctl * dmabufs_ctl_new(void)
++{
++    struct dmabufs_ctl * dbsc = malloc(sizeof(*dbsc));
++    // Returns a new control block holding an open dma-heap fd, or NULL on failure
++    if (!dbsc)
++        return NULL;
++    // Try DMABUF_NAME1 first, repeating the open only while it is interrupted
++    while ((dbsc->fd = open(DMABUF_NAME1, O_RDWR)) == -1 &&
++           errno == EINTR)
++        /* Loop */;
++
++    if (dbsc->fd == -1) {  // fall back to DMABUF_NAME2 with the same EINTR retry
++        while ((dbsc->fd = open(DMABUF_NAME2, O_RDWR)) == -1 &&
++               errno == EINTR)
++            /* Loop */;
++        if (dbsc->fd == -1) {
++            request_log("Unable to open either %s or %s\n",
++                    DMABUF_NAME1, DMABUF_NAME2);
++            goto fail;
++        }
++    }
++
++    dbsc->page_size = (size_t)sysconf(_SC_PAGE_SIZE);  // the sysconf result is not checked for failure
++
++    return dbsc;
++
++fail:
++    free(dbsc);
++    return NULL;
++}
++
++void dmabufs_ctl_delete(struct dmabufs_ctl ** const pDbsc)
++{
++    struct dmabufs_ctl * const dbsc = *pDbsc;
++
++    if (!dbsc)
++        return;
++    *pDbsc = NULL;  // the caller's pointer is cleared before the block is released
++
++    // Linux releases the fd even when close() reports EINTR; retrying could close an fd reused by another thread
++    close(dbsc->fd);
++
++    free(dbsc);
++}
++
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_dmabufs.h
+@@ -0,0 +1,38 @@
++#ifndef DMABUFS_H
++#define DMABUFS_H
++
++struct dmabufs_ctl;
++struct dmabuf_h;
++
++struct dmabufs_ctl * dmabufs_ctl_new(void);
++void dmabufs_ctl_delete(struct dmabufs_ctl ** const pdbsc);
++
++// Need not preserve old contents
++// On NULL return old buffer is freed
++struct dmabuf_h * dmabuf_realloc(struct dmabufs_ctl * dbsc, struct dmabuf_h *, size_t size);
++
++static inline struct dmabuf_h * dmabuf_alloc(struct dmabufs_ctl * dbsc, size_t size) {
++    return dmabuf_realloc(dbsc, NULL, size);
++}
++/* Create from existing fd - dups(fd) */
++struct dmabuf_h * dmabuf_import(int fd, size_t size);
++void * dmabuf_map(struct dmabuf_h * const dh);
++
++/* flags from linux/dma-buf.h DMA_BUF_SYNC_xxx */
++int dmabuf_sync(struct dmabuf_h * const dh, unsigned int flags);
++
++int dmabuf_write_start(struct dmabuf_h * const dh);
++int dmabuf_write_end(struct dmabuf_h * const dh);
++int dmabuf_read_start(struct dmabuf_h * const dh);
++int dmabuf_read_end(struct dmabuf_h * const dh);
++
++int dmabuf_fd(const struct dmabuf_h * const dh);
++/* Allocated size */
++size_t dmabuf_size(const struct dmabuf_h * const dh);
++/* Bytes in use */
++size_t dmabuf_len(const struct dmabuf_h * const dh);
++/* Set bytes in use */
++void dmabuf_len_set(struct dmabuf_h * const dh, const size_t len);
++void dmabuf_free(struct dmabuf_h * dh);
++
++#endif
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_v1.c
+@@ -0,0 +1,3 @@
++#define HEVC_CTRLS_VERSION 1
++#include "v4l2_req_hevc_vx.c"
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_v2.c
+@@ -0,0 +1,3 @@
++#define HEVC_CTRLS_VERSION 2
++#include "v4l2_req_hevc_vx.c"
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_hevc_vx.c
+@@ -0,0 +1,1188 @@
++// File included by v4l2_req_hevc_v* - not compiled on its own
++
++#include "decode.h"
++#include "hevcdec.h"
++#include "hwconfig.h"
++
++#include "v4l2_request_hevc.h"
++
++#if HEVC_CTRLS_VERSION == 1
++#include "hevc-ctrls-v1.h"
++
++// Fixup renamed entries
++#define V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT
++
++#elif HEVC_CTRLS_VERSION == 2
++#include "hevc-ctrls-v2.h"
++#else
++#error Unknown HEVC_CTRLS_VERSION
++#endif
++
++#include "libavutil/hwcontext_drm.h"
++
++#include <semaphore.h>
++#include <pthread.h>
++
++#include "v4l2_req_devscan.h"
++#include "v4l2_req_dmabufs.h"
++#include "v4l2_req_pollqueue.h"
++#include "v4l2_req_media.h"
++#include "v4l2_req_utils.h"
++
++// Attached to buf[0] in frame (the code reaches it via a cast of frame->data[0])
++// Pooled in hwcontext so generally create once - 1/frame
++typedef struct V4L2MediaReqDescriptor {
++    AVDRMFrameDescriptor drm;
++
++    // Media
++    uint64_t timestamp; // DPB stamp: set in start_frame, read by frame_capture_dpb()
++    struct qent_dst * qe_dst; // Allocated in start_frame while still NULL; waited on in frame_post_process()
++
++    // Decode only - should be NULL by the time we emit the frame
++    struct req_decode_ent decode_ent;
++
++    struct media_request *req;
++    struct qent_src *qe_src;
++
++#if HEVC_CTRLS_VERSION >= 2
++    struct v4l2_ctrl_hevc_decode_params dec; // Filled by decode_slice when the first slice of a frame arrives
++#endif
++
++    size_t num_slices; // Slices accumulated for the current frame (reset in start_frame)
++    size_t alloced_slices; // Capacity of both slice_params[] and slices[]
++    struct v4l2_ctrl_hevc_slice_params * slice_params; // Parallel to slices[]; both grown by slice_add()
++    struct slice_info * slices;
++
++} V4L2MediaReqDescriptor;
++
++struct slice_info {
++    const uint8_t * ptr; // Slice data as passed to decode_slice (pointer is stored, data is not copied there)
++    size_t len; // bytes
++};
++
++// Handy container for accumulating controls before setting (consumed by set_req_ctls)
++struct req_controls {
++    int has_scaling; // Non-zero: scaling_matrix is included in the controls that get set
++    struct timeval tv; // Passed to qent_src_params_set() in send_slice
++    struct v4l2_ctrl_hevc_sps sps;
++    struct v4l2_ctrl_hevc_pps pps;
++    struct v4l2_ctrl_hevc_scaling_matrix scaling_matrix;
++};
++
++//static uint8_t nalu_slice_start_code[] = { 0x00, 0x00, 0x01 };
++
++
++// Map a V4L2 format (single- or multi-planar) to an FFmpeg pixel format; AV_PIX_FMT_NONE if unmapped
++static enum AVPixelFormat pixel_format_from_format(const struct v4l2_format *const format)
++{
++    switch (V4L2_TYPE_IS_MULTIPLANAR(format->type) ?
++            format->fmt.pix_mp.pixelformat : format->fmt.pix.pixelformat) {
++    case V4L2_PIX_FMT_YUV420:
++        return AV_PIX_FMT_YUV420P;
++    case V4L2_PIX_FMT_NV12:
++        return AV_PIX_FMT_NV12;
++#if CONFIG_SAND
++    case V4L2_PIX_FMT_NV12_COL128:
++        return AV_PIX_FMT_RPI4_8;
++    case V4L2_PIX_FMT_NV12_10_COL128:
++        return AV_PIX_FMT_RPI4_10;
++#endif
++    default:
++        break;
++    }
++    return AV_PIX_FMT_NONE; // Reached for every pixelformat not listed above
++}
++
++static inline uint64_t frame_capture_dpb(const AVFrame * const frame)
++{ // Read the DPB timestamp held in the descriptor that frame->data[0] points at
++    const V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0];
++    return rd->timestamp;
++}
++
++static inline void frame_set_capture_dpb(AVFrame * const frame, const uint64_t dpb_stamp)
++{ // Store dpb_stamp in the descriptor that frame->data[0] points at
++    V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)frame->data[0];
++    rd->timestamp = dpb_stamp;
++}
++
++static void fill_pred_table(const HEVCContext *h, struct v4l2_hevc_pred_weight_table *table)
++{ // Copy the slice header's prediction weights into *table; weights are stored as deltas from (1 << log2 denom)
++    int32_t luma_weight_denom, chroma_weight_denom;
++    const SliceHeader *sh = &h->sh;
++
++    if (sh->slice_type == HEVC_SLICE_I ||
++        (sh->slice_type == HEVC_SLICE_P && !h->ps.pps->weighted_pred_flag) ||
++        (sh->slice_type == HEVC_SLICE_B && !h->ps.pps->weighted_bipred_flag))
++        return; // Weighted prediction not in use for this slice: *table is left exactly as the caller passed it
++
++    table->luma_log2_weight_denom = sh->luma_log2_weight_denom;
++
++    if (h->ps.sps->chroma_format_idc)
++        table->delta_chroma_log2_weight_denom = sh->chroma_log2_weight_denom - sh->luma_log2_weight_denom;
++
++    luma_weight_denom = (1 << sh->luma_log2_weight_denom);
++    chroma_weight_denom = (1 << sh->chroma_log2_weight_denom);
++
++    for (int i = 0; i < 15 && i < sh->nb_refs[L0]; i++) { // NOTE(review): hard cap of 15 - confirm against the table's array sizes
++        table->delta_luma_weight_l0[i] = sh->luma_weight_l0[i] - luma_weight_denom;
++        table->luma_offset_l0[i] = sh->luma_offset_l0[i];
++        table->delta_chroma_weight_l0[i][0] = sh->chroma_weight_l0[i][0] - chroma_weight_denom;
++        table->delta_chroma_weight_l0[i][1] = sh->chroma_weight_l0[i][1] - chroma_weight_denom;
++        table->chroma_offset_l0[i][0] = sh->chroma_offset_l0[i][0];
++        table->chroma_offset_l0[i][1] = sh->chroma_offset_l0[i][1];
++    }
++
++    if (sh->slice_type != HEVC_SLICE_B)
++        return; // List 1 weights are only written for B slices
++
++    for (int i = 0; i < 15 && i < sh->nb_refs[L1]; i++) {
++        table->delta_luma_weight_l1[i] = sh->luma_weight_l1[i] - luma_weight_denom;
++        table->luma_offset_l1[i] = sh->luma_offset_l1[i];
++        table->delta_chroma_weight_l1[i][0] = sh->chroma_weight_l1[i][0] - chroma_weight_denom;
++        table->delta_chroma_weight_l1[i][1] = sh->chroma_weight_l1[i][1] - chroma_weight_denom;
++        table->chroma_offset_l1[i][0] = sh->chroma_offset_l1[i][0];
++        table->chroma_offset_l1[i][1] = sh->chroma_offset_l1[i][1];
++    }
++}
++
++static int find_frame_rps_type(const HEVCContext *h, uint64_t timestamp)
++{ // Return the V4L2 RPS flag of the first list (ST before, ST after, LT) holding a frame with this DPB stamp
++    const HEVCFrame *frame;
++    int i;
++
++    for (i = 0; i < h->rps[ST_CURR_BEF].nb_refs; i++) {
++        frame = h->rps[ST_CURR_BEF].ref[i];
++        if (frame && timestamp == frame_capture_dpb(frame->frame)) // NULL list slots are skipped
++            return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_BEFORE;
++    }
++
++    for (i = 0; i < h->rps[ST_CURR_AFT].nb_refs; i++) {
++        frame = h->rps[ST_CURR_AFT].ref[i];
++        if (frame && timestamp == frame_capture_dpb(frame->frame))
++            return V4L2_HEVC_DPB_ENTRY_RPS_ST_CURR_AFTER;
++    }
++
++    for (i = 0; i < h->rps[LT_CURR].nb_refs; i++) {
++        frame = h->rps[LT_CURR].ref[i];
++        if (frame && timestamp == frame_capture_dpb(frame->frame))
++            return V4L2_HEVC_DPB_ENTRY_RPS_LT_CURR;
++    }
++
++    return 0; // Stamp not found in any of the three lists
++}
++
++static unsigned int
++get_ref_pic_index(const HEVCContext *h, const HEVCFrame *frame,
++                  const struct v4l2_hevc_dpb_entry * const entries,
++                  const unsigned int num_entries)
++{ // Index of the first entries[] element whose timestamp equals frame's DPB stamp (h is not used)
++    uint64_t timestamp;
++
++    if (!frame)
++        return 0; // No frame: falls back to index 0
++
++    timestamp = frame_capture_dpb(frame->frame);
++
++    for (unsigned int i = 0; i < num_entries; i++) {
++        if (entries[i].timestamp == timestamp)
++            return i;
++    }
++
++    return 0; // No match: also index 0, so callers cannot tell this from a real hit on entry 0
++}
++
++static const uint8_t * ptr_from_index(const uint8_t * b, unsigned int idx)
++{ // Advance b by idx bytes, additionally stepping over any 0x03 that follows two or more 0x00 bytes
++    unsigned int z = 0; // Length of the current run of zero bytes
++    while (idx--) {
++        if (*b++ == 0) {
++            ++z;
++            if (z >= 2 && *b == 3) { // 00 00 03: presumably an emulation-prevention byte - skipped without consuming idx
++                ++b;
++                z = 0;
++            }
++        }
++        else {
++            z = 0;
++        }
++    }
++    return b; // NOTE(review): no buffer length is checked anywhere above - caller must keep idx in range
++}
++
++static int slice_add(V4L2MediaReqDescriptor * const rd)
++{ // Reserve one more slot in slice_params[] and slices[]; returns 0 or AVERROR(ENOMEM)
++    if (rd->num_slices >= rd->alloced_slices) {
++        struct v4l2_ctrl_hevc_slice_params * p2;
++        struct slice_info * s2;
++        size_t n2 = rd->num_slices == 0 ? 8 : rd->num_slices * 2; // Start at 8 entries, then double
++
++        p2 = av_realloc_array(rd->slice_params, n2, sizeof(*p2));
++        if (p2 == NULL)
++            return AVERROR(ENOMEM);
++        rd->slice_params = p2; // Stored at once so the new pointer is kept even if the next realloc fails
++
++        s2 = av_realloc_array(rd->slices, n2, sizeof(*s2));
++        if (s2 == NULL)
++            return AVERROR(ENOMEM);
++        rd->slices = s2;
++
++        rd->alloced_slices = n2; // Only updated once both arrays have grown
++    }
++    ++rd->num_slices; // Caller then fills index num_slices - 1
++    return 0;
++}
++
++static unsigned int
++fill_dpb_entries(const HEVCContext * const h, struct v4l2_hevc_dpb_entry * const entries)
++{ // Write one entry per reference frame in h->DPB (excluding the current picture); returns the count
++    unsigned int i;
++    unsigned int n = 0; // Entries written so far
++    const HEVCFrame * const pic = h->ref; // Current picture - skipped below
++
++    for (i = 0; i < FF_ARRAY_ELEMS(h->DPB); i++) {
++        const HEVCFrame * const frame = &h->DPB[i];
++        if (frame != pic && (frame->flags & (HEVC_FRAME_FLAG_LONG_REF | HEVC_FRAME_FLAG_SHORT_REF))) {
++            struct v4l2_hevc_dpb_entry * const entry = entries + n++; // NOTE(review): n is not bounded by the capacity of entries[] - confirm
++
++            entry->timestamp = frame_capture_dpb(frame->frame);
++            entry->rps = find_frame_rps_type(h, entry->timestamp);
++            entry->field_pic = frame->frame->interlaced_frame;
++
++            /* TODO: Interleaved: Get the POC for each field. */
++            entry->pic_order_cnt[0] = frame->poc;
++            entry->pic_order_cnt[1] = frame->poc;
++        }
++    }
++    return n;
++}
++
++static void fill_slice_params(const HEVCContext * const h,
++#if HEVC_CTRLS_VERSION >= 2
++                              const struct v4l2_ctrl_hevc_decode_params * const dec,
++#endif
++                              struct v4l2_ctrl_hevc_slice_params *slice_params,
++                              uint32_t bit_size, uint32_t bit_offset)
++{ // Build the slice_params control for the current slice from h->sh; *slice_params is fully overwritten
++    const SliceHeader * const sh = &h->sh;
++#if HEVC_CTRLS_VERSION >= 2
++    const struct v4l2_hevc_dpb_entry *const dpb = dec->dpb; // v2: DPB was already filled into decode_params
++    const unsigned int dpb_n = dec->num_active_dpb_entries;
++#else
++    struct v4l2_hevc_dpb_entry *const dpb = slice_params->dpb; // v1: DPB lives inside each slice_params
++    unsigned int dpb_n;
++#endif
++    unsigned int i;
++    RefPicList *rpl;
++
++    *slice_params = (struct v4l2_ctrl_hevc_slice_params) {
++        .bit_size = bit_size,
++        .data_bit_offset = bit_offset,
++
++        /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++        .slice_segment_addr = sh->slice_segment_addr,
++
++        /* ISO/IEC 23008-2, ITU-T Rec. H.265: NAL unit header */
++        .nal_unit_type = h->nal_unit_type,
++        .nuh_temporal_id_plus1 = h->temporal_id + 1,
++
++        /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++        .slice_type = sh->slice_type,
++        .colour_plane_id = sh->colour_plane_id,
++        .slice_pic_order_cnt = h->ref->poc,
++        .num_ref_idx_l0_active_minus1 = sh->nb_refs[L0] ? sh->nb_refs[L0] - 1 : 0, // 0 refs must not wrap to -1
++        .num_ref_idx_l1_active_minus1 = sh->nb_refs[L1] ? sh->nb_refs[L1] - 1 : 0,
++        .collocated_ref_idx = sh->slice_temporal_mvp_enabled_flag ? sh->collocated_ref_idx : 0,
++        .five_minus_max_num_merge_cand = sh->slice_type == HEVC_SLICE_I ? 0 : 5 - sh->max_num_merge_cand,
++        .slice_qp_delta = sh->slice_qp_delta,
++        .slice_cb_qp_offset = sh->slice_cb_qp_offset,
++        .slice_cr_qp_offset = sh->slice_cr_qp_offset,
++        .slice_act_y_qp_offset = 0,
++        .slice_act_cb_qp_offset = 0,
++        .slice_act_cr_qp_offset = 0,
++        .slice_beta_offset_div2 = sh->beta_offset / 2,
++        .slice_tc_offset_div2 = sh->tc_offset / 2,
++
++        /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture timing SEI message */
++        .pic_struct = h->sei.picture_timing.picture_struct,
++
++#if HEVC_CTRLS_VERSION < 2
++        /* ISO/IEC 23008-2, ITU-T Rec. H.265: General slice segment header */
++        .num_rps_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs,
++        .num_rps_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs,
++        .num_rps_poc_lt_curr = h->rps[LT_CURR].nb_refs,
++#endif
++    }; // Members not named above (including flags) are zero-initialised by the compound literal
++
++    if (sh->slice_sample_adaptive_offset_flag[0])
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_LUMA;
++
++    if (sh->slice_sample_adaptive_offset_flag[1])
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_SAO_CHROMA;
++
++    if (sh->slice_temporal_mvp_enabled_flag)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_TEMPORAL_MVP_ENABLED;
++
++    if (sh->mvd_l1_zero_flag)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_MVD_L1_ZERO;
++
++    if (sh->cabac_init_flag)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_CABAC_INIT;
++
++    if (sh->collocated_list == L0)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_COLLOCATED_FROM_L0;
++
++    if (sh->disable_deblocking_filter_flag)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_DEBLOCKING_FILTER_DISABLED;
++
++    if (sh->slice_loop_filter_across_slices_enabled_flag)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_SLICE_LOOP_FILTER_ACROSS_SLICES_ENABLED;
++
++    if (sh->dependent_slice_segment_flag)
++        slice_params->flags |= V4L2_HEVC_SLICE_PARAMS_FLAG_DEPENDENT_SLICE_SEGMENT;
++
++#if HEVC_CTRLS_VERSION < 2
++    dpb_n = fill_dpb_entries(h, dpb); // Must follow the struct assignment above, which zeroes slice_params->dpb
++    slice_params->num_active_dpb_entries = dpb_n;
++#endif
++
++    if (sh->slice_type != HEVC_SLICE_I) { // P and B slices: translate list 0 into DPB entry indices
++        rpl = &h->ref->refPicList[0];
++        for (i = 0; i < rpl->nb_refs; i++)
++            slice_params->ref_idx_l0[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n);
++    }
++
++    if (sh->slice_type == HEVC_SLICE_B) { // B slices only: same for list 1
++        rpl = &h->ref->refPicList[1];
++        for (i = 0; i < rpl->nb_refs; i++)
++            slice_params->ref_idx_l1[i] = get_ref_pic_index(h, rpl->ref[i], dpb, dpb_n);
++    }
++
++    fill_pred_table(h, &slice_params->pred_weight_table);
++
++    slice_params->num_entry_point_offsets = sh->num_entry_point_offsets;
++    if (slice_params->num_entry_point_offsets > 256) { // Clamp so the copy loop below writes at most 256 entries
++        slice_params->num_entry_point_offsets = 256;
++        av_log(NULL, AV_LOG_ERROR, "%s: Currently only 256 entry points are supported, but slice has %d entry points.\n", __func__, sh->num_entry_point_offsets);
++    }
++
++    for (i = 0; i < slice_params->num_entry_point_offsets; i++)
++        slice_params->entry_point_offset_minus1[i] = sh->entry_point_offset[i] - 1;
++}
++
++#if HEVC_CTRLS_VERSION >= 2
++static void
++fill_decode_params(const HEVCContext * const h,
++                   struct v4l2_ctrl_hevc_decode_params * const dec)
++{ // Build the decode_params control: POC, RPS counts, DPB entries, RPS POC lists and picture flags
++    unsigned int i;
++
++    *dec = (struct v4l2_ctrl_hevc_decode_params){
++        .pic_order_cnt_val = h->poc,
++        .num_poc_st_curr_before = h->rps[ST_CURR_BEF].nb_refs,
++        .num_poc_st_curr_after = h->rps[ST_CURR_AFT].nb_refs,
++        .num_poc_lt_curr = h->rps[LT_CURR].nb_refs,
++    };
++
++    dec->num_active_dpb_entries = fill_dpb_entries(h, dec->dpb);
++
++    // The documentation does seem to ask that we fit our 32 bit signed POC into
++    // a U8 so... (To be fair 16 bits would be enough)
++    // Luckily we (Pi) don't use these fields
++    for (i = 0; i != h->rps[ST_CURR_BEF].nb_refs; ++i) // NOTE(review): ref[i] is dereferenced unchecked here, yet NULL-tested in find_frame_rps_type
++        dec->poc_st_curr_before[i] = h->rps[ST_CURR_BEF].ref[i]->poc;
++    for (i = 0; i != h->rps[ST_CURR_AFT].nb_refs; ++i)
++        dec->poc_st_curr_after[i] = h->rps[ST_CURR_AFT].ref[i]->poc;
++    for (i = 0; i != h->rps[LT_CURR].nb_refs; ++i)
++        dec->poc_lt_curr[i] = h->rps[LT_CURR].ref[i]->poc;
++
++    if (IS_IRAP(h))
++        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IRAP_PIC;
++    if (IS_IDR(h))
++        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_IDR_PIC;
++    if (h->sh.no_output_of_prior_pics_flag)
++        dec->flags |= V4L2_HEVC_DECODE_PARAM_FLAG_NO_OUTPUT_OF_PRIOR;
++
++}
++#endif
++
++static void fill_sps(struct v4l2_ctrl_hevc_sps *ctrl, const HEVCSPS *sps)
++{
++    // Translate the parsed SPS into the V4L2 HEVC SPS control; *ctrl is fully overwritten, unnamed members zeroed
++    /* ISO/IEC 23008-2, ITU-T Rec. H.265: Sequence parameter set */
++    *ctrl = (struct v4l2_ctrl_hevc_sps) {
++        .chroma_format_idc = sps->chroma_format_idc,
++        .pic_width_in_luma_samples = sps->width,
++        .pic_height_in_luma_samples = sps->height,
++        .bit_depth_luma_minus8 = sps->bit_depth - 8,
++        .bit_depth_chroma_minus8 = sps->bit_depth - 8, // Chroma reuses sps->bit_depth, the same value as luma
++        .log2_max_pic_order_cnt_lsb_minus4 = sps->log2_max_poc_lsb - 4,
++        .sps_max_dec_pic_buffering_minus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_dec_pic_buffering - 1, // These three use the last temporal layer
++        .sps_max_num_reorder_pics = sps->temporal_layer[sps->max_sub_layers - 1].num_reorder_pics,
++        .sps_max_latency_increase_plus1 = sps->temporal_layer[sps->max_sub_layers - 1].max_latency_increase + 1,
++        .log2_min_luma_coding_block_size_minus3 = sps->log2_min_cb_size - 3,
++        .log2_diff_max_min_luma_coding_block_size = sps->log2_diff_max_min_coding_block_size,
++        .log2_min_luma_transform_block_size_minus2 = sps->log2_min_tb_size - 2,
++        .log2_diff_max_min_luma_transform_block_size = sps->log2_max_trafo_size - sps->log2_min_tb_size,
++        .max_transform_hierarchy_depth_inter = sps->max_transform_hierarchy_depth_inter,
++        .max_transform_hierarchy_depth_intra = sps->max_transform_hierarchy_depth_intra,
++        .pcm_sample_bit_depth_luma_minus1 = sps->pcm.bit_depth - 1,
++        .pcm_sample_bit_depth_chroma_minus1 = sps->pcm.bit_depth_chroma - 1,
++        .log2_min_pcm_luma_coding_block_size_minus3 = sps->pcm.log2_min_pcm_cb_size - 3,
++        .log2_diff_max_min_pcm_luma_coding_block_size = sps->pcm.log2_max_pcm_cb_size - sps->pcm.log2_min_pcm_cb_size,
++        .num_short_term_ref_pic_sets = sps->nb_st_rps,
++        .num_long_term_ref_pics_sps = sps->num_long_term_ref_pics_sps,
++        .sps_max_sub_layers_minus1 = sps->max_sub_layers - 1,
++    };
++
++    if (sps->separate_colour_plane_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SEPARATE_COLOUR_PLANE;
++
++    if (sps->scaling_list_enable_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SCALING_LIST_ENABLED;
++
++    if (sps->amp_enabled_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_AMP_ENABLED;
++
++    if (sps->sao_enabled)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SAMPLE_ADAPTIVE_OFFSET;
++
++    if (sps->pcm_enabled_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_ENABLED;
++
++    if (sps->pcm.loop_filter_disable_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_PCM_LOOP_FILTER_DISABLED;
++
++    if (sps->long_term_ref_pics_present_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_LONG_TERM_REF_PICS_PRESENT;
++
++    if (sps->sps_temporal_mvp_enabled_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_SPS_TEMPORAL_MVP_ENABLED;
++
++    if (sps->sps_strong_intra_smoothing_enable_flag)
++        ctrl->flags |= V4L2_HEVC_SPS_FLAG_STRONG_INTRA_SMOOTHING_ENABLED;
++}
++
++static void fill_scaling_matrix(const ScalingList * const sl,
++                                struct v4l2_ctrl_hevc_scaling_matrix * const sm)
++{ // Copy the scaling lists from sl into the V4L2 control: sl->sl[0..3] feed the 4x4, 8x8, 16x16 and 32x32 tables
++    unsigned int i;
++
++    for (i = 0; i < 6; i++) {
++        unsigned int j;
++
++        for (j = 0; j < 16; j++)
++            sm->scaling_list_4x4[i][j] = sl->sl[0][i][j];
++        for (j = 0; j < 64; j++) {
++            sm->scaling_list_8x8[i][j]   = sl->sl[1][i][j];
++            sm->scaling_list_16x16[i][j] = sl->sl[2][i][j];
++            if (i < 2) // Only two 32x32 lists are written, taken from source matrices 0 and 3
++                sm->scaling_list_32x32[i][j] = sl->sl[3][i * 3][j];
++        }
++        sm->scaling_list_dc_coef_16x16[i] = sl->sl_dc[0][i];
++        if (i < 2) // Same 0/3 source selection for the 32x32 DC coefficients
++            sm->scaling_list_dc_coef_32x32[i] = sl->sl_dc[1][i * 3];
++    }
++}
++
++static void fill_pps(struct v4l2_ctrl_hevc_pps * const ctrl, const HEVCPPS * const pps)
++{ // Translate the parsed PPS into the V4L2 HEVC PPS control; *ctrl is fully overwritten
++    uint64_t flags = 0; // Collected first because the struct assignment below replaces all of *ctrl
++
++    if (pps->dependent_slice_segments_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_DEPENDENT_SLICE_SEGMENT_ENABLED;
++
++    if (pps->output_flag_present_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_OUTPUT_FLAG_PRESENT;
++
++    if (pps->sign_data_hiding_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_SIGN_DATA_HIDING_ENABLED;
++
++    if (pps->cabac_init_present_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_CABAC_INIT_PRESENT;
++
++    if (pps->constrained_intra_pred_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_CONSTRAINED_INTRA_PRED;
++
++    if (pps->transform_skip_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_TRANSFORM_SKIP_ENABLED;
++
++    if (pps->cu_qp_delta_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_CU_QP_DELTA_ENABLED;
++
++    if (pps->pic_slice_level_chroma_qp_offsets_present_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_PPS_SLICE_CHROMA_QP_OFFSETS_PRESENT;
++
++    if (pps->weighted_pred_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_PRED;
++
++    if (pps->weighted_bipred_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_WEIGHTED_BIPRED;
++
++    if (pps->transquant_bypass_enable_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_TRANSQUANT_BYPASS_ENABLED;
++
++    if (pps->tiles_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_TILES_ENABLED;
++
++    if (pps->entropy_coding_sync_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_ENTROPY_CODING_SYNC_ENABLED;
++
++    if (pps->loop_filter_across_tiles_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_LOOP_FILTER_ACROSS_TILES_ENABLED;
++
++    if (pps->seq_loop_filter_across_slices_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_PPS_LOOP_FILTER_ACROSS_SLICES_ENABLED;
++
++    if (pps->deblocking_filter_override_enabled_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_DEBLOCKING_FILTER_OVERRIDE_ENABLED;
++
++    if (pps->disable_dbf)
++        flags |= V4L2_HEVC_PPS_FLAG_PPS_DISABLE_DEBLOCKING_FILTER;
++
++    if (pps->lists_modification_present_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_LISTS_MODIFICATION_PRESENT;
++
++    if (pps->slice_header_extension_present_flag)
++        flags |= V4L2_HEVC_PPS_FLAG_SLICE_SEGMENT_HEADER_EXTENSION_PRESENT;
++
++    /* ISO/IEC 23008-2, ITU-T Rec. H.265: Picture parameter set */
++    *ctrl = (struct v4l2_ctrl_hevc_pps) {
++        .num_extra_slice_header_bits = pps->num_extra_slice_header_bits,
++        .init_qp_minus26 = pps->pic_init_qp_minus26,
++        .diff_cu_qp_delta_depth = pps->diff_cu_qp_delta_depth,
++        .pps_cb_qp_offset = pps->cb_qp_offset,
++        .pps_cr_qp_offset = pps->cr_qp_offset,
++        .pps_beta_offset_div2 = pps->beta_offset / 2,
++        .pps_tc_offset_div2 = pps->tc_offset / 2,
++        .log2_parallel_merge_level_minus2 = pps->log2_parallel_merge_level - 2,
++        .flags = flags
++    };
++
++    // Tile fields stay zero (from the struct assignment above) unless tiles are enabled
++    if (pps->tiles_enabled_flag) {
++        ctrl->num_tile_columns_minus1 = pps->num_tile_columns - 1;
++        ctrl->num_tile_rows_minus1 = pps->num_tile_rows - 1;
++
++        for (int i = 0; i < pps->num_tile_columns; i++)
++            ctrl->column_width_minus1[i] = pps->column_width[i] - 1;
++
++        for (int i = 0; i < pps->num_tile_rows; i++)
++            ctrl->row_height_minus1[i] = pps->row_height[i] - 1;
++    }
++}
++
++// Called before finally returning the frame to the user
++// Set corrupt flag here as this is actually the frame structure that
++// is going to the user (in MT land each thread has its own pool)
++static int frame_post_process(void *logctx, AVFrame *frame)
++{
++    V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)frame->data[0];
++
++//    av_log(NULL, AV_LOG_INFO, "%s\n", __func__);
++    frame->flags &= ~AV_FRAME_FLAG_CORRUPT; // Start clean; set again below only if the wait reports failure
++    if (rd->qe_dst) {
++        MediaBufsStatus stat = qent_dst_wait(rd->qe_dst);
++        if (stat != MEDIABUFS_STATUS_SUCCESS) {
++            av_log(logctx, AV_LOG_ERROR, "%s: Decode fail\n", __func__);
++            frame->flags |= AV_FRAME_FLAG_CORRUPT;
++        }
++    }
++
++    return 0; // Always 0: a failed decode is reported through AV_FRAME_FLAG_CORRUPT, not the return value
++}
++
++static inline struct timeval cvt_dpb_to_tv(uint64_t t)
++{ // Convert a DPB stamp into a struct timeval
++    t /= 1000; // Undo the *1000 applied by cvt_timestamp_to_dpb; t is then split as microseconds
++    return (struct timeval){
++        .tv_usec = t % 1000000,
++        .tv_sec = t / 1000000
++    };
++}
++
++static inline uint64_t cvt_timestamp_to_dpb(const unsigned int t)
++{ // Scale the frame counter into a DPB stamp (counter * 1000)
++    return (uint64_t)t * 1000; // Widen before multiplying so the product is computed in 64 bits
++}
++
++static int v4l2_request_hevc_start_frame(AVCodecContext *avctx,
++                                         av_unused const uint8_t *buffer,
++                                         av_unused uint32_t size)
++{
++    const HEVCContext *h = avctx->priv_data;
++    V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0];
++    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++
++//    av_log(NULL, AV_LOG_INFO, "%s\n", __func__);
++    decode_q_add(&ctx->decode_q, &rd->decode_ent); // end_frame checks this membership and abort_frame removes it
++
++    rd->num_slices = 0; // New frame: discard slices accumulated for any previous use of this descriptor
++    ctx->timestamp++;
++    rd->timestamp = cvt_timestamp_to_dpb(ctx->timestamp); // Per-frame stamp later matched against DPB entries
++
++    {
++        FrameDecodeData * const fdd = (FrameDecodeData*)h->ref->frame->private_ref->data;
++        fdd->post_process = frame_post_process;
++    }
++
++    // qe_dst needs to be bound to the data buffer and only returned when that is
++    if (!rd->qe_dst)
++    {
++        if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) {
++            av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__);
++            return AVERROR(ENOMEM); // NOTE(review): decode_ent is still queued here - presumably abort_frame cleans up; confirm
++        }
++    }
++
++    ff_thread_finish_setup(avctx); // Allow next thread to enter start_frame
++
++    return 0;
++}
++
++// Fill *desc from a V4L2 format; object fd & size are reset (-1 / 0) and need setting later. Returns 0, or -1 if unsupported
++static int drm_from_format(AVDRMFrameDescriptor * const desc, const struct v4l2_format * const format)
++{
++    AVDRMLayerDescriptor *layer = &desc->layers[0];
++    unsigned int width;
++    unsigned int height;
++    unsigned int bpl;
++    uint32_t pixelformat;
++
++    if (V4L2_TYPE_IS_MULTIPLANAR(format->type)) {
++        width       = format->fmt.pix_mp.width;
++        height      = format->fmt.pix_mp.height;
++        pixelformat = format->fmt.pix_mp.pixelformat;
++        bpl         = format->fmt.pix_mp.plane_fmt[0].bytesperline;
++    }
++    else {
++        width       = format->fmt.pix.width;
++        height      = format->fmt.pix.height;
++        pixelformat = format->fmt.pix.pixelformat;
++        bpl         = format->fmt.pix.bytesperline;
++    }
++
++    switch (pixelformat) {
++    case V4L2_PIX_FMT_NV12:
++        layer->format = DRM_FORMAT_NV12;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
++        break;
++#if CONFIG_SAND
++    case V4L2_PIX_FMT_NV12_COL128:
++        layer->format = DRM_FORMAT_NV12;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl);
++        break;
++    case V4L2_PIX_FMT_NV12_10_COL128:
++        layer->format = DRM_FORMAT_P030;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_BROADCOM_SAND128_COL_HEIGHT(bpl);
++        break;
++#endif
++#ifdef DRM_FORMAT_MOD_ALLWINNER_TILED
++    case V4L2_PIX_FMT_SUNXI_TILED_NV12:
++        layer->format = DRM_FORMAT_NV12;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_ALLWINNER_TILED;
++        break;
++#endif
++#if defined(V4L2_PIX_FMT_NV15) && defined(DRM_FORMAT_NV15)
++    case V4L2_PIX_FMT_NV15:
++        layer->format = DRM_FORMAT_NV15;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
++        break;
++#endif
++    case V4L2_PIX_FMT_NV16:
++        layer->format = DRM_FORMAT_NV16;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
++        break;
++#if defined(V4L2_PIX_FMT_NV20) && defined(DRM_FORMAT_NV20)
++    case V4L2_PIX_FMT_NV20:
++        layer->format = DRM_FORMAT_NV20;
++        desc->objects[0].format_modifier = DRM_FORMAT_MOD_LINEAR;
++        break;
++#endif
++    default:
++        return -1; // Unsupported pixel format: only layer/modifier fields could have been touched, and none were
++    }
++
++    desc->nb_objects = 1;
++    desc->objects[0].fd = -1;
++    desc->objects[0].size = 0;
++
++    desc->nb_layers = 1;
++    layer->nb_planes = 2; // Every format accepted above is described as two planes within the single object
++
++    layer->planes[0].object_index = 0;
++    layer->planes[0].offset = 0;
++    layer->planes[0].pitch = bpl;
++#if CONFIG_SAND
++    if (pixelformat == V4L2_PIX_FMT_NV12_COL128) {
++        layer->planes[1].object_index = 0;
++        layer->planes[1].offset = height * 128;
++        layer->planes[0].pitch = width; // Overrides the bpl pitch set just above
++        layer->planes[1].pitch = width;
++    }
++    else if (pixelformat == V4L2_PIX_FMT_NV12_10_COL128) {
++        layer->planes[1].object_index = 0;
++        layer->planes[1].offset = height * 128;
++        layer->planes[0].pitch = width * 2; // Lies but it keeps DRM import happy
++        layer->planes[1].pitch = width * 2;
++    }
++    else
++#endif
++    {
++        layer->planes[1].object_index = 0;
++        layer->planes[1].offset = layer->planes[0].pitch * height; // Second plane starts right after pitch * height bytes
++        layer->planes[1].pitch = layer->planes[0].pitch;
++    }
++
++    return 0;
++}
++
++static int
++set_req_ctls(V4L2RequestContextHEVC *ctx, struct media_request * const mreq,
++    struct req_controls *const controls,
++#if HEVC_CTRLS_VERSION >= 2
++    struct v4l2_ctrl_hevc_decode_params * const dec,
++#endif
++    struct v4l2_ctrl_hevc_slice_params * const slices,
++    const unsigned int slice_no,
++    const unsigned int slice_count)
++{ // Set the HEVC controls on mreq; returns whatever mediabufs_ctl_set_ext_ctrls() returns
++    int rv;
++
++    struct v4l2_ext_control control[] = {
++        {
++            .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS,
++            .ptr = &controls->sps,
++            .size = sizeof(controls->sps),
++        },
++        {
++            .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS,
++            .ptr = &controls->pps,
++            .size = sizeof(controls->pps),
++        },
++#if HEVC_CTRLS_VERSION >= 2
++        {
++            .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS,
++            .ptr = dec,
++            .size = sizeof(*dec),
++        },
++#endif
++        {
++            .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS,
++            .ptr = slices + slice_no,
++            .size = sizeof(*slices) * slice_count, // slice_count consecutive entries starting at slice_no
++        },
++        // Optional - must remain the last element: it is dropped by passing a count one smaller
++        {
++            .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX,
++            .ptr = &controls->scaling_matrix,
++            .size = sizeof(controls->scaling_matrix),
++        },
++    };
++
++    rv = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, mreq, control,
++            controls->has_scaling ?
++                FF_ARRAY_ELEMS(control) :
++                FF_ARRAY_ELEMS(control) - 1);
++
++    return rv;
++}
++
++static int v4l2_request_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
++{
++    const HEVCContext * const h = avctx->priv_data;
++    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++    V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0];
++    int bcount = get_bits_count(&h->HEVClc->gb); // Bits already consumed from the slice's bit reader
++    uint32_t boff = (ptr_from_index(buffer, bcount/8 + 1) - (buffer + bcount/8 + 1)) * 8 + bcount;
++    // boff = bcount plus 8 bits for every byte that ptr_from_index() stepped over in buffer
++    int rv;
++    struct slice_info * si;
++
++    if ((rv = slice_add(rd)) != 0)
++        return rv;
++
++    si = rd->slices + rd->num_slices - 1; // Slot just reserved by slice_add()
++    si->ptr = buffer;
++    si->len = size;
++
++    if (ctx->multi_slice && rd->num_slices > 1) { // Later slices are expressed relative to the first slice's pointer
++        struct slice_info *const si0 = rd->slices;
++        const size_t offset = (buffer - si0->ptr);
++        boff += offset * 8;
++        size += offset;
++        si0->len = si->len + offset; // Slice 0's length is extended to reach the end of this slice
++    }
++
++#if HEVC_CTRLS_VERSION >= 2
++    if (rd->num_slices == 1) // decode_params are filled once per frame, on the first slice
++        fill_decode_params(h, &rd->dec);
++    fill_slice_params(h, &rd->dec, rd->slice_params + rd->num_slices - 1, size * 8, boff);
++#else
++    fill_slice_params(h, rd->slice_params + rd->num_slices - 1, size * 8, boff);
++#endif
++
++    return 0;
++}
++
++static void v4l2_request_hevc_abort_frame(AVCodecContext * const avctx)
++{ // Abort the current frame's pending request and source buffer, then remove it from the decode queue
++    const HEVCContext * const h = avctx->priv_data;
++    if (h->ref != NULL) { // Nothing to do when there is no current frame
++        V4L2MediaReqDescriptor *const rd = (V4L2MediaReqDescriptor *)h->ref->frame->data[0];
++        V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++
++        media_request_abort(&rd->req);
++        mediabufs_src_qent_abort(ctx->mbufs, &rd->qe_src);
++
++        decode_q_remove(&ctx->decode_q, &rd->decode_ent);
++    }
++}
++
++static int send_slice(AVCodecContext * const avctx,
++                      V4L2MediaReqDescriptor * const rd,
++                      struct req_controls *const controls,
++                      const unsigned int i, const unsigned int j)
++{ // Start one media request carrying slice_params[i..j) and the source bytes recorded in slices[i]
++    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++
++    struct slice_info *const si = rd->slices + i;
++    struct media_request * req = NULL;
++    struct qent_src * src = NULL;
++    MediaBufsStatus stat;
++
++    if ((req = media_request_get(ctx->mpool)) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Failed to alloc media request\n", __func__);
++        return AVERROR(ENOMEM);
++    }
++
++    if (set_req_ctls(ctx, req,
++                     controls,
++#if HEVC_CTRLS_VERSION >= 2
++                     &rd->dec,
++#endif
++                     rd->slice_params,
++                     i, j - i)) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Failed to set req ctls\n", __func__);
++        goto fail1;
++    }
++
++    if ((src = mediabufs_src_qent_get(ctx->mbufs)) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Failed to get src buffer\n", __func__);
++        goto fail1;
++    }
++
++    if (qent_src_data_copy(src, 0, si->ptr, si->len, ctx->dbufs) != 0) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Failed data copy\n", __func__);
++        goto fail2;
++    }
++
++    if (qent_src_params_set(src, &controls->tv)) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Failed src param set\n", __func__);
++        goto fail2;
++    }
++
++#warning ANNEX_B start code
++//        if (ctx->start_code == V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) {
++//        }
++
++    stat = mediabufs_start_request(ctx->mbufs, &req, &src,
++                                   i == 0 ? rd->qe_dst : NULL, // Destination buffer is passed only with the first slice
++                                   j == rd->num_slices); // True when this request reaches the frame's last slice
++
++    if (stat != MEDIABUFS_STATUS_SUCCESS) {
++        av_log(avctx, AV_LOG_ERROR, "%s: Failed to start request\n", __func__);
++        return AVERROR_UNKNOWN; // NOTE(review): no abort on this path - presumably req/src were consumed via their addresses; confirm
++    }
++    return 0;
++
++fail2: // Unwind in reverse order of acquisition: source buffer first, then the request
++    mediabufs_src_qent_abort(ctx->mbufs, &src);
++fail1:
++    media_request_abort(&req);
++    return AVERROR_UNKNOWN;
++}
++
++static int v4l2_request_hevc_end_frame(AVCodecContext *avctx)
++{
++    const HEVCContext * const h = avctx->priv_data;
++    V4L2MediaReqDescriptor *rd = (V4L2MediaReqDescriptor*)h->ref->frame->data[0];
++    V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data;
++    struct req_controls rc;
++    unsigned int i;
++    int rv;
++    // Fills the per-frame controls, waits in decode_q_wait(), then sends the frame's slices
++    // It is possible, though maybe a bug, to get an end_frame without
++    // a previous start_frame.  If we do then give up.
++    if (!decode_q_in_q(&rd->decode_ent)) {
++        av_log(avctx, AV_LOG_DEBUG, "%s: Frame not in decode Q\n", __func__);
++        return AVERROR_INVALIDDATA;
++    }
++
++    {
++        const ScalingList *sl = h->ps.pps->scaling_list_data_present_flag ?
++                                    &h->ps.pps->scaling_list :
++                                h->ps.sps->scaling_list_enable_flag ?
++                                    &h->ps.sps->scaling_list : NULL;
++        // A PPS scaling list takes precedence over an SPS one; sl is NULL if neither flag is set
++
++        memset(&rc, 0, sizeof(rc));
++        rc.tv = cvt_dpb_to_tv(rd->timestamp);
++        fill_sps(&rc.sps, h->ps.sps);
++        fill_pps(&rc.pps, h->ps.pps);
++        if (sl) {
++            rc.has_scaling = 1;
++            fill_scaling_matrix(sl, &rc.scaling_matrix);
++        }
++    }
++    // NOTE(review): presumably blocks until it is this frame's turn in the decode Q - confirm
++    decode_q_wait(&ctx->decode_q, &rd->decode_ent);
++
++    // qe_dst needs to be bound to the data buffer and only returned when that
++    // buffer is.  Alloc almost certainly wants to be serialised if there is any
++    // chance of blocking, so that the next frame to be freed goes to the thread
++    // that needs it for decode first.
++    // In our current world this probably isn't a concern but put it here anyway
++    if (!rd->qe_dst)
++    {
++        if ((rd->qe_dst = mediabufs_dst_qent_alloc(ctx->mbufs, ctx->dbufs)) == NULL) {
++            av_log(avctx, AV_LOG_ERROR, "%s: Failed to get dst buffer\n", __func__);
++            rv = AVERROR(ENOMEM);
++            goto fail;
++        }
++    }
++
++    // Multi-slice: all slices go in one request; otherwise one request per slice
++    if (ctx->multi_slice)
++    {
++        if ((rv = send_slice(avctx, rd, &rc, 0, rd->num_slices)) != 0)
++            goto fail;
++    }
++    else
++    {
++        for (i = 0; i != rd->num_slices; ++i) {
++            if ((rv = send_slice(avctx, rd, &rc, i, i + 1)) != 0)
++                goto fail;
++        }
++    }
++
++    // Set the drm_prime descriptor
++    drm_from_format(&rd->drm, mediabufs_dst_fmt(ctx->mbufs));
++    rd->drm.objects[0].fd = dmabuf_fd(qent_dst_dmabuf(rd->qe_dst, 0));
++    rd->drm.objects[0].size = dmabuf_size(qent_dst_dmabuf(rd->qe_dst, 0));
++
++    decode_q_remove(&ctx->decode_q, &rd->decode_ent);
++    return 0;
++    // Both the success and the failure path remove the frame from the decode Q
++fail:
++    decode_q_remove(&ctx->decode_q, &rd->decode_ent);
++    return rv;
++}
++
++// Initial check & init: verify the driver's HEVC controls match this build's structs and set an initial SPS
++static int
++probe(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
++{
++    const HEVCContext *h = avctx->priv_data;
++    const HEVCSPS * const sps = h->ps.sps;
++    struct v4l2_ctrl_hevc_sps ctrl_sps;
++    unsigned int i;
++    // Returns 0 and sets ctx->multi_slice on success, AVERROR(EINVAL) on any mismatch or failure
++    // Check for var slice array
++    struct v4l2_query_ext_ctrl qc[] = {
++        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS },
++        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SPS },
++        { .id = V4L2_CID_MPEG_VIDEO_HEVC_PPS },
++        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SCALING_MATRIX },
++#if HEVC_CTRLS_VERSION >= 2
++        { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_PARAMS },
++#endif
++    };
++    // Order & size must match!
++    static const size_t ctrl_sizes[] = {
++        sizeof(struct v4l2_ctrl_hevc_slice_params),
++        sizeof(struct v4l2_ctrl_hevc_sps),
++        sizeof(struct v4l2_ctrl_hevc_pps),
++        sizeof(struct v4l2_ctrl_hevc_scaling_matrix),
++#if HEVC_CTRLS_VERSION >= 2
++        sizeof(struct v4l2_ctrl_hevc_decode_params),
++#endif
++    };
++    const unsigned int noof_ctrls = FF_ARRAY_ELEMS(qc);
++    // A non-zero return from the query is reported as a missing control
++    if (mediabufs_ctl_query_ext_ctrls(ctx->mbufs, qc, noof_ctrls)) {
++        av_log(avctx, AV_LOG_DEBUG, "Probed V%d control missing\n", HEVC_CTRLS_VERSION);
++        return AVERROR(EINVAL);
++    }
++    for (i = 0; i != noof_ctrls; ++i) {
++        if (ctrl_sizes[i] != qc[i].elem_size) {
++            av_log(avctx, AV_LOG_DEBUG, "Probed V%d control %u size mismatch %zu != %u\n",
++                   HEVC_CTRLS_VERSION, i, ctrl_sizes[i], qc[i].elem_size); // i: unsigned, ctrl_sizes[]: size_t
++            return AVERROR(EINVAL);
++        }
++    }
++    // Check that the driver accepts an SPS built from the current stream
++    fill_sps(&ctrl_sps, sps);
++
++    if (mediabufs_set_ext_ctrl(ctx->mbufs, NULL, V4L2_CID_MPEG_VIDEO_HEVC_SPS, &ctrl_sps, sizeof(ctrl_sps))) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to set initial SPS\n");
++        return AVERROR(EINVAL);
++    }
++    // A dynamic-array SLICE_PARAMS control enables the all-slices-in-one-request path in end_frame
++    ctx->multi_slice = (qc[0].flags & V4L2_CTRL_FLAG_DYNAMIC_ARRAY) != 0;
++    return 0;
++}
++
++// Final init: adopt the driver's default decode mode and start code, and read the slice-array limit
++static int
++set_controls(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx)
++{
++    int ret;
++    // Returns 0, AVERROR(EINVAL) for an unsupported default, or the converted error from the final set
++    struct v4l2_query_ext_ctrl querys[] = {
++        { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, },
++        { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, },
++        { .id = V4L2_CID_MPEG_VIDEO_HEVC_SLICE_PARAMS, },
++    };
++
++    struct v4l2_ext_control ctrls[] = {
++        { .id = V4L2_CID_MPEG_VIDEO_HEVC_DECODE_MODE, },
++        { .id = V4L2_CID_MPEG_VIDEO_HEVC_START_CODE, },
++    };
++    // NOTE(review): the return value is ignored; if the query fails the checks below see querys[] as initialised above
++    mediabufs_ctl_query_ext_ctrls(ctx->mbufs, querys, FF_ARRAY_ELEMS(querys));
++
++    ctx->decode_mode = querys[0].default_value;
++
++    if (ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_SLICE_BASED &&
++        ctx->decode_mode != V4L2_MPEG_VIDEO_HEVC_DECODE_MODE_FRAME_BASED) {
++        av_log(avctx, AV_LOG_ERROR, "%s: unsupported decode mode, %d\n", __func__, ctx->decode_mode);
++        return AVERROR(EINVAL);
++    }
++
++    ctx->start_code = querys[1].default_value;
++    if (ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_NONE &&
++        ctx->start_code != V4L2_MPEG_VIDEO_HEVC_START_CODE_ANNEX_B) {
++        av_log(avctx, AV_LOG_ERROR, "%s: unsupported start code, %d\n", __func__, ctx->start_code);
++        return AVERROR(EINVAL);
++    }
++    // The element count of the SLICE_PARAMS control bounds how many slices fit in one request
++    ctx->max_slices = querys[2].elems;
++    if (ctx->max_slices > MAX_SLICES) {
++        av_log(avctx, AV_LOG_ERROR, "%s: unsupported max slices, %d\n", __func__, ctx->max_slices);
++        return AVERROR(EINVAL);
++    }
++    // Write the queried defaults back explicitly (no request: the request argument is NULL)
++    ctrls[0].value = ctx->decode_mode;
++    ctrls[1].value = ctx->start_code;
++
++    ret = mediabufs_ctl_set_ext_ctrls(ctx->mbufs, NULL, ctrls, FF_ARRAY_ELEMS(ctrls));
++    return !ret ? 0 : AVERROR(-ret);
++}
++
++static void v4l2_req_frame_free(void *opaque, uint8_t *data)
++{
++    AVCodecContext *avctx = opaque;
++    V4L2MediaReqDescriptor * const rd = (V4L2MediaReqDescriptor*)data;
++    // AVBuffer free callback for a descriptor created by v4l2_req_frame_alloc()
++    av_log(NULL, AV_LOG_DEBUG, "%s: avctx=%p data=%p\n", __func__, avctx, data);
++    // Drop the descriptor's reference on its destination buffer
++    qent_dst_unref(&rd->qe_dst);
++
++    // We don't expect req or qe_src to be set
++    if (rd->req || rd->qe_src)
++        av_log(NULL, AV_LOG_ERROR, "%s: qe_src %p or req %p not NULL\n", __func__, rd->qe_src, rd->req);
++
++    av_freep(&rd->slices);
++    av_freep(&rd->slice_params);
++
++    av_free(rd);
++}
++
++static AVBufferRef *v4l2_req_frame_alloc(void *opaque, int size)
++{
++    AVCodecContext *avctx = opaque;
++    // Buffer-pool allocator: returns a zeroed block of 'size' bytes wrapped in an
++    // AVBufferRef that is released through v4l2_req_frame_free(), or NULL on failure
++    AVBufferRef *ref;
++    uint8_t *data;
++    // opaque is the AVCodecContext; it is passed on to the free callback
++
++    data = av_mallocz(size);
++    if (!data)
++        return NULL;
++
++    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p size=%d data=%p\n", __func__, avctx, size, data);
++    ref = av_buffer_create(data, size, v4l2_req_frame_free, avctx, 0);
++    if (!ref) {
++        av_freep(&data);
++        return NULL;
++    }
++    return ref;
++}
++
++static void v4l2_req_pool_free(void *opaque)
++{   // Pool-free callback passed to av_buffer_pool_init2(); it only logs its argument
++    av_log(NULL, AV_LOG_DEBUG, "%s: opaque=%p\n", __func__, opaque);
++}
++
++static void v4l2_req_hwframe_ctx_free(AVHWFramesContext *hwfc)
++{
++    av_log(NULL, AV_LOG_DEBUG, "%s: hwfc=%p pool=%p\n", __func__, hwfc, hwfc->pool);
++    // Installed as hwfc->free by frame_params(); releases the descriptor pool created there
++    av_buffer_pool_uninit(&hwfc->pool);
++}
++
++static int frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
++{
++    V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data;
++    AVHWFramesContext *hwfc = (AVHWFramesContext*)hw_frames_ctx->data;
++    const struct v4l2_format *vfmt = mediabufs_dst_fmt(ctx->mbufs);
++    // Fills hwfc (formats, size, descriptor pool) from the current destination format; returns 0 or AVERROR(ENOMEM)
++    hwfc->format = AV_PIX_FMT_DRM_PRIME;
++    hwfc->sw_format = pixel_format_from_format(vfmt);
++    if (V4L2_TYPE_IS_MULTIPLANAR(vfmt->type)) {
++        hwfc->width = vfmt->fmt.pix_mp.width;
++        hwfc->height = vfmt->fmt.pix_mp.height;
++    } else {
++        hwfc->width = vfmt->fmt.pix.width;
++        hwfc->height = vfmt->fmt.pix.height;
++    }
++    // Pool entries are V4L2MediaReqDescriptor-sized blocks made by v4l2_req_frame_alloc()
++    hwfc->pool = av_buffer_pool_init2(sizeof(V4L2MediaReqDescriptor), avctx, v4l2_req_frame_alloc, v4l2_req_pool_free);
++    if (!hwfc->pool)
++        return AVERROR(ENOMEM);
++
++    hwfc->free = v4l2_req_hwframe_ctx_free;
++
++    hwfc->initial_pool_size = 1;
++    // 1 plus a per-codec extra; any codec other than VP9/VP8 takes the default (+2)
++    switch (avctx->codec_id) {
++    case AV_CODEC_ID_VP9:
++        hwfc->initial_pool_size += 8;
++        break;
++    case AV_CODEC_ID_VP8:
++        hwfc->initial_pool_size += 3;
++        break;
++    default:
++        hwfc->initial_pool_size += 2;
++    }
++
++    av_log(avctx, AV_LOG_DEBUG, "%s: avctx=%p ctx=%p hw_frames_ctx=%p hwfc=%p pool=%p width=%d height=%d initial_pool_size=%d\n", __func__, avctx, ctx, hw_frames_ctx, hwfc, hwfc->pool, hwfc->width, hwfc->height, hwfc->initial_pool_size);
++
++    return 0;
++}
++
++
++const v4l2_req_decode_fns V(ff_v4l2_req_hevc) = {
++    .src_pix_fmt_v4l2 = V4L2_PIX_FMT_HEVC_SLICE,
++    .name = "V4L2 HEVC stateless V" STR(HEVC_CTRLS_VERSION),
++    .probe = probe,
++    .set_controls = set_controls,
++    // Per-frame entry points; the symbol and .name both vary with HEVC_CTRLS_VERSION via V()/STR()
++    .start_frame    = v4l2_request_hevc_start_frame,
++    .decode_slice   = v4l2_request_hevc_decode_slice,
++    .end_frame      = v4l2_request_hevc_end_frame,
++    .abort_frame    = v4l2_request_hevc_abort_frame,
++    .frame_params   = frame_params,
++};
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_media.c
+@@ -0,0 +1,1569 @@
++/*
++ * Copyright (C) 2018 Paul Kocialkowski <paul.kocialkowski@bootlin.com>
++ *
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
++ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#include <errno.h>
++#include <fcntl.h>
++#include <poll.h>
++#include <pthread.h>
++#include <semaphore.h>
++#include <stdatomic.h>
++#include <stdbool.h>
++#include <stdlib.h>
++#include <string.h>
++#include <unistd.h>
++#include <linux/media.h>
++#include <sys/ioctl.h>
++#include <sys/select.h>
++#include <sys/ioctl.h>
++
++#include <linux/videodev2.h>
++
++#include "v4l2_req_dmabufs.h"
++#include "v4l2_req_media.h"
++#include "v4l2_req_pollqueue.h"
++#include "v4l2_req_utils.h"
++#include "weak_link.h"
++
++
++/* floor(log2(x)) for 0 < x < 2^32; returns 0 for x == 0 */
++static unsigned int log2_size(size_t x)
++{
++    unsigned int n = 0;
++    /* Binary search on the bit position: the shifts total at most 30, so larger x gives a capped result */
++    if (x & ~0xffff) {
++        n += 16;
++        x >>= 16;
++    }
++    if (x & ~0xff) {
++        n += 8;
++        x >>= 8;
++    }
++    if (x & ~0xf) {
++        n += 4;
++        x >>= 4;
++    }
++    if (x & ~3) {
++        n += 2;
++        x >>= 2;
++    }
++    return (x & ~1) ? n + 1 : n;
++}
++
++static size_t round_up_size(const size_t x)
++{
++    /* Admit no size < 256 (such x yields 3 << 8); result is the next 3*2^n or 4*2^n above x */
++    const unsigned int n = x < 256 ? 8 : log2_size(x) - 1;
++    const size_t three = (size_t)3 << n; /* shift in size_t: 3 << 30 would overflow int */
++    return x >= three ? (size_t)4 << n : three;
++}
++
++struct media_request;
++
++struct media_pool {
++    int fd;                             /* media device fd opened in media_pool_new() */
++    sem_t sem;                          /* counts free requests: waited in _get(), posted in _done() */
++    pthread_mutex_t lock;               /* guards free_reqs */
++    struct media_request * free_reqs;   /* singly linked list (via ->next) of free requests */
++    struct pollqueue * pq;              /* poll queue handed to media_pool_new() */
++};
++
++struct media_request {
++    struct media_request * next;    /* next free request while on mp->free_reqs, else NULL */
++    struct media_pool * mp;         /* owning pool */
++    int fd;                         /* request fd from MEDIA_IOC_REQUEST_ALLOC, -1 if not allocated */
++    struct polltask * pt;           /* POLLPRI task that runs media_request_done() */
++};
++
++
++static inline int do_trywait(sem_t *const sem)
++{   /* sem_trywait() retried on EINTR; returns 0 on success or -errno (e.g. -EAGAIN) */
++    while (sem_trywait(sem)) {
++        if (errno != EINTR)
++            return -errno;
++    }
++    return 0;
++}
++
++static inline int do_wait(sem_t *const sem)
++{   /* sem_wait() retried on EINTR; returns 0 on success or -errno */
++    while (sem_wait(sem)) {
++        if (errno != EINTR)
++            return -errno;
++    }
++    return 0;
++}
++
++static int request_buffers(int video_fd, unsigned int type,
++                           enum v4l2_memory memory, unsigned int buffers_count)
++{
++    struct v4l2_requestbuffers buffers;
++    int rc;
++    /* Issues VIDIOC_REQBUFS for buffers_count buffers; returns 0 or -errno */
++    memset(&buffers, 0, sizeof(buffers));
++    buffers.type = type;
++    buffers.memory = memory;
++    buffers.count = buffers_count;
++
++    rc = ioctl(video_fd, VIDIOC_REQBUFS, &buffers);
++    if (rc < 0) {
++        rc = -errno;
++        request_log("Unable to request %d type %d buffers: %s\n", buffers_count, type, strerror(-rc));
++        return rc;
++    }
++
++    return 0;
++}
++
++
++static int set_stream(int video_fd, unsigned int type, bool enable)
++{
++    enum v4l2_buf_type buf_type = type;
++    int rc;
++    /* VIDIOC_STREAMON when enable is true, VIDIOC_STREAMOFF otherwise; returns 0 or -errno */
++    rc = ioctl(video_fd, enable ? VIDIOC_STREAMON : VIDIOC_STREAMOFF,
++           &buf_type);
++    if (rc < 0) {
++        rc = -errno;
++        request_log("Unable to %sable stream: %s\n",
++                enable ? "en" : "dis", strerror(-rc));
++        return rc;
++    }
++
++    return 0;
++}
++
++
++
++struct media_request * media_request_get(struct media_pool * const mp)
++{
++    struct media_request *req = NULL;
++    /* Blocks until a request is free, then pops it; NULL if the wait fails or the list is empty */
++    /* Timeout handled by poll code */
++    if (do_wait(&mp->sem))
++        return NULL;
++
++    pthread_mutex_lock(&mp->lock);
++    req = mp->free_reqs;
++    if (req) {
++        mp->free_reqs = req->next;
++        req->next = NULL;
++    }
++    pthread_mutex_unlock(&mp->lock);
++    return req;
++}
++
++int media_request_fd(const struct media_request * const req)
++{   /* Accessor for the request's fd */
++    return req->fd;
++}
++
++int media_request_start(struct media_request * const req)
++{   /* Queues the request (retrying on EINTR), then arms its poll task; returns 0 or -errno */
++    while (ioctl(req->fd, MEDIA_REQUEST_IOC_QUEUE, NULL) == -1)
++    {
++        const int err = errno;
++        if (err == EINTR)
++            continue;
++        request_log("%s: Failed to Q media: (%d) %s\n", __func__, err, strerror(err));
++        return -err;
++    }
++    /* 2000 is the timeout passed to the poll queue (units not visible here) */
++    pollqueue_add_task(req->pt, 2000);
++    return 0;
++}
++
++static void media_request_done(void *v, short revents)
++{
++    struct media_request *const req = v;
++    struct media_pool *const mp = req->mp;
++    /* Poll callback, also called directly by media_request_abort(); revents is not examined */
++    /* ** Not sure what to do about timeout */
++    /* A REINIT failure is only logged: the request is returned to the free list regardless */
++    if (ioctl(req->fd, MEDIA_REQUEST_IOC_REINIT, NULL) < 0)
++        request_log("Unable to reinit media request: %s\n",
++                strerror(errno));
++
++    pthread_mutex_lock(&mp->lock);
++    req->next = mp->free_reqs;
++    mp->free_reqs = req;
++    pthread_mutex_unlock(&mp->lock);
++    sem_post(&mp->sem);
++}
++
++int media_request_abort(struct media_request ** const preq)
++{
++    struct media_request * const req = *preq;
++    /* Returns *preq to its pool and NULLs the caller's pointer; no-op on NULL; always returns 0 */
++    if (req == NULL)
++        return 0;
++    *preq = NULL;
++
++    media_request_done(req, 0);
++    return 0;
++}
++
++static void delete_req_chain(struct media_request * const chain)
++{   /* Frees every request on the ->next chain, deleting its poll task and closing its fd if set */
++    struct media_request * next = chain;
++    while (next) {
++        struct media_request * const req = next;
++        next = req->next;
++        if (req->pt)
++            polltask_delete(&req->pt);
++        if (req->fd != -1)
++            close(req->fd);
++        free(req);
++    }
++}
++
++struct media_pool * media_pool_new(const char * const media_path,
++                   struct pollqueue * const pq,
++                   const unsigned int n)
++{
++    struct media_pool * const mp = calloc(1, sizeof(*mp));
++    unsigned int i;
++    /* Opens media_path and creates a pool of n media requests; returns NULL on any failure */
++    if (!mp)
++        goto fail0;
++
++    mp->pq = pq;
++    pthread_mutex_init(&mp->lock, NULL);
++    mp->fd = open(media_path, O_RDWR | O_NONBLOCK);
++    if (mp->fd == -1) {
++        request_log("Failed to open '%s': %s\n", media_path, strerror(errno));
++        goto fail1;
++    }
++
++    for (i = 0; i != n; ++i) {
++        struct media_request * req = malloc(sizeof(*req));
++        if (!req)
++            goto fail4;
++        /* Linked into free_reqs before its fd and poll task exist, so fail4 can free it */
++        *req = (struct media_request){
++            .next = mp->free_reqs,
++            .mp = mp,
++            .fd = -1
++        };
++        mp->free_reqs = req;
++
++        if (ioctl(mp->fd, MEDIA_IOC_REQUEST_ALLOC, &req->fd) == -1) {
++            request_log("Failed to alloc request %u: %s\n", i, strerror(errno));
++            goto fail4;
++        }
++
++        req->pt = polltask_new(pq, req->fd, POLLPRI, media_request_done, req);
++        if (!req->pt)
++            goto fail4;
++    }
++    /* All n requests start out free */
++    sem_init(&mp->sem, 0, n);
++
++    return mp;
++
++fail4:
++    delete_req_chain(mp->free_reqs);
++    close(mp->fd);
++fail1:
++    pthread_mutex_destroy(&mp->lock); /* the lock is initialised before open(), so destroy it on fail1 too */
++    free(mp);
++fail0:
++    return NULL;
++}
++
++void media_pool_delete(struct media_pool ** pMp)
++{
++    struct media_pool * const mp = *pMp;
++    /* Destroys the pool and NULLs *pMp; no-op if *pMp is already NULL */
++    if (!mp)
++        return;
++    *pMp = NULL;
++    /* NOTE(review): only requests currently on free_reqs are freed here */
++    delete_req_chain(mp->free_reqs);
++    close(mp->fd);
++    sem_destroy(&mp->sem);
++    pthread_mutex_destroy(&mp->lock);
++    free(mp);
++}
++
++
++#define INDEX_UNSET (~(uint32_t)0)
++
++enum qent_status {
++    QENT_NEW = 0,       // Initial state - shouldn't last
++    QENT_FREE,          // On free chain
++    QENT_PENDING,       // User has ent
++    QENT_WAITING,       // On inuse
++    QENT_DONE,          // Frame rx
++    QENT_ERROR,         // Error
++    QENT_IMPORT         // NOTE(review): undocumented and not set in the code visible here - confirm meaning
++};
++
++struct qent_base {
++    atomic_int ref_count;
++    struct qent_base *next;                 // List links: an entry sits on at most one qe_list_head
++    struct qent_base *prev;
++    enum qent_status status;
++    uint32_t index;                         // V4L2 buffer index; INDEX_UNSET until assigned
++    struct dmabuf_h *dh[VIDEO_MAX_PLANES];  // Per-plane dmabufs; users stop at the first NULL
++    struct timeval timestamp;               // Copied to/from v4l2_buffer.timestamp on Q/DQ
++};
++
++struct qent_src {
++    struct qent_base base;  // Must stay first: base_to_src() casts a qent_base pointer
++    int fixed_size;         // Non-zero: qent_src_data_copy() will not realloc the buffer
++};
++
++struct qent_dst {
++    struct qent_base base;  // Must stay first: base_to_dst() casts a qent_base pointer
++    bool waiting;           // Set by qe_dst_waiting(), cleared by qe_dst_done(); guarded by lock
++    pthread_mutex_t lock;
++    pthread_cond_t cond;    // Broadcast by qe_dst_done() when waiting is cleared
++    struct ff_weak_link_client * mbc_wl;
++};
++
++struct qe_list_head {
++    struct qent_base *head; // First entry, NULL when the list is empty
++    struct qent_base *tail; // Last entry, NULL when the list is empty
++};
++
++struct buf_pool {
++    pthread_mutex_t lock;       // Guards the free and inuse lists
++    sem_t free_sem;             // Posted once per entry added by queue_put_free()
++    enum v4l2_buf_type buf_type;
++    struct qe_list_head free;   // Entries available to queue_get_free()/queue_tryget_free()
++    struct qe_list_head inuse;  // Entries queued to V4L2 and awaiting dequeue
++};
++
++
++static inline struct qent_dst *base_to_dst(struct qent_base *be)
++{   /* Valid because base is the first member of struct qent_dst; NULL maps to NULL */
++    return (struct qent_dst *)be;
++}
++
++static inline struct qent_src *base_to_src(struct qent_base *be)
++{   /* Valid because base is the first member of struct qent_src; NULL maps to NULL */
++    return (struct qent_src *)be;
++}
++
++
++#define QENT_BASE_INITIALIZER {\
++    .ref_count = ATOMIC_VAR_INIT(0),\
++    .status = QENT_NEW,\
++    .index  = INDEX_UNSET\
++}
++
++static void qe_base_uninit(struct qent_base *const be)
++{   /* Frees and clears every plane handle; dmabuf_free() is also called on NULL slots */
++    unsigned int i;
++    for (i = 0; i != VIDEO_MAX_PLANES; ++i) {
++        dmabuf_free(be->dh[i]);
++        be->dh[i] = NULL;
++    }
++}
++
++static void qe_src_free(struct qent_src *const be_src)
++{   /* Releases the entry's dmabufs and the entry itself; NULL is a no-op */
++    if (!be_src)
++        return;
++    qe_base_uninit(&be_src->base);
++    free(be_src);
++}
++
++static struct qent_src * qe_src_new(void)
++{   /* Allocates a source entry in state QENT_NEW with no index and no buffers; NULL on OOM */
++    struct qent_src *const be_src = malloc(sizeof(*be_src));
++    if (!be_src)
++        return NULL;
++    *be_src = (struct qent_src){
++        .base = QENT_BASE_INITIALIZER
++    };
++    return be_src;
++}
++
++static void qe_dst_free(struct qent_dst *const be_dst)
++{
++    if (!be_dst)
++        return;
++    /* Drop the weak link, then the sync objects, then the dmabufs and the entry itself */
++    ff_weak_link_unref(&be_dst->mbc_wl);
++    pthread_cond_destroy(&be_dst->cond);
++    pthread_mutex_destroy(&be_dst->lock);
++    qe_base_uninit(&be_dst->base);
++    free(be_dst);
++}
++
++static struct qent_dst * qe_dst_new(void)
++{   /* Allocates a destination entry; members not named below are zeroed; NULL on OOM */
++    struct qent_dst *const be_dst = malloc(sizeof(*be_dst));
++    if (!be_dst)
++        return NULL;
++    *be_dst = (struct qent_dst){
++        .base = QENT_BASE_INITIALIZER,
++        .lock = PTHREAD_MUTEX_INITIALIZER,
++        .cond = PTHREAD_COND_INITIALIZER
++    };
++    return be_dst;
++}
++
++static void ql_add_tail(struct qe_list_head * const ql, struct qent_base * be)
++{   /* Appends be to the doubly linked list; no locking here, callers hold the pool lock */
++    if (ql->tail)
++        ql->tail->next = be;
++    else
++        ql->head = be;
++    be->prev = ql->tail;
++    be->next = NULL;
++    ql->tail = be;
++}
++
++static struct qent_base * ql_extract(struct qe_list_head * const ql, struct qent_base * be)
++{
++    if (!be)
++        return NULL;
++    /* Unlink be (assumed to be on ql), fixing head/tail as needed, and return it with cleared links */
++    if (be->next)
++        be->next->prev = be->prev;
++    else
++        ql->tail = be->prev;
++    if (be->prev)
++        be->prev->next = be->next;
++    else
++        ql->head = be->next;
++    be->next = NULL;
++    be->prev = NULL;
++    return be;
++}
++
++
++static void bq_put_free(struct buf_pool *const bp, struct qent_base * be)
++{   /* Unlocked append to the free list */
++    ql_add_tail(&bp->free, be);
++}
++
++static struct qent_base * bq_get_free(struct buf_pool *const bp)
++{   /* Unlocked pop of the free list head; NULL if the list is empty */
++    return ql_extract(&bp->free, bp->free.head);
++}
++
++static struct qent_base * bq_extract_inuse(struct buf_pool *const bp, struct qent_base *const be)
++{   /* Unlocked removal of a specific entry from the inuse list */
++    return ql_extract(&bp->inuse, be);
++}
++
++static struct qent_base * bq_get_inuse(struct buf_pool *const bp)
++{   /* Unlocked pop of the inuse list head; NULL if the list is empty */
++    return ql_extract(&bp->inuse, bp->inuse.head);
++}
++
++static void bq_free_all_free_src(struct buf_pool *const bp)
++{   /* Drains the free list, destroying each entry as a source entry */
++    struct qent_base *be;
++    while ((be = bq_get_free(bp)) != NULL)
++        qe_src_free(base_to_src(be));
++}
++
++static void bq_free_all_inuse_src(struct buf_pool *const bp)
++{   /* Drains the inuse list, destroying each entry as a source entry */
++    struct qent_base *be;
++    while ((be = bq_get_inuse(bp)) != NULL)
++        qe_src_free(base_to_src(be));
++}
++
++static void bq_free_all_free_dst(struct buf_pool *const bp)
++{   /* Drains the free list, destroying each entry as a destination entry */
++    struct qent_base *be;
++    while ((be = bq_get_free(bp)) != NULL)
++        qe_dst_free(base_to_dst(be));
++}
++
++static void queue_put_free(struct buf_pool *const bp, struct qent_base *be)
++{
++    unsigned int i;
++    /* Resets be, appends it to the free list under the lock, then posts free_sem */
++    pthread_mutex_lock(&bp->lock);
++    /* Clear out state vars */
++    be->timestamp.tv_sec = 0;
++    be->timestamp.tv_usec = 0;
++    be->status = QENT_FREE;
++    for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i)
++        dmabuf_len_set(be->dh[i], 0);
++    bq_put_free(bp, be);
++    pthread_mutex_unlock(&bp->lock);
++    sem_post(&bp->free_sem);
++}
++
++static bool queue_is_inuse(const struct buf_pool *const bp)
++{   /* True if the inuse list is non-empty; reads the tail without taking bp->lock */
++    return bp->inuse.tail != NULL;
++}
++
++static void queue_put_inuse(struct buf_pool *const bp, struct qent_base *be)
++{   /* Appends be to the inuse list and marks it QENT_WAITING; NULL is a no-op */
++    if (!be)
++        return;
++    pthread_mutex_lock(&bp->lock);
++    ql_add_tail(&bp->inuse, be);
++    be->status = QENT_WAITING;
++    pthread_mutex_unlock(&bp->lock);
++}
++
++static struct qent_base *queue_get_free(struct buf_pool *const bp)
++{
++    struct qent_base *buf;
++    /* Blocks on free_sem until an entry is available; NULL only if the wait itself fails */
++    if (do_wait(&bp->free_sem))
++        return NULL;
++    pthread_mutex_lock(&bp->lock);
++    buf = bq_get_free(bp);
++    pthread_mutex_unlock(&bp->lock);
++    return buf;
++}
++
++static struct qent_base *queue_tryget_free(struct buf_pool *const bp)
++{
++    struct qent_base *buf;
++    /* Non-blocking variant of queue_get_free(): NULL if no entry is free right now */
++    if (do_trywait(&bp->free_sem))
++        return NULL;
++    pthread_mutex_lock(&bp->lock);
++    buf = bq_get_free(bp);
++    pthread_mutex_unlock(&bp->lock);
++    return buf;
++}
++
++static struct qent_base * queue_find_extract_fd(struct buf_pool *const bp, const int fd)
++{
++    struct qent_base *be;
++    /* Removes and returns the inuse entry whose plane 0 dmabuf fd equals fd; NULL if none matches */
++    pthread_mutex_lock(&bp->lock);
++    /* Expect 1st in Q, but allow anywhere */
++    for (be = bp->inuse.head; be; be = be->next) {
++        if (dmabuf_fd(be->dh[0]) == fd) {
++            bq_extract_inuse(bp, be);
++            break;
++        }
++    }
++    pthread_mutex_unlock(&bp->lock);
++    /* be is NULL here if the loop ran off the end of the list */
++    return be;
++}
++
++static void queue_delete(struct buf_pool *const bp)
++{   /* Destroys the pool's sync objects and frees it; entries on its lists are not freed here */
++    sem_destroy(&bp->free_sem);
++    pthread_mutex_destroy(&bp->lock);
++    free(bp);
++}
++
++static struct buf_pool* queue_new(const int vfd)
++{   /* Allocates an empty pool with free_sem at 0; vfd is unused here; NULL on OOM */
++    struct buf_pool *bp = calloc(1, sizeof(*bp));
++    if (!bp)
++        return NULL;
++    pthread_mutex_init(&bp->lock, NULL);
++    sem_init(&bp->free_sem, 0, 0);
++    return bp;
++}
++
++
++struct mediabufs_ctl {
++    atomic_int ref_count;  /* 0 is single ref for easier atomics */
++    void * dc;             /* Passed as context to request_err()/request_info() */
++    int vfd;               /* Video device fd used for the QBUF/DQBUF ioctls */
++    bool stream_on;
++    bool polling;          /* True while pt is queued on the poll queue; guarded by lock */
++    pthread_mutex_t lock;
++    struct buf_pool * src; /* Source (OUTPUT) buffer pool */
++    struct buf_pool * dst; /* Destination (CAPTURE) buffer pool */
++    struct polltask * pt;  /* Task that runs mediabufs_poll_cb() */
++    struct pollqueue * pq;
++    struct ff_weak_link_master * this_wlm;
++    /* Formats used to fill the v4l2_buffer type/plane layout for each queue */
++    struct v4l2_format src_fmt;
++    struct v4l2_format dst_fmt;
++};
++
++static int qe_v4l2_queue(struct qent_base *const be,
++               const int vfd, struct media_request *const mreq,
++               const struct v4l2_format *const fmt,
++               const bool is_dst, const bool hold_flag)
++{
++    struct v4l2_buffer buffer = {
++        .type = fmt->type,
++        .memory = V4L2_MEMORY_DMABUF,
++        .index = be->index
++    };
++    struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
++    /* VIDIOC_QBUF of be on vfd as a DMABUF buffer (retrying on EINTR); returns 0 or -errno */
++    if (V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
++        unsigned int i;
++        for (i = 0; i < VIDEO_MAX_PLANES && be->dh[i]; ++i) {
++            if (is_dst)
++                dmabuf_len_set(be->dh[i], 0);
++
++            /* *** Really need a pixdesc rather than a format so we can fill in data_offset */
++            planes[i].length = dmabuf_size(be->dh[i]);
++            planes[i].bytesused = dmabuf_len(be->dh[i]);
++            planes[i].m.fd = dmabuf_fd(be->dh[i]);
++        }
++        buffer.m.planes = planes;
++        buffer.length = i;
++    }
++    else {
++        if (is_dst)
++            dmabuf_len_set(be->dh[0], 0);
++
++        buffer.bytesused = dmabuf_len(be->dh[0]);
++        buffer.length = dmabuf_size(be->dh[0]);
++        buffer.m.fd = dmabuf_fd(be->dh[0]);
++    }
++    /* Only source buffers are bound to the media request; hold_flag is ignored without one */
++    if (!is_dst && mreq) {
++        buffer.flags |= V4L2_BUF_FLAG_REQUEST_FD;
++        buffer.request_fd = media_request_fd(mreq);
++        if (hold_flag)
++            buffer.flags |= V4L2_BUF_FLAG_M2M_HOLD_CAPTURE_BUF;
++    }
++    /* Destination buffers are queued with a zero timestamp */
++    if (is_dst)
++        be->timestamp = (struct timeval){0,0};
++
++    buffer.timestamp = be->timestamp;
++
++    while (ioctl(vfd, VIDIOC_QBUF, &buffer)) {
++        const int err = errno;
++        if (err != EINTR) {
++            request_log("%s: Failed to Q buffer: err=%d (%s)\n", __func__, err, strerror(err));
++            return -err;
++        }
++    }
++    return 0;
++}
++
++static struct qent_base * qe_dequeue(struct buf_pool *const bp,
++                     const int vfd,
++                     const struct v4l2_format * const f)
++{
++    int fd;
++    struct qent_base *be;
++    int rc;
++    const bool mp = V4L2_TYPE_IS_MULTIPLANAR(f->type);
++    struct v4l2_plane planes[VIDEO_MAX_PLANES] = {{0}};
++    struct v4l2_buffer buffer = {
++        .type =  f->type,
++        .memory = V4L2_MEMORY_DMABUF
++    };
++    if (mp) {
++        buffer.length = f->fmt.pix_mp.num_planes;
++        buffer.m.planes = planes;
++    }
++    /* DQBUF one buffer of type f->type (retrying on EINTR) and match it to an inuse entry by plane 0 fd */
++    while ((rc = ioctl(vfd, VIDIOC_DQBUF, &buffer)) != 0 &&
++           errno == EINTR)
++        /* Loop */;
++    if (rc) {
++        request_log("Error DQing buffer type %d: %s\n", f->type, strerror(errno));
++        return NULL;
++    }
++
++    fd = mp ? planes[0].m.fd : buffer.m.fd;
++    be = queue_find_extract_fd(bp, fd);
++    if (!be) {
++        request_log("Failed to find fd %d in Q\n", fd);
++        return NULL;
++    }
++    /* Record the driver's timestamp and error flag on the entry; returns NULL on any failure above */
++    be->timestamp = buffer.timestamp;
++    be->status = (buffer.flags & V4L2_BUF_FLAG_ERROR) ? QENT_ERROR : QENT_DONE;
++    return be;
++}
++
++static void qe_dst_done(struct qent_dst * dst_be)
++{
++    pthread_mutex_lock(&dst_be->lock);
++    dst_be->waiting = false;
++    pthread_cond_broadcast(&dst_be->cond);
++    pthread_mutex_unlock(&dst_be->lock);
++    /* Drops the reference taken with qent_dst_ref() in mediabufs_start_request() */
++    qent_dst_unref(&dst_be);
++}
++
++static bool qe_dst_waiting(struct qent_dst *const dst_be)
++{   /* Test-and-set under the lock: returns the previous 'waiting' value and leaves it true */
++    bool waiting;
++    pthread_mutex_lock(&dst_be->lock);
++    waiting = dst_be->waiting;
++    dst_be->waiting = true;
++    pthread_mutex_unlock(&dst_be->lock);
++    return waiting;
++}
++
++
++static bool mediabufs_wants_poll(const struct mediabufs_ctl *const mbc)
++{   /* Polling is wanted while either pool still has entries queued to V4L2 */
++    return queue_is_inuse(mbc->src) || queue_is_inuse(mbc->dst);
++}
++
++static void mediabufs_poll_cb(void * v, short revents)
++{
++    struct mediabufs_ctl *mbc = v;
++    struct qent_src *src_be = NULL;
++    struct qent_dst *dst_be = NULL;
++    /* Poll-queue callback for the video fd; revents == 0 is treated as a timeout */
++    if (!revents)
++        request_err(mbc->dc, "%s: Timeout\n", __func__);
++
++    pthread_mutex_lock(&mbc->lock);
++    mbc->polling = false;
++    /* POLLOUT: a source buffer can be dequeued; POLLIN: a destination buffer can be dequeued */
++    if ((revents & POLLOUT) != 0)
++        src_be = base_to_src(qe_dequeue(mbc->src, mbc->vfd, &mbc->src_fmt));
++    if ((revents & POLLIN) != 0)
++        dst_be = base_to_dst(qe_dequeue(mbc->dst, mbc->vfd, &mbc->dst_fmt));
++
++    /* Reschedule */
++    if (mediabufs_wants_poll(mbc)) {
++        mbc->polling = true;
++        pollqueue_add_task(mbc->pt, 2000);
++    }
++    pthread_mutex_unlock(&mbc->lock);
++    /* Done outside mbc->lock: recycle the source entry and wake any waiter on the destination entry */
++    if (src_be)
++        queue_put_free(mbc->src, &src_be->base);
++    if (dst_be)
++        qe_dst_done(dst_be);
++}
++
++int qent_src_params_set(struct qent_src *const be_src, const struct timeval * timestamp)
++{
++    struct qent_base *const be = &be_src->base;
++    /* Stores the timestamp that qe_v4l2_queue() later puts in the v4l2_buffer; always returns 0 */
++    be->timestamp = *timestamp;
++    return 0;
++}
++
++struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst)
++{   /* Returns the entry's timestamp (set from the dequeued v4l2_buffer in qe_dequeue()) */
++    return be_dst->base.timestamp;
++}
++
++static int qent_base_realloc(struct qent_base *const be, const size_t len, struct dmabufs_ctl * dbsc)
++{   /* Ensures plane 0 can hold len bytes, reallocating through dbsc if needed; returns 0 or -ENOMEM */
++    if (!be->dh[0] || len > dmabuf_size(be->dh[0])) {
++        size_t newsize = round_up_size(len);
++        request_log("%s: Overrun %zu > %zu; trying %zu\n", __func__, len, be->dh[0] ? (size_t)dmabuf_size(be->dh[0]) : (size_t)0, newsize);
++        if (!dbsc) { /* callers pass NULL to forbid reallocation */
++            request_log("%s: No dmbabuf_ctrl for realloc\n", __func__);
++            return -ENOMEM;
++        }
++        if ((be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], newsize)) == NULL) {
++            request_log("%s: Realloc %zu failed\n", __func__, newsize);
++            return -ENOMEM;
++        }
++    }
++    return 0;
++}
++
++int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc)
++{   /* Public wrapper: makes sure the source entry's plane 0 buffer holds at least len bytes */
++    struct qent_base *const be = &be_src->base;
++    return qent_base_realloc(be, len, dbsc);
++}
++
++
++int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc)
++{
++    void * dst;
++    struct qent_base *const be = &be_src->base;
++    int rv;
++    // Copies len bytes of src to 'offset' within plane 0; returns 0, -1 on map failure, or a realloc error
++    // Realloc doesn't copy so don't alloc if offset != 0
++    if ((rv = qent_base_realloc(be, offset + len,
++                                be_src->fixed_size || offset ? NULL : dbsc)) != 0)
++        return rv;
++
++    dst = dmabuf_map(be->dh[0]); // Map first so a failure cannot leave write_start without a write_end
++    if (!dst)
++        return -1;
++    dmabuf_write_start(be->dh[0]);
++    memcpy((char*)dst + offset, src, len);
++    dmabuf_len_set(be->dh[0], len); // NOTE(review): records len, not offset + len - confirm intent for offset != 0
++    dmabuf_write_end(be->dh[0]);
++    return 0;
++}
++
++const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be_dst, unsigned int plane)
++{
++    if (plane < sizeof(be_dst->base.dh) / sizeof(be_dst->base.dh[0]))
++        return be_dst->base.dh[plane]; // handle stored for this plane (may itself be NULL)
++    return NULL; // plane index lies outside the handle array
++}
++
++int qent_dst_dup_fd(const struct qent_dst *const be_dst, unsigned int plane)
++{   // Returns dup() of the plane's dmabuf fd. NOTE(review): an out-of-range plane hands NULL to dmabuf_fd() - confirm that is handled
++    return dup(dmabuf_fd(qent_dst_dmabuf(be_dst, plane)));
++}
++
++MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc,
++                struct media_request **const pmreq,
++                struct qent_src **const psrc_be,
++                struct qent_dst *const dst_be,
++                const bool is_final)
++{
++    struct media_request * mreq = *pmreq;
++    struct qent_src *const src_be = *psrc_be;
++
++    // Req & src are always both "consumed" - the caller's pointers are cleared on success and on failure
++    *pmreq = NULL;
++    *psrc_be = NULL;
++
++    pthread_mutex_lock(&mbc->lock);
++
++    if (!src_be) // nothing can be queued without a source entry
++        goto fail1;
++
++    if (dst_be) { // dst is optional; when given it is queued before the src
++        if (qe_dst_waiting(dst_be)) {
++            request_info(mbc->dc, "Request buffer already waiting on start\n");
++            goto fail1;
++        }
++        dst_be->base.timestamp = (struct timeval){0,0};
++        if (qe_v4l2_queue(&dst_be->base, mbc->vfd, NULL, &mbc->dst_fmt, true, false))
++            goto fail1;
++
++        qent_dst_ref(dst_be); // extra ref taken before the entry goes onto the in-use Q
++        queue_put_inuse(mbc->dst, &dst_be->base);
++    }
++
++    if (qe_v4l2_queue(&src_be->base, mbc->vfd, mreq, &mbc->src_fmt, false, !is_final))
++        goto fail1;
++    queue_put_inuse(mbc->src, &src_be->base);
++
++    if (!mbc->polling && mediabufs_wants_poll(mbc)) { // arm the poll task only if it is not already armed
++        mbc->polling = true;
++        pollqueue_add_task(mbc->pt, 2000);
++    }
++    pthread_mutex_unlock(&mbc->lock);
++
++    if (media_request_start(mreq)) // started after the lock has been dropped
++        return MEDIABUFS_ERROR_OPERATION_FAILED;
++
++    return MEDIABUFS_STATUS_SUCCESS;
++
++fail1: // always entered with mbc->lock held
++    media_request_abort(&mreq);
++    if (src_be)
++        queue_put_free(mbc->src, &src_be->base);
++
++// *** TODO: If src Q fails this doesnt unwind properly - separate dst Q from src Q
++    if (dst_be) {
++        dst_be->base.status = QENT_ERROR;
++        qe_dst_done(dst_be);
++    }
++    pthread_mutex_unlock(&mbc->lock);
++    return MEDIABUFS_ERROR_OPERATION_FAILED;
++}
++
++
++static int qe_alloc_from_fmt(struct qent_base *const be,
++                   struct dmabufs_ctl *const dbsc,
++                   const struct v4l2_format *const fmt)
++{
++    unsigned int plane;
++
++    // Single-planar: one buffer of fmt.pix.sizeimage bytes in dh[0]
++    if (!V4L2_TYPE_IS_MULTIPLANAR(fmt->type)) {
++        const size_t size = fmt->fmt.pix.sizeimage;
++        be->dh[0] = dmabuf_realloc(dbsc, be->dh[0], size);
++        return be->dh[0] ? 0 : -1;
++    }
++    // Multi-planar: one buffer per plane, sized from plane_fmt[].sizeimage
++    for (plane = 0; plane != fmt->fmt.pix_mp.num_planes; ++plane) {
++        be->dh[plane] = dmabuf_realloc(dbsc, be->dh[plane],
++            fmt->fmt.pix_mp.plane_fmt[plane].sizeimage);
++        if (be->dh[plane])
++            continue;
++
++        // On failure free the lower-numbered planes and report the error
++        while (plane--) {
++            dmabuf_free(be->dh[plane]);
++            be->dh[plane] = NULL;
++        }
++        return -1;
++    }
++    return 0;
++}
++
++static MediaBufsStatus fmt_set(struct v4l2_format *const fmt, const int fd,
++            const enum v4l2_buf_type buftype,
++            uint32_t pixfmt,
++            const unsigned int width, const unsigned int height,
++                               const size_t bufsize)
++{
++    const bool is_mplane = V4L2_TYPE_IS_MULTIPLANAR(buftype);
++    unsigned int got_width, got_height;
++    uint32_t got_pixfmt;
++
++    // Start from a zeroed request with only the buffer type filled in
++    *fmt = (struct v4l2_format){.type = buftype};
++
++    if (is_mplane) {
++        fmt->fmt.pix_mp.pixelformat = pixfmt;
++        fmt->fmt.pix_mp.width = width;
++        fmt->fmt.pix_mp.height = height;
++        if (bufsize) {
++            fmt->fmt.pix_mp.plane_fmt[0].sizeimage = bufsize;
++            fmt->fmt.pix_mp.num_planes = 1;
++        }
++    }
++    else {
++        fmt->fmt.pix.pixelformat = pixfmt;
++        fmt->fmt.pix.width = width;
++        fmt->fmt.pix.height = height;
++        fmt->fmt.pix.sizeimage = bufsize;
++    }
++
++    // Retry S_FMT when interrupted by a signal; any other error is fatal
++    while (ioctl(fd, VIDIOC_S_FMT, fmt) != 0) {
++        if (errno != EINTR)
++            return MEDIABUFS_ERROR_OPERATION_FAILED;
++    }
++
++    // Read back what the ioctl left in *fmt for the buffer type in use
++    got_width = is_mplane ? fmt->fmt.pix_mp.width : fmt->fmt.pix.width;
++    got_height = is_mplane ? fmt->fmt.pix_mp.height : fmt->fmt.pix.height;
++    got_pixfmt = is_mplane ? fmt->fmt.pix_mp.pixelformat : fmt->fmt.pix.pixelformat;
++
++    // Treat anything where we don't get at least what we asked for as a fail
++    if (got_width < width || got_height < height || got_pixfmt != pixfmt)
++        return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
++
++    return MEDIABUFS_STATUS_SUCCESS;
++}
++
++static MediaBufsStatus find_fmt_flags(struct v4l2_format *const fmt,
++                   const int fd,
++                   const unsigned int type_v4l2,
++                   const uint32_t flags_must,
++                   const uint32_t flags_not,
++                   const unsigned int width,
++                   const unsigned int height,
++                   mediabufs_dst_fmt_accept_fn *const accept_fn,
++                   void *const accept_v)
++{
++    unsigned int i;
++
++    for (i = 0;; ++i) { // walk the format list by index until ENUM_FMT reports an error
++        struct v4l2_fmtdesc fmtdesc = {
++            .index = i,
++            .type = type_v4l2
++        };
++        while (ioctl(fd, VIDIOC_ENUM_FMT, &fmtdesc)) {
++            if (errno != EINTR) // EINTR is retried; any other error ends the search
++                return MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE;
++        }
++        if ((fmtdesc.flags & flags_must) != flags_must || // need every flags_must bit set...
++            (fmtdesc.flags & flags_not))                  // ...and no flags_not bit set
++            continue;
++        if (!accept_fn(accept_v, &fmtdesc)) // caller's filter: a zero return rejects this format
++            continue;
++
++        if (fmt_set(fmt, fd, fmtdesc.type, fmtdesc.pixelformat, // first candidate that can be set wins
++                width, height, 0) == MEDIABUFS_STATUS_SUCCESS)
++            return MEDIABUFS_STATUS_SUCCESS;
++    }
++    return 0; // not reached: the loop above only exits via return
++}
++
++
++/* Wait for qent done */
++
++MediaBufsStatus qent_dst_wait(struct qent_dst *const be_dst)
++{
++    struct qent_base *const be = &be_dst->base;
++    enum qent_status estat;
++
++    pthread_mutex_lock(&be_dst->lock);
++    while (be_dst->waiting &&                                  // block while the waiting flag is set...
++           !pthread_cond_wait(&be_dst->cond, &be_dst->lock))   // ...unless the cond wait itself fails
++        /* Loop */;
++    estat = be->status; // sampled while the entry lock is still held
++    pthread_mutex_unlock(&be_dst->lock);
++
++    return estat == QENT_DONE ? MEDIABUFS_STATUS_SUCCESS :     // done => success
++        estat == QENT_ERROR ? MEDIABUFS_ERROR_DECODING_ERROR : // error => decoding error
++            MEDIABUFS_ERROR_OPERATION_FAILED;                  // any other status => operation failed
++}
++
++const uint8_t * qent_dst_data(struct qent_dst *const be_dst, unsigned int buf_no)
++{   // Returns dmabuf_map() of buffer buf_no, or NULL if buf_no is outside the handle array (same check as qent_dst_dmabuf)
++    struct qent_base *const be = &be_dst->base;
++    return buf_no >= sizeof(be->dh)/sizeof(be->dh[0]) ? NULL : dmabuf_map(be->dh[buf_no]);
++}
++
++MediaBufsStatus qent_dst_read_start(struct qent_dst *const be_dst)
++{
++    struct qent_base *const base = &be_dst->base;
++    unsigned int n; // index of the plane being started; planes are walked until the first empty slot
++    for (n = 0; n != VIDEO_MAX_PLANES && base->dh[n]; ++n) {
++        if (!dmabuf_read_start(base->dh[n]))
++            continue;
++        while (n--) // unwind: end the read on every plane already started
++            dmabuf_read_end(base->dh[n]);
++        return MEDIABUFS_ERROR_ALLOCATION_FAILED;
++    }
++    return MEDIABUFS_STATUS_SUCCESS;
++}
++
++MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be_dst)
++{
++    MediaBufsStatus rv = MEDIABUFS_STATUS_SUCCESS;
++    unsigned int n;
++
++    // End the read on every populated plane; a failure is remembered but does not stop the walk
++    for (n = 0; n != VIDEO_MAX_PLANES && be_dst->base.dh[n]; ++n) {
++        if (dmabuf_read_end(be_dst->base.dh[n]) != 0)
++            rv = MEDIABUFS_ERROR_OPERATION_FAILED;
++    }
++    return rv;
++}
++
++struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst)
++{   // Add one to the entry's ref count (a NULL entry is tolerated) and return the same pointer
++    if (be_dst)
++        atomic_fetch_add(&be_dst->base.ref_count, 1);
++    return be_dst;
++}
++
++void qent_dst_unref(struct qent_dst ** const pbe_dst)
++{
++    struct qent_dst * const be_dst = *pbe_dst;
++    struct mediabufs_ctl * mbc;
++    if (!be_dst) // nothing to drop
++        return;
++    *pbe_dst = NULL; // the caller's pointer is cleared even if other refs remain
++
++    if (atomic_fetch_sub(&be_dst->base.ref_count, 1) != 0) // count is zero-based: previous value 0 == last ref
++        return;
++
++    if ((mbc = ff_weak_link_lock(&be_dst->mbc_wl)) != NULL) { // owning mbc still reachable through the weak link
++        queue_put_free(mbc->dst, &be_dst->base);
++        ff_weak_link_unlock(be_dst->mbc_wl);
++    }
++    else { // no mbc to return the entry to: free it outright
++        qe_dst_free(be_dst);
++    }
++}
++
++MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst,
++                unsigned int plane,
++                int fd, size_t size)
++{   // Attach fd (size bytes) as the dmabuf for plane of an import-mode entry
++    struct qent_base *const be = &be_dst->base;
++    struct dmabuf_h * dh;
++    // Reject a plane outside dh[] first so the slot test below cannot index out of bounds
++    if (plane >= sizeof(be->dh)/sizeof(be->dh[0]) || be->status != QENT_IMPORT || be->dh[plane])
++        return MEDIABUFS_ERROR_OPERATION_FAILED;
++
++    dh = dmabuf_import(fd, size);
++    if (!dh)
++        return MEDIABUFS_ERROR_ALLOCATION_FAILED;
++
++    be->dh[plane] = dh;
++    return MEDIABUFS_STATUS_SUCCESS;
++}
++
++static int create_dst_buf(struct mediabufs_ctl *const mbc)
++{   // Create one V4L2_MEMORY_DMABUF buffer for the dst format. Returns its index (>= 0) or -errno on failure.
++    struct v4l2_create_buffers cbuf = {
++        .count = 1,
++        .memory = V4L2_MEMORY_DMABUF,
++        .format = mbc->dst_fmt,
++    };
++
++    while (ioctl(mbc->vfd, VIDIOC_CREATE_BUFS, &cbuf)) {
++        const int err = errno; // keep errno positive: the EINTR test then works and -err is a negative error code
++        if (err != EINTR) {
++            request_err(mbc->dc, "%s: Failed to create V4L2 buffer\n", __func__);
++            return -err;
++        }
++    }
++    return cbuf.index;
++}
++
++struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc, struct dmabufs_ctl *const dbsc)
++{
++    struct qent_dst * be_dst;
++
++    if (mbc == NULL) { // no controller: return a bare entry in QENT_IMPORT state (see qent_dst_import_fd)
++        be_dst = qe_dst_new();
++        if (be_dst)
++            be_dst->base.status = QENT_IMPORT;
++        return be_dst;
++    }
++
++    be_dst = base_to_dst(queue_tryget_free(mbc->dst)); // reuse an entry from the free Q when one is there
++    if (!be_dst) { // otherwise build a new entry and a new V4L2 buffer slot for it
++        int index;
++
++        be_dst = qe_dst_new();
++        if (!be_dst)
++            return NULL;
++
++        if ((be_dst->mbc_wl = ff_weak_link_ref(mbc->this_wlm)) == NULL || // weak link back to mbc, used by qent_dst_unref
++            (index = create_dst_buf(mbc)) < 0) {
++            qe_dst_free(be_dst);
++            return NULL;
++        }
++
++        be_dst->base.index = (uint32_t)index;
++    }
++
++    if (qe_alloc_from_fmt(&be_dst->base, dbsc, &mbc->dst_fmt)) {
++        /* Given  how create buf works we can't uncreate it on alloc failure
++         * all we can do is put it on the free Q
++        */
++        queue_put_free(mbc->dst, &be_dst->base);
++        return NULL;
++    }
++
++    be_dst->base.status = QENT_PENDING;
++    atomic_store(&be_dst->base.ref_count, 0); // zero-based count: 0 stands for the single reference returned here
++    return be_dst;
++}
++
++const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc)
++{   // Returns a pointer to the dst format held inside mbc (no copy is made)
++    return &mbc->dst_fmt;
++}
++
++MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc,
++               const unsigned int width,
++               const unsigned int height,
++               mediabufs_dst_fmt_accept_fn *const accept_fn,
++               void *const accept_v)
++{
++    // Search passes in order: formats without the EMULATED flag first, then emulated ones
++    static const struct {
++        unsigned int flags_must;
++        unsigned int flags_not;
++    } passes[] = {
++        {0, V4L2_FMT_FLAG_EMULATED},
++        {V4L2_FMT_FLAG_EMULATED, 0},
++    };
++
++    const unsigned int n_passes = sizeof(passes) / sizeof(passes[0]);
++    const enum v4l2_buf_type buf_type = mbc->dst_fmt.type;
++    MediaBufsStatus status;
++    unsigned int pass = 0;
++
++    // Stop at the first pass that yields anything other than "unsupported";
++    // if every pass is unsupported, that status is what the caller receives
++    do {
++        status = find_fmt_flags(&mbc->dst_fmt, mbc->vfd, buf_type,
++                                passes[pass].flags_must,
++                                passes[pass].flags_not,
++                                width, height, accept_fn, accept_v);
++    } while (status == MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE &&
++             ++pass != n_passes);
++
++    return status;
++}
++
++MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, unsigned int n)
++{
++    // Create n V4L2 dst buffer slots, each paired with a fresh queue entry,
++    // and park them on the dst free queue. Slots made before a failure stay queued.
++
++    while (n-- != 0)
++    {
++        struct qent_dst * const slot = qe_dst_new();
++        int rv;
++
++        if (slot == NULL)
++            return MEDIABUFS_ERROR_OPERATION_FAILED;
++
++        if ((rv = create_dst_buf(mbc)) < 0) {
++            qe_dst_free(slot);
++            return MEDIABUFS_ERROR_OPERATION_FAILED;
++        }
++
++        // Remember the V4L2 index and make the slot available
++        slot->base.index = (uint32_t)rv;
++        queue_put_free(mbc->dst, &slot->base);
++    }
++    return MEDIABUFS_STATUS_SUCCESS;
++}
++
++struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc)
++{   // Take an entry from the src free Q and mark it QENT_PENDING
++    struct qent_base * buf = queue_get_free(mbc->src);
++    buf->status = QENT_PENDING; // NOTE(review): no NULL check - assumes queue_get_free() never returns NULL; confirm
++    return base_to_src(buf);
++}
++
++void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src)
++{
++    struct qent_src *const ent = *pqe_src; // a NULL entry makes this a no-op
++    if (ent != NULL) {
++        *pqe_src = NULL; // caller's pointer is cleared before the entry goes back
++        queue_put_free(mbc->src, &ent->base);
++    }
++}
++
++/* src format must have been set up before this */
++MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const mbc,
++                  struct dmabufs_ctl * const dbsc,
++                  unsigned int n)
++{
++    unsigned int i;
++    struct v4l2_requestbuffers req = {
++        .count = n,
++        .type = mbc->src_fmt.type,
++        .memory = V4L2_MEMORY_DMABUF
++    };
++
++    bq_free_all_free_src(mbc->src); // drop whatever is currently on the src free Q before re-requesting
++    while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1) {
++        if (errno != EINTR) { // EINTR is retried
++            request_err(mbc->dc, "%s: Failed to request src bufs\n", __func__);
++            return MEDIABUFS_ERROR_OPERATION_FAILED;
++        }
++    }
++
++    if (n > req.count) { // fewer buffers granted than asked for: carry on with what we got
++        request_info(mbc->dc, "Only allocated %d of %d src buffers requested\n", req.count, n);
++        n = req.count;
++    }
++
++    for (i = 0; i != n; ++i) { // one queue entry + dmabuf per V4L2 buffer index
++        struct qent_src *const be_src = qe_src_new();
++        if (!be_src) {
++            request_err(mbc->dc, "Failed to create src be %d\n", i);
++            goto fail;
++        }
++        if (qe_alloc_from_fmt(&be_src->base, dbsc, &mbc->src_fmt)) {
++            qe_src_free(be_src);
++            goto fail;
++        }
++        be_src->base.index = i;
++        be_src->fixed_size = !mediabufs_src_resizable(mbc); // fixed-size entries are never reallocated by qent_src_data_copy
++
++        queue_put_free(mbc->src, &be_src->base);
++    }
++
++    return MEDIABUFS_STATUS_SUCCESS;
++
++fail: // undo: free the entries queued so far and request zero buffers
++    bq_free_all_free_src(mbc->src);
++    req.count = 0;
++    while (ioctl(mbc->vfd, VIDIOC_REQBUFS, &req) == -1 &&
++           errno == EINTR)
++        /* Loop */;
++
++    return MEDIABUFS_ERROR_OPERATION_FAILED;
++}
++
++
++
++/*
++ * Set stuff order:
++ *  Set src fmt
++ *  Set parameters (sps) on vfd
++ *  Negotiate dst format (dst_fmt_set)
++ *  Create src buffers
++ *  Alloc a dst buffer or Create dst slots
++*/
++MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc)
++{
++    // Nothing to do when streaming is already on
++    if (!mbc->stream_on) {
++        // Turn on the src queue first, then dst; src is turned off again if dst fails
++        if (set_stream(mbc->vfd, mbc->src_fmt.type, true) < 0) {
++            request_log("Failed to set stream on src type %d\n", mbc->src_fmt.type);
++            return MEDIABUFS_ERROR_OPERATION_FAILED;
++        }
++        if (set_stream(mbc->vfd, mbc->dst_fmt.type, true) < 0) {
++            request_log("Failed to set stream on dst type %d\n", mbc->dst_fmt.type);
++            set_stream(mbc->vfd, mbc->src_fmt.type, false);
++            return MEDIABUFS_ERROR_OPERATION_FAILED;
++        }
++
++        mbc->stream_on = true;
++    }
++    return MEDIABUFS_STATUS_SUCCESS;
++}
++
++MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc)
++{
++    bool failed = false;
++
++    if (!mbc->stream_on)
++        return MEDIABUFS_STATUS_SUCCESS;
++
++    // Turn off dst then src; a failure on one does not stop the other being attempted
++    if (set_stream(mbc->vfd, mbc->dst_fmt.type, false) < 0) {
++        request_log("Failed to set stream off dst type %d\n", mbc->dst_fmt.type);
++        failed = true;
++    }
++    if (set_stream(mbc->vfd, mbc->src_fmt.type, false) < 0) {
++        request_log("Failed to set stream off src type %d\n", mbc->src_fmt.type);
++        failed = true;
++    }
++    // The flag is cleared whether or not either call failed
++    mbc->stream_on = false;
++    return failed ? MEDIABUFS_ERROR_OPERATION_FAILED : MEDIABUFS_STATUS_SUCCESS;
++}
++
++int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq, struct v4l2_ext_control control_array[], unsigned int n)
++{
++    struct v4l2_ext_controls ctrls = {
++        .count = n,
++        .controls = control_array
++    };
++
++    // When a media request is supplied the controls are addressed to that request's fd
++    if (mreq != NULL) {
++        ctrls.request_fd = media_request_fd(mreq);
++        ctrls.which = V4L2_CTRL_WHICH_REQUEST_VAL;
++    }
++
++    // Retry when interrupted by a signal; any other failure is logged and returned as -errno
++    while (ioctl(mbc->vfd, VIDIOC_S_EXT_CTRLS, &ctrls) != 0) {
++        const int err = errno;
++        if (err == EINTR)
++            continue;
++        request_err(mbc->dc, "Unable to set controls: %s\n", strerror(err));
++        return -err;
++    }
++    return 0;
++}
++
++MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc,
++                struct media_request * const mreq,
++                unsigned int id, void *data,
++                unsigned int size)
++{
++    // Single-control wrapper over mediabufs_ctl_set_ext_ctrls(); its int result is folded into a status
++    struct v4l2_ext_control ctrl = {
++        .id = id,
++        .size = size,
++        .ptr = data
++    };
++    return mediabufs_ctl_set_ext_ctrls(mbc, mreq, &ctrl, 1) != 0 ?
++        MEDIABUFS_ERROR_OPERATION_FAILED : MEDIABUFS_STATUS_SUCCESS;
++}
++
++MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc,
++                                      enum v4l2_buf_type buf_type,
++                   const uint32_t pixfmt,
++                   const uint32_t width, const uint32_t height,
++                                      const size_t bufsize)
++{   // Set the src format via fmt_set(); a failure is logged and its status handed back unchanged
++    const MediaBufsStatus status = fmt_set(&mbc->src_fmt, mbc->vfd, buf_type, pixfmt, width, height, bufsize);
++    if (status == MEDIABUFS_STATUS_SUCCESS)
++        return status;
++    request_err(mbc->dc, "Failed to set src buftype %d, format %#x %dx%d\n", buf_type, pixfmt, width, height);
++    return status;
++}
++
++int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n)
++{   // Query each of the n entries of ctrls[] in place. Returns 0, or -errno from the last entry that failed.
++    int rv = 0;
++    while (n--) {
++        while (ioctl(mbc->vfd, VIDIOC_QUERY_EXT_CTRL, ctrls)) {
++            const int err = errno;
++            if (err != EINTR) { // EINTR is retried
++                // Often used for probing - errors are to be expected
++                request_debug(mbc->dc, "Failed to query ext id=%#x, err=%d\n", ctrls->id, err);
++                ctrls->type = 0; // 0 is invalid
++                rv = -err;
++                break; // give up on this entry but carry on with the remaining ones
++            }
++        }
++        ++ctrls;
++    }
++    return rv;
++}
++
++int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc)
++{   // Returns non-zero when the src buffer type is multi-planar, zero otherwise
++    // Single planar OUTPUT can only take exact size buffers
++    // Multiplanar will take larger than negotiated
++    return V4L2_TYPE_IS_MULTIPLANAR(mbc->src_fmt.type);
++}
++
++static void mediabufs_ctl_delete(struct mediabufs_ctl *const mbc)
++{
++    if (!mbc) // NULL is a no-op
++        return;
++
++    // Break the weak link first
++    ff_weak_link_break(&mbc->this_wlm);
++
++    polltask_delete(&mbc->pt); // removes the poll task before the queues are torn down
++
++    mediabufs_stream_off(mbc);
++
++    // Empty v4l2 buffer stash
++    request_buffers(mbc->vfd, mbc->src_fmt.type, V4L2_MEMORY_MMAP, 0); // NOTE(review): buffers were requested as DMABUF elsewhere - confirm MMAP is intended
++    request_buffers(mbc->vfd, mbc->dst_fmt.type, V4L2_MEMORY_MMAP, 0);
++
++    bq_free_all_free_src(mbc->src);
++    bq_free_all_inuse_src(mbc->src);
++    bq_free_all_free_dst(mbc->dst);
++
++    { // dst entries still in use are completed with QENT_ERROR instead of being freed here
++        struct qent_dst *dst_be;
++        while ((dst_be = base_to_dst(bq_get_inuse(mbc->dst))) != NULL) {
++            dst_be->base.timestamp = (struct timeval){0};
++            dst_be->base.status = QENT_ERROR;
++            qe_dst_done(dst_be);
++        }
++    }
++
++    queue_delete(mbc->dst);
++    queue_delete(mbc->src);
++    close(mbc->vfd);
++    pthread_mutex_destroy(&mbc->lock);
++
++    free(mbc);
++}
++
++struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc)
++{   // Add one to mbc's ref count and return the same pointer; mbc must not be NULL (it is dereferenced unchecked)
++    atomic_fetch_add(&mbc->ref_count, 1);
++    return mbc;
++}
++
++void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc)
++{
++    struct mediabufs_ctl *const ctl = *pmbc;
++
++    if (ctl == NULL)
++        return;
++
++    // The caller's pointer is cleared whether or not this was the last reference
++    *pmbc = NULL;
++    // ref_count is zero-based: a previous value of 0 means the last reference just went
++    if (atomic_fetch_sub(&ctl->ref_count, 1) == 0)
++        mediabufs_ctl_delete(ctl);
++}
++
++static int set_capabilities(struct mediabufs_ctl *const mbc)
++{
++    struct v4l2_capability cap = { 0 };
++    uint32_t caps;
++
++    if (ioctl(mbc->vfd, VIDIOC_QUERYCAP, &cap) != 0) {
++        const int err = errno;
++        request_err(mbc->dc, "Failed to get capabilities: %s\n", strerror(err));
++        return -err;
++    }
++
++    // Use device_caps when V4L2_CAP_DEVICE_CAPS is set, otherwise the plain capabilities
++    caps = (cap.capabilities & V4L2_CAP_DEVICE_CAPS) ? cap.device_caps : cap.capabilities;
++
++    // An M2M capability is required; the multi-planar variant takes precedence
++    if ((caps & (V4L2_CAP_VIDEO_M2M_MPLANE | V4L2_CAP_VIDEO_M2M)) == 0) {
++        request_err(mbc->dc, "No M2M capabilities (%#x)\n", caps);
++        return -EINVAL;
++    }
++    if ((caps & V4L2_CAP_VIDEO_M2M_MPLANE) != 0) {
++        mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT_MPLANE;
++        mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
++    }
++    else {
++        mbc->src_fmt.type = V4L2_BUF_TYPE_VIDEO_OUTPUT;
++        mbc->dst_fmt.type = V4L2_BUF_TYPE_VIDEO_CAPTURE;
++    }
++
++    return 0;
++}
++
++/* One of these per context */
++struct mediabufs_ctl * mediabufs_ctl_new(void * const dc, const char * vpath, struct pollqueue *const pq)
++{
++    struct mediabufs_ctl *const mbc = calloc(1, sizeof(*mbc)); // zeroed: ref_count starts at 0, stream_on/polling false
++
++    if (!mbc)
++        return NULL;
++
++    mbc->dc = dc;
++    // dc is the context pointer handed to the request_err()/request_info() logging calls
++    mbc->pq = pq;
++    pthread_mutex_init(&mbc->lock, NULL); // NOTE(review): none of the fail paths below destroy this mutex
++
++    /* Pick a default  - could we scan for this? */
++    if (vpath == NULL)
++        vpath = "/dev/media0"; // NOTE(review): a media node default, yet the fd is used for VIDIOC_* ioctls - confirm
++
++    while ((mbc->vfd = open(vpath, O_RDWR)) == -1)
++    {
++        const int err = errno;
++        if (err != EINTR) { // EINTR is retried
++            request_err(dc, "Failed to open video dev '%s': %s\n", vpath, strerror(err));
++            goto fail0;
++        }
++    }
++
++    if (set_capabilities(mbc)) { // also selects the src/dst buffer types
++        request_err(dc, "Bad capabilities for video dev '%s'\n", vpath);
++        goto fail1;
++    }
++
++    mbc->src = queue_new(mbc->vfd);
++    if (!mbc->src)
++        goto fail1;
++    mbc->dst = queue_new(mbc->vfd);
++    if (!mbc->dst)
++        goto fail2;
++    mbc->pt = polltask_new(pq, mbc->vfd, POLLIN | POLLOUT, mediabufs_poll_cb, mbc);
++    if (!mbc->pt)
++        goto fail3;
++    mbc->this_wlm = ff_weak_link_new(mbc); // master end of the weak link that dst entries reference
++    if (!mbc->this_wlm)
++        goto fail4;
++
++    /* Cannot add polltask now - polling with nothing pending
++     * generates infinite error polls
++    */
++    return mbc;
++
++fail4: // each label undoes one more construction step, in reverse order
++    polltask_delete(&mbc->pt);
++fail3:
++    queue_delete(mbc->dst);
++fail2:
++    queue_delete(mbc->src);
++fail1:
++    close(mbc->vfd);
++fail0:
++    free(mbc);
++    request_info(dc, "%s: FAILED\n", __func__);
++    return NULL;
++}
++
++
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_media.h
+@@ -0,0 +1,148 @@
++/*
++ * v4l2_req_media.h
++*
++ * Permission is hereby granted, free of charge, to any person obtaining a
++ * copy of this software and associated documentation files (the
++ * "Software"), to deal in the Software without restriction, including
++ * without limitation the rights to use, copy, modify, merge, publish,
++ * distribute, sub license, and/or sell copies of the Software, and to
++ * permit persons to whom the Software is furnished to do so, subject to
++ * the following conditions:
++ *
++ * The above copyright notice and this permission notice (including the
++ * next paragraph) shall be included in all copies or substantial portions
++ * of the Software.
++ *
++ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
++ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
++ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
++ * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
++ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
++ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
++ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
++ */
++
++#ifndef _MEDIA_H_
++#define _MEDIA_H_
++
++#include <stdbool.h>
++#include <stdint.h>
++
++struct v4l2_format;
++struct v4l2_fmtdesc;
++struct v4l2_query_ext_ctrl;
++
++struct pollqueue;
++struct media_request;
++struct media_pool;
++
++typedef enum media_buf_status {
++    MEDIABUFS_STATUS_SUCCESS = 0,            // must stay 0: callers compare against it and test for non-zero
++    MEDIABUFS_ERROR_OPERATION_FAILED,        // generic failure (ioctl error, bad state, allocation of an entry)
++    MEDIABUFS_ERROR_DECODING_ERROR,          // qent_dst_wait(): the entry finished with QENT_ERROR
++    MEDIABUFS_ERROR_UNSUPPORTED_BUFFERTYPE,  // format negotiation could not get what was asked for
++    MEDIABUFS_ERROR_UNSUPPORTED_RT_FORMAT,
++    MEDIABUFS_ERROR_ALLOCATION_FAILED,       // dmabuf import failed, or a read could not be started
++} MediaBufsStatus;
++
++struct media_pool * media_pool_new(const char * const media_path,
++                   struct pollqueue * const pq,
++                   const unsigned int n);
++void media_pool_delete(struct media_pool ** pmp);
++
++// Obtain a media request
++// Will block if none available - has a 2sec timeout
++struct media_request * media_request_get(struct media_pool * const mp);
++int media_request_fd(const struct media_request * const req);
++
++// Start this request
++// Request structure is returned to pool once done
++int media_request_start(struct media_request * const req);
++
++// Return an *unstarted* media_request to the pool
++// May later be upgraded to allow for aborting a started req
++int media_request_abort(struct media_request ** const preq);
++
++
++struct mediabufs_ctl;
++struct qent_src;
++struct qent_dst;
++struct dmabuf_h;
++struct dmabufs_ctl;
++
++int qent_src_params_set(struct qent_src *const be, const struct timeval * timestamp);
++struct timeval qent_dst_timestamp_get(const struct qent_dst *const be_dst);
++
++// prealloc
++int qent_src_alloc(struct qent_src *const be_src, const size_t len, struct dmabufs_ctl * dbsc);
++// dbsc may be NULL if realloc not required
++int qent_src_data_copy(struct qent_src *const be_src, const size_t offset, const void *const src, const size_t len, struct dmabufs_ctl * dbsc);
++const struct dmabuf_h * qent_dst_dmabuf(const struct qent_dst *const be, unsigned int plane);
++int qent_dst_dup_fd(const struct qent_dst *const be, unsigned int plane);
++MediaBufsStatus qent_dst_wait(struct qent_dst *const be);
++void qent_dst_delete(struct qent_dst *const be);
++// Returns a qent_dst to its mbc free Q or deletes it if the mbc is dead
++void qent_dst_unref(struct qent_dst ** const pbe_dst);
++struct qent_dst * qent_dst_ref(struct qent_dst * const be_dst);
++
++const uint8_t * qent_dst_data(struct qent_dst *const be, unsigned int buf_no);
++MediaBufsStatus qent_dst_read_start(struct qent_dst *const be);
++MediaBufsStatus qent_dst_read_stop(struct qent_dst *const be);
++/* Import an fd unattached to any mediabuf */
++MediaBufsStatus qent_dst_import_fd(struct qent_dst *const be_dst,
++                unsigned int plane,
++                int fd, size_t size);
++
++MediaBufsStatus mediabufs_start_request(struct mediabufs_ctl *const mbc,
++                struct media_request **const pmreq,
++                struct qent_src **const psrc_be,
++                struct qent_dst *const dst_be,
++                const bool is_final);
++// Get / alloc a dst buffer & associate with a slot
++// * BEWARE * Currently has no alloc limit
++struct qent_dst* mediabufs_dst_qent_alloc(struct mediabufs_ctl *const mbc,
++                           struct dmabufs_ctl *const dbsc);
++// Create dst slots without alloc
++MediaBufsStatus mediabufs_dst_slots_create(struct mediabufs_ctl *const mbc, unsigned int n);
++
++MediaBufsStatus mediabufs_stream_on(struct mediabufs_ctl *const mbc);
++MediaBufsStatus mediabufs_stream_off(struct mediabufs_ctl *const mbc);
++const struct v4l2_format *mediabufs_dst_fmt(struct mediabufs_ctl *const mbc);
++
++typedef int mediabufs_dst_fmt_accept_fn(void * v, const struct v4l2_fmtdesc *fmtdesc);
++
++MediaBufsStatus mediabufs_dst_fmt_set(struct mediabufs_ctl *const mbc,
++               const unsigned int width,
++               const unsigned int height,
++               mediabufs_dst_fmt_accept_fn *const accept_fn,
++               void *const accept_v);
++struct qent_src *mediabufs_src_qent_get(struct mediabufs_ctl *const mbc);
++void mediabufs_src_qent_abort(struct mediabufs_ctl *const mbc, struct qent_src **const pqe_src);
++
++int mediabufs_ctl_set_ext_ctrls(struct mediabufs_ctl * mbc, struct media_request * const mreq,
++                                struct v4l2_ext_control control_array[], unsigned int n);
++MediaBufsStatus mediabufs_set_ext_ctrl(struct mediabufs_ctl *const mbc,
++                struct media_request * const mreq,
++                unsigned int id, void *data,
++                unsigned int size);
++int mediabufs_ctl_query_ext_ctrls(struct mediabufs_ctl * mbc, struct v4l2_query_ext_ctrl ctrls[], unsigned int n);
++
++int mediabufs_src_resizable(const struct mediabufs_ctl *const mbc);
++
++MediaBufsStatus mediabufs_src_fmt_set(struct mediabufs_ctl *const mbc,
++                                      enum v4l2_buf_type buf_type,
++                                      const uint32_t pixfmt,
++                                      const uint32_t width, const uint32_t height,
++                                      const size_t bufsize);
++
++MediaBufsStatus mediabufs_src_pool_create(struct mediabufs_ctl *const rw,
++                  struct dmabufs_ctl * const dbsc,
++                  unsigned int n);
++
++struct mediabufs_ctl * mediabufs_ctl_new(void * const dc,
++                     const char *vpath, struct pollqueue *const pq);
++void mediabufs_ctl_unref(struct mediabufs_ctl **const pmbc);
++struct mediabufs_ctl * mediabufs_ctl_ref(struct mediabufs_ctl *const mbc);
++
++
++#endif
+--- /dev/null
++++ b/libavcodec/v4l2_req_pollqueue.c
+@@ -0,0 +1,363 @@
++#include <errno.h>
++#include <limits.h>
++#include <poll.h>
++#include <pthread.h>
++#include <semaphore.h>
++#include <stdatomic.h>
++#include <stdbool.h>
++#include <stdlib.h>
++#include <stdint.h>
++#include <stdio.h>
++#include <string.h>
++#include <unistd.h>
++#include <sys/eventfd.h>
++
++#include "v4l2_req_pollqueue.h"
++#include "v4l2_req_utils.h"
++
++
++struct pollqueue;
++
++enum polltask_state {
++    POLLTASK_UNQUEUED = 0,  // initial state: polltask_new() leaves the field zeroed
++    POLLTASK_QUEUED,
++    POLLTASK_RUNNING,
++    POLLTASK_Q_KILL,        // set by polltask_delete() when the task was not running
++    POLLTASK_RUN_KILL,      // set by polltask_delete() when the task was running
++};
++
++struct polltask {
++    struct polltask *next;      // doubly linked list links, maintained by pollqueue_rem_task()
++    struct polltask *prev;
++    struct pollqueue *q;        // owning queue; a reference is taken in polltask_new()
++    enum polltask_state state;
++
++    int fd;                     // fd and events passed to polltask_new()
++    short events;
++
++    void (*fn)(void *v, short revents); // callback and its opaque first argument
++    void * v;
++
++    uint64_t timeout; /* CLOCK_MONOTONIC time, 0 => never */
++    sem_t kill_sem;             // polltask_delete() waits on this before freeing the task
++};
++
++struct pollqueue {
++    atomic_int ref_count;
++    pthread_mutex_t lock;       // held by polltask_delete() while it changes a task's state
++
++    struct polltask *head;      // ends of the task list; updated by pollqueue_rem_task()
++    struct polltask *tail;
++
++    bool kill;
++    bool no_prod;               // when set, polltask_delete() skips pollqueue_prod()
++    int prod_fd;                // pollqueue_prod() writes a 64-bit 1 here - presumably an eventfd; confirm
++    struct polltask *prod_pt;
++    pthread_t worker;
++};
++
++struct polltask *polltask_new(struct pollqueue *const pq,
++                              const int fd, const short events,
++                  void (*const fn)(void *v, short revents),
++                  void *const v)
++{
++    struct polltask *task;
++
++    // A task with an empty event mask is refused; so is one that cannot be allocated
++    if (events == 0 || (task = malloc(sizeof(*task))) == NULL)
++        return NULL;
++
++    // Unlinked, unqueued, no timeout; the task holds a reference on its queue
++    *task = (struct polltask){
++        .q = pollqueue_ref(pq),
++        .fd = fd,
++        .events = events,
++        .fn = fn,
++        .v = v,
++        .state = POLLTASK_UNQUEUED,
++        .next = NULL,
++        .prev = NULL
++    };
++
++    // Semaphore starts at 0: polltask_delete() blocks on it
++    sem_init(&task->kill_sem, 0, 0);
++
++    return task;
++}
++
++static void pollqueue_rem_task(struct pollqueue *const pq, struct polltask *const pt)
++{
++    if (pt->prev)
++        pt->prev->next = pt->next;
++    else
++        pq->head = pt->next;
++    if (pt->next)
++        pt->next->prev = pt->prev;
++    else
++        pq->tail = pt->prev;
++    pt->next = NULL;
++    pt->prev = NULL;
++}
++
++static void polltask_free(struct polltask * const pt)
++{
++    sem_destroy(&pt->kill_sem);
++    free(pt);
++}
++
++static int pollqueue_prod(const struct pollqueue *const pq)
++{
++    static const uint64_t one = 1;
++    return write(pq->prod_fd, &one, sizeof(one));
++}
++
++void polltask_delete(struct polltask **const ppt)
++{
++    struct polltask *const pt = *ppt;
++    struct pollqueue * pq;
++    enum polltask_state state;
++    bool prodme;
++
++    if (!pt)
++        return;
++
++    pq = pt->q;
++    pthread_mutex_lock(&pq->lock);
++    state = pt->state;
++    pt->state = (state == POLLTASK_RUNNING) ? POLLTASK_RUN_KILL : POLLTASK_Q_KILL;
++    prodme = !pq->no_prod;
++    pthread_mutex_unlock(&pq->lock);
++
++    if (state != POLLTASK_UNQUEUED) {
++        if (prodme)
++            pollqueue_prod(pq);
++        while (sem_wait(&pt->kill_sem) && errno == EINTR)
++            /* loop */;
++    }
++
++    // Leave zapping the ref until we have DQed the PT as might well be
++    // legitimately used in it
++    *ppt = NULL;
++    polltask_free(pt);
++    pollqueue_unref(&pq);
++}
++
++static uint64_t pollqueue_now(int timeout)
++{
++    struct timespec now;
++    uint64_t now_ms;
++
++    if (clock_gettime(CLOCK_MONOTONIC, &now))
++        return 0;
++    now_ms = (now.tv_nsec / 1000000) + (uint64_t)now.tv_sec * 1000 + timeout;
++    return now_ms ? now_ms : (uint64_t)1;
++}
++
++void pollqueue_add_task(struct polltask *const pt, const int timeout)
++{
++    bool prodme = false;
++    struct pollqueue * const pq = pt->q;
++
++    pthread_mutex_lock(&pq->lock);
++    if (pt->state != POLLTASK_Q_KILL && pt->state != POLLTASK_RUN_KILL) {
++        if (pq->tail)
++            pq->tail->next = pt;
++        else
++            pq->head = pt;
++        pt->prev = pq->tail;
++        pt->next = NULL;
++        pt->state = POLLTASK_QUEUED;
++        pt->timeout = timeout < 0 ? 0 : pollqueue_now(timeout);
++        pq->tail = pt;
++        prodme = !pq->no_prod;
++    }
++    pthread_mutex_unlock(&pq->lock);
++    if (prodme)
++        pollqueue_prod(pq);
++}
++
++static void *poll_thread(void *v)  /* worker thread body; v is the struct pollqueue */
++{
++    struct pollqueue *const pq = v;
++    struct pollfd *a = NULL;
++    size_t asize = 0;
++
++    pthread_mutex_lock(&pq->lock);
++    do {
++        unsigned int i;
++        unsigned int n = 0;
++        struct polltask *pt, *pt_next;
++        uint64_t now = pollqueue_now(0);
++        int timeout = -1;
++        int rv;
++
++        for (pt = pq->head; pt; pt = pt_next) {
++            int64_t t;
++
++            /* Save the successor first: a killed task is unlinked below
++             * and may be freed by its deleter once kill_sem is posted */
++            pt_next = pt->next;
++            if (pt->state == POLLTASK_Q_KILL) {
++                pollqueue_rem_task(pq, pt);
++                sem_post(&pt->kill_sem);
++                continue;
++            }
++
++            if (n >= asize) {
++                asize = asize ? asize * 2 : 4;
++                a = realloc(a, asize * sizeof(*a));
++                if (!a) {
++                    request_log("Failed to realloc poll array to %zu\n", asize);
++                    goto fail_locked;
++                }
++            }
++
++            a[n++] = (struct pollfd){
++                .fd = pt->fd,
++                .events = pt->events
++            };
++
++            t = (int64_t)(pt->timeout - now);  /* ms until this task expires */
++            if (pt->timeout && t < INT_MAX &&
++                (timeout < 0 || (int)t < timeout))
++                timeout = (t < 0) ? 0 : (int)t;
++        }
++        pthread_mutex_unlock(&pq->lock);
++
++        if ((rv = poll(a, n, timeout)) == -1) {
++            if (errno != EINTR) {
++                request_log("Poll error: %s\n", strerror(errno));
++                goto fail_unlocked;
++            }
++        }
++
++        pthread_mutex_lock(&pq->lock);
++        now = pollqueue_now(0);
++
++        /* Prodding in this loop is pointless and might lead to
++         * infinite looping
++        */
++        pq->no_prod = true;
++        for (i = 0, pt = pq->head; i < n; ++i) {  /* a[i] pairs with the i-th queued task */
++            pt_next = pt->next;
++
++            /* Pending? */
++            if (a[i].revents ||
++                (pt->timeout && (int64_t)(now - pt->timeout) >= 0)) {
++                pollqueue_rem_task(pq, pt);
++                if (pt->state == POLLTASK_QUEUED)
++                    pt->state = POLLTASK_RUNNING;
++                if (pt->state == POLLTASK_Q_KILL)
++                    pt->state = POLLTASK_RUN_KILL;
++                pthread_mutex_unlock(&pq->lock);
++
++                /* This can add new entries to the Q but as
++                 * those are added to the tail our existing
++                 * chain remains intact
++                */
++                pt->fn(pt->v, a[i].revents);
++
++                pthread_mutex_lock(&pq->lock);
++                if (pt->state == POLLTASK_RUNNING)
++                    pt->state = POLLTASK_UNQUEUED;
++                if (pt->state == POLLTASK_RUN_KILL)
++                    sem_post(&pt->kill_sem);
++            }
++
++            pt = pt_next;
++        }
++        pq->no_prod = false;
++
++    } while (!pq->kill);
++
++fail_locked:
++    pthread_mutex_unlock(&pq->lock);
++fail_unlocked:
++    free(a);
++    return NULL;
++}
++
++static void prod_fn(void *v, short revents)
++{
++    struct pollqueue *const pq = v;
++    char buf[8];
++    if (revents)
++        read(pq->prod_fd, buf, 8);
++    if (!pq->kill)
++        pollqueue_add_task(pq->prod_pt, -1);
++}
++
++struct pollqueue * pollqueue_new(void)  /* returns NULL on any setup failure */
++{
++    struct pollqueue *pq = malloc(sizeof(*pq));
++    if (!pq)
++        return NULL;
++    *pq = (struct pollqueue){
++        .ref_count = ATOMIC_VAR_INIT(0),
++        .lock = PTHREAD_MUTEX_INITIALIZER,
++        .head = NULL,
++        .tail = NULL,
++        .kill = false,
++        .prod_fd = -1
++    };
++
++    pq->prod_fd = eventfd(0, EFD_NONBLOCK);
++    if (pq->prod_fd == -1)  /* eventfd() reports failure as -1, not 1 */
++        goto fail1;
++    pq->prod_pt = polltask_new(pq, pq->prod_fd, POLLIN, prod_fn, pq);
++    if (!pq->prod_pt)
++        goto fail2;
++    pollqueue_add_task(pq->prod_pt, -1);
++    if (pthread_create(&pq->worker, NULL, poll_thread, pq))
++        goto fail3;
++    // Reset ref count which will have been incremented by polltask_new()
++    atomic_store(&pq->ref_count, 0);
++    return pq;
++
++fail3:
++    polltask_free(pq->prod_pt);
++fail2:
++    close(pq->prod_fd);
++fail1:
++    free(pq);
++    return NULL;
++}
++
++static void pollqueue_free(struct pollqueue *const pq)
++{
++    void *rv;
++
++    pthread_mutex_lock(&pq->lock);
++    pq->kill = true;
++    pollqueue_prod(pq);
++    pthread_mutex_unlock(&pq->lock);
++
++    pthread_join(pq->worker, &rv);
++    polltask_free(pq->prod_pt);
++    pthread_mutex_destroy(&pq->lock);
++    close(pq->prod_fd);
++    free(pq);
++}
++
++struct pollqueue * pollqueue_ref(struct pollqueue *const pq)
++{
++    atomic_fetch_add(&pq->ref_count, 1);
++    return pq;
++}
++
++void pollqueue_unref(struct pollqueue **const ppq)
++{
++    struct pollqueue * const pq = *ppq;
++
++    if (!pq)
++        return;
++    *ppq = NULL;
++
++    if (atomic_fetch_sub(&pq->ref_count, 1) != 0)
++        return;
++
++    pollqueue_free(pq);
++}
++
++
++
+--- /dev/null
++++ b/libavcodec/v4l2_req_pollqueue.h
+@@ -0,0 +1,18 @@
++#ifndef POLLQUEUE_H_
++#define POLLQUEUE_H_
++
++struct polltask;
++struct pollqueue;
++
++struct polltask *polltask_new(struct pollqueue *const pq,
++			      const int fd, const short events,
++			      void (*const fn)(void *v, short revents),
++			      void *const v);
++void polltask_delete(struct polltask **const ppt);
++
++void pollqueue_add_task(struct polltask *const pt, const int timeout);
++struct pollqueue * pollqueue_new(void);
++void pollqueue_unref(struct pollqueue **const ppq);
++struct pollqueue * pollqueue_ref(struct pollqueue *const pq);
++
++#endif /* POLLQUEUE_H_ */
+--- /dev/null
++++ b/libavcodec/v4l2_req_utils.h
+@@ -0,0 +1,21 @@
++#include "libavutil/log.h"
++
++#define request_log(...) av_log(NULL, AV_LOG_INFO, __VA_ARGS__)
++
++#define request_err(_ctx, ...) av_log(_ctx, AV_LOG_ERROR, __VA_ARGS__)
++#define request_info(_ctx, ...) av_log(_ctx, AV_LOG_INFO, __VA_ARGS__)
++#define request_debug(_ctx, ...) av_log(_ctx, AV_LOG_DEBUG, __VA_ARGS__)
++
++static inline char safechar(char c) {
++    return c > 0x20 && c < 0x7f ? c : '.';
++}
++
++static inline const char * strfourcc(char tbuf[5], uint32_t fcc) {
++    tbuf[0] = safechar((fcc >>  0) & 0xff);
++    tbuf[1] = safechar((fcc >>  8) & 0xff);
++    tbuf[2] = safechar((fcc >> 16) & 0xff);
++    tbuf[3] = safechar((fcc >> 24) & 0xff);
++    tbuf[4] = '\0';
++    return tbuf;
++}
++
+--- /dev/null
++++ b/libavcodec/v4l2_request_hevc.c
+@@ -0,0 +1,280 @@
++/*
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++
++
++#include "decode.h"
++#include "hevcdec.h"
++#include "hwconfig.h"
++
++#include "v4l2_request_hevc.h"
++
++#include "libavutil/hwcontext_drm.h"
++
++#include "v4l2_req_devscan.h"
++#include "v4l2_req_dmabufs.h"
++#include "v4l2_req_pollqueue.h"
++#include "v4l2_req_media.h"
++#include "v4l2_req_utils.h"
++
++static size_t bit_buf_size(unsigned int w, unsigned int h, unsigned int bits_minus8)
++{
++    const size_t wxh = w * h;
++    size_t bits_alloc;
++
++    /* Annex A gives a min compression of 2 @ lvl 3.1
++     * (wxh <= 983040) and min 4 thereafter but avoid
++     * the oddity of 983041 having a lower limit than
++     * 983040.
++     * Multiply by 3/2 for 4:2:0
++     */
++    bits_alloc = wxh < 983040 ? wxh * 3 / 4 :
++        wxh < 983040 * 2 ? 983040 * 3 / 4 :  /* hold the estimate flat between the two limits */
++        wxh * 3 / 8;
++    /* Allow for bit depth */
++    bits_alloc += (bits_alloc * bits_minus8) / 8;
++    /* Add a few bytes (16k) for overhead */
++    bits_alloc += 0x4000;
++    return bits_alloc;
++}
++
++static int v4l2_req_hevc_start_frame(AVCodecContext *avctx,
++                                     av_unused const uint8_t *buffer,
++                                     av_unused uint32_t size)
++{
++    const V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++    return ctx->fns->start_frame(avctx, buffer, size);
++}
++
++static int v4l2_req_hevc_decode_slice(AVCodecContext *avctx, const uint8_t *buffer, uint32_t size)
++{
++    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++    return ctx->fns->decode_slice(avctx, buffer, size);
++}
++
++static int v4l2_req_hevc_end_frame(AVCodecContext *avctx)
++{
++    V4L2RequestContextHEVC *ctx = avctx->internal->hwaccel_priv_data;
++    return ctx->fns->end_frame(avctx);
++}
++
++static void v4l2_req_hevc_abort_frame(AVCodecContext * const avctx)
++{
++    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++    ctx->fns->abort_frame(avctx);
++}
++
++static int v4l2_req_hevc_frame_params(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx)
++{
++    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++    return ctx->fns->frame_params(avctx, hw_frames_ctx);
++}
++
++static int v4l2_request_hevc_uninit(AVCodecContext *avctx)
++{
++    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++
++    av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++    decode_q_wait(&ctx->decode_q, NULL);  // Wait for all other threads to be out of decode
++
++    mediabufs_ctl_unref(&ctx->mbufs);
++    media_pool_delete(&ctx->mpool);
++    pollqueue_unref(&ctx->pq);
++    dmabufs_ctl_delete(&ctx->dbufs);
++    devscan_delete(&ctx->devscan);
++
++    decode_q_uninit(&ctx->decode_q);
++
++//    if (avctx->hw_frames_ctx) {
++//        AVHWFramesContext *hwfc = (AVHWFramesContext*)avctx->hw_frames_ctx->data;
++//        av_buffer_pool_flush(hwfc->pool);
++//    }
++    return 0;
++}
++
++static int dst_fmt_accept_cb(void * v, const struct v4l2_fmtdesc *fmtdesc)
++{
++    AVCodecContext *const avctx = v;
++    const HEVCContext *const h = avctx->priv_data;
++
++    if (h->ps.sps->bit_depth == 8) {
++        if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_COL128 ||
++            fmtdesc->pixelformat == V4L2_PIX_FMT_NV12) {
++            return 1;
++        }
++    }
++    else if (h->ps.sps->bit_depth == 10) {
++        if (fmtdesc->pixelformat == V4L2_PIX_FMT_NV12_10_COL128) {
++            return 1;
++        }
++    }
++    return 0;
++}
++
++static int v4l2_request_hevc_init(AVCodecContext *avctx)
++{
++    const HEVCContext *h = avctx->priv_data;
++    V4L2RequestContextHEVC * const ctx = avctx->internal->hwaccel_priv_data;
++    const HEVCSPS * const sps = h->ps.sps;
++    int ret;
++    const struct decdev * decdev;
++    const uint32_t src_pix_fmt = V2(ff_v4l2_req_hevc, 1).src_pix_fmt_v4l2;  // Assuming constant for all APIs but avoiding V4L2 includes
++    size_t src_size;
++
++    av_log(avctx, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++    if ((ret = devscan_build(avctx, &ctx->devscan)) != 0) {
++        av_log(avctx, AV_LOG_WARNING, "Failed to find any V4L2 devices\n");
++        return (AVERROR(-ret));
++    }
++    ret = AVERROR(ENOMEM);  // Assume mem fail by default for these
++
++    if ((decdev = devscan_find(ctx->devscan, src_pix_fmt)) == NULL)
++    {
++        av_log(avctx, AV_LOG_WARNING, "Failed to find a V4L2 device for H265\n");
++        ret = AVERROR(ENODEV);
++        goto fail0;
++    }
++    av_log(avctx, AV_LOG_DEBUG, "Trying V4L2 devices: %s,%s\n",
++           decdev_media_path(decdev), decdev_video_path(decdev));
++
++    if ((ctx->dbufs = dmabufs_ctl_new()) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "Unable to open dmabufs\n");
++        goto fail0;
++    }
++
++    if ((ctx->pq = pollqueue_new()) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "Unable to create pollqueue\n");
++        goto fail1;
++    }
++
++    if ((ctx->mpool = media_pool_new(decdev_media_path(decdev), ctx->pq, 4)) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "Unable to create media pool\n");
++        goto fail2;
++    }
++
++    if ((ctx->mbufs = mediabufs_ctl_new(avctx, decdev_video_path(decdev), ctx->pq)) == NULL) {
++        av_log(avctx, AV_LOG_ERROR, "Unable to create media controls\n");
++        goto fail3;
++    }
++
++    // Ask for an initial bitbuf size of max size / 4
++    // We will realloc if we need more
++    // Must use sps->h/w as avctx contains cropped size
++    src_size = bit_buf_size(sps->width, sps->height, sps->bit_depth - 8);
++    if (mediabufs_src_resizable(ctx->mbufs))
++        src_size /= 4;
++    // Kludge for conformance tests which break Annex A limits
++    else if (src_size < 0x40000)
++        src_size = 0x40000;
++
++    if (mediabufs_src_fmt_set(ctx->mbufs, decdev_src_type(decdev), src_pix_fmt,
++                              sps->width, sps->height, src_size)) {
++        char tbuf1[5];
++        av_log(avctx, AV_LOG_ERROR, "Failed to set source format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height);
++        goto fail4;
++    }
++
++    if (V2(ff_v4l2_req_hevc, 2).probe(avctx, ctx) == 0) {
++        av_log(avctx, AV_LOG_DEBUG, "HEVC API version 2 probed successfully\n");
++        ctx->fns = &V2(ff_v4l2_req_hevc, 2);
++    }
++    else if (V2(ff_v4l2_req_hevc, 1).probe(avctx, ctx) == 0) {
++        av_log(avctx, AV_LOG_DEBUG, "HEVC API version 1 probed successfully\n");
++        ctx->fns = &V2(ff_v4l2_req_hevc, 1);
++    }
++    else {
++        av_log(avctx, AV_LOG_ERROR, "No HEVC version probed successfully\n");
++        ret = AVERROR(EINVAL);
++        goto fail4;
++    }
++
++    if (mediabufs_dst_fmt_set(ctx->mbufs, sps->width, sps->height, dst_fmt_accept_cb, avctx)) {
++        char tbuf1[5];
++        av_log(avctx, AV_LOG_ERROR, "Failed to set destination format: %s %dx%d\n", strfourcc(tbuf1, src_pix_fmt), sps->width, sps->height);
++        goto fail4;
++    }
++
++    if (mediabufs_src_pool_create(ctx->mbufs, ctx->dbufs, 6)) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to create source pool\n");
++        goto fail4;
++    }
++
++    if (mediabufs_dst_slots_create(ctx->mbufs, 1)) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to create destination slots\n");
++        goto fail4;
++    }
++
++    if (mediabufs_stream_on(ctx->mbufs)) {
++        av_log(avctx, AV_LOG_ERROR, "Failed stream on\n");
++        goto fail4;
++    }
++
++    if ((ret = ff_decode_get_hw_frames_ctx(avctx, AV_HWDEVICE_TYPE_DRM)) != 0) {
++        av_log(avctx, AV_LOG_ERROR, "Failed to create frame ctx\n");
++        goto fail4;
++    }
++
++    if ((ret = ctx->fns->set_controls(avctx, ctx)) != 0) {
++        av_log(avctx, AV_LOG_ERROR, "Failed set controls\n");
++        goto fail5;
++    }
++
++    decode_q_init(&ctx->decode_q);
++
++    // Set our s/w format
++    avctx->sw_pix_fmt = ((AVHWFramesContext *)avctx->hw_frames_ctx->data)->sw_format;
++
++    av_log(avctx, AV_LOG_INFO, "Hwaccel %s; devices: %s,%s\n",
++           ctx->fns->name,
++           decdev_media_path(decdev), decdev_video_path(decdev));
++
++    return 0;
++
++fail5:
++    av_buffer_unref(&avctx->hw_frames_ctx);
++fail4:
++    mediabufs_ctl_unref(&ctx->mbufs);
++fail3:
++    media_pool_delete(&ctx->mpool);
++fail2:
++    pollqueue_unref(&ctx->pq);
++fail1:
++    dmabufs_ctl_delete(&ctx->dbufs);
++fail0:
++    devscan_delete(&ctx->devscan);
++    return ret;
++}
++
++const AVHWAccel ff_hevc_v4l2request_hwaccel = {
++    .name           = "hevc_v4l2request",
++    .type           = AVMEDIA_TYPE_VIDEO,
++    .id             = AV_CODEC_ID_HEVC,
++    .pix_fmt        = AV_PIX_FMT_DRM_PRIME,
++//    .alloc_frame    = v4l2_request_hevc_alloc_frame,
++    .start_frame    = v4l2_req_hevc_start_frame,
++    .decode_slice   = v4l2_req_hevc_decode_slice,
++    .end_frame      = v4l2_req_hevc_end_frame,
++    .abort_frame    = v4l2_req_hevc_abort_frame,
++    .init           = v4l2_request_hevc_init,
++    .uninit         = v4l2_request_hevc_uninit,
++    .priv_data_size = sizeof(V4L2RequestContextHEVC),
++    .frame_params   = v4l2_req_hevc_frame_params,
++    .caps_internal  = HWACCEL_CAP_ASYNC_SAFE | HWACCEL_CAP_MT_SAFE,
++};
+--- /dev/null
++++ b/libavcodec/v4l2_request_hevc.h
+@@ -0,0 +1,100 @@
++#ifndef AVCODEC_V4L2_REQUEST_HEVC_H
++#define AVCODEC_V4L2_REQUEST_HEVC_H
++
++#include <drm_fourcc.h>
++#include "v4l2_req_decode_q.h"
++
++#ifndef DRM_FORMAT_NV15
++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
++#endif
++
++#ifndef DRM_FORMAT_NV20
++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
++#endif
++
++// P030 should be defined in drm_fourcc.h and hopefully will be sometime
++// in the future but until then...
++#ifndef DRM_FORMAT_P030
++#define DRM_FORMAT_P030 fourcc_code('P', '0', '3', '0')
++#endif
++
++#ifndef DRM_FORMAT_NV15
++#define DRM_FORMAT_NV15 fourcc_code('N', 'V', '1', '5')
++#endif
++
++#ifndef DRM_FORMAT_NV20
++#define DRM_FORMAT_NV20 fourcc_code('N', 'V', '2', '0')
++#endif
++
++#include <linux/videodev2.h>
++#ifndef V4L2_CID_CODEC_BASE
++#define V4L2_CID_CODEC_BASE V4L2_CID_MPEG_BASE
++#endif
++
++// V4L2_PIX_FMT_NV12_10_COL128 and V4L2_PIX_FMT_NV12_COL128 should be defined
++// in videodev2.h and hopefully will be sometime in the future but until then...
++#ifndef V4L2_PIX_FMT_NV12_10_COL128
++#define V4L2_PIX_FMT_NV12_10_COL128 v4l2_fourcc('N', 'C', '3', '0')
++#endif
++
++#ifndef V4L2_PIX_FMT_NV12_COL128
++#define V4L2_PIX_FMT_NV12_COL128 v4l2_fourcc('N', 'C', '1', '2') /* 12  Y/CbCr 4:2:0 128 pixel wide column */
++#endif
++
++#ifndef V4L2_CTRL_FLAG_DYNAMIC_ARRAY
++#define V4L2_CTRL_FLAG_DYNAMIC_ARRAY	0x0800
++#endif
++
++#define MAX_SLICES 128
++
++#define VCAT(name, version) name##_v##version
++#define V2(n,v) VCAT(n, v)
++#define V(n) V2(n, HEVC_CTRLS_VERSION)
++
++#define S2(x) #x
++#define STR(x) S2(x)
++
++// 1 per decoder
++struct v4l2_req_decode_fns;
++
++typedef struct V4L2RequestContextHEVC {
++//    V4L2RequestContext base;
++    const struct v4l2_req_decode_fns * fns;
++
++    unsigned int timestamp;  // ?? maybe uint64_t
++
++    int multi_slice;
++    int decode_mode;
++    int start_code;
++    int max_slices;
++
++    req_decode_q decode_q;
++
++    struct devscan *devscan;
++    struct dmabufs_ctl *dbufs;
++    struct pollqueue *pq;
++    struct media_pool * mpool;
++    struct mediabufs_ctl *mbufs;
++} V4L2RequestContextHEVC;
++
++typedef struct v4l2_req_decode_fns {
++    int src_pix_fmt_v4l2;
++    const char * name;
++
++    // Init setup
++    int (*probe)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx);
++    int (*set_controls)(AVCodecContext * const avctx, V4L2RequestContextHEVC * const ctx);
++
++    // Passthrough of hwaccel fns
++    int (*start_frame)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
++    int (*decode_slice)(AVCodecContext *avctx, const uint8_t *buf, uint32_t buf_size);
++    int (*end_frame)(AVCodecContext *avctx);
++    void (*abort_frame)(AVCodecContext *avctx);
++    int (*frame_params)(AVCodecContext *avctx, AVBufferRef *hw_frames_ctx);
++} v4l2_req_decode_fns;
++
++
++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 1);
++extern const v4l2_req_decode_fns V2(ff_v4l2_req_hevc, 2);
++
++#endif
+--- /dev/null
++++ b/libavcodec/weak_link.c
+@@ -0,0 +1,100 @@
++#include <stdlib.h>
++#include <pthread.h>
++#include <stdatomic.h>
++#include "weak_link.h"
++
++struct ff_weak_link_master {
++    atomic_int ref_count;    /* 0 is single ref for easier atomics */
++    pthread_rwlock_t lock;
++    void * ptr;
++};
++
++static inline struct ff_weak_link_master * weak_link_x(struct ff_weak_link_client * c)
++{
++    return (struct ff_weak_link_master *)c;
++}
++
++struct ff_weak_link_master * ff_weak_link_new(void * p)  /* NULL on alloc/lock-init failure */
++{
++    struct ff_weak_link_master * w = malloc(sizeof(*w));
++    if (!w)
++        return NULL;
++    atomic_init(&w->ref_count, 0);  /* malloc leaves it indeterminate; 0 is a single ref */
++    w->ptr = p;
++    if (pthread_rwlock_init(&w->lock, NULL) == 0)
++        return w;
++    free(w);
++    return NULL;
++}
++
++static void weak_link_do_unref(struct ff_weak_link_master * const w)
++{
++    int n = atomic_fetch_sub(&w->ref_count, 1);
++    if (n)
++        return;
++
++    pthread_rwlock_destroy(&w->lock);
++    free(w);
++}
++
++// Unref & break link
++void ff_weak_link_break(struct ff_weak_link_master ** ppLink)
++{
++    struct ff_weak_link_master * const w = *ppLink;
++    if (!w)
++        return;
++
++    *ppLink = NULL;
++    pthread_rwlock_wrlock(&w->lock);
++    w->ptr = NULL;
++    pthread_rwlock_unlock(&w->lock);
++
++    weak_link_do_unref(w);
++}
++
++struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w)
++{
++    atomic_fetch_add(&w->ref_count, 1);
++    return (struct ff_weak_link_client*)w;
++}
++
++void ff_weak_link_unref(struct ff_weak_link_client ** ppLink)
++{
++    struct ff_weak_link_master * const w = weak_link_x(*ppLink);
++    if (!w)
++        return;
++
++    *ppLink = NULL;
++    weak_link_do_unref(w);
++}
++
++void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink)
++{
++    struct ff_weak_link_master * const w = weak_link_x(*ppLink);
++
++    if (!w)
++        return NULL;
++
++    if (pthread_rwlock_rdlock(&w->lock))
++        goto broken;
++
++    if (w->ptr)
++        return w->ptr;
++
++    pthread_rwlock_unlock(&w->lock);
++
++broken:
++    *ppLink = NULL;
++    weak_link_do_unref(w);
++    return NULL;
++}
++
++// Ignores a NULL c (so can be on the return path of both broken & live links)
++void ff_weak_link_unlock(struct ff_weak_link_client * c)
++{
++    struct ff_weak_link_master * const w = weak_link_x(c);
++    if (w)
++        pthread_rwlock_unlock(&w->lock);
++}
++
++
+--- /dev/null
++++ b/libavcodec/weak_link.h
+@@ -0,0 +1,23 @@
++struct ff_weak_link_master;
++struct ff_weak_link_client;
++
++struct ff_weak_link_master * ff_weak_link_new(void * p);
++void ff_weak_link_break(struct ff_weak_link_master ** ppLink);
++
++struct ff_weak_link_client* ff_weak_link_ref(struct ff_weak_link_master * w);
++void ff_weak_link_unref(struct ff_weak_link_client ** ppLink);
++
++// Returns NULL if link broken - in this case it will also zap
++//   *ppLink and unref the weak_link.
++// Returns NULL if *ppLink is NULL (so a link once broken stays broken)
++//
++// The above does mean that there is a race if this is called simultaneously
++// by two threads using the same weak_link_client (so don't do that)
++void * ff_weak_link_lock(struct ff_weak_link_client ** ppLink);
++void ff_weak_link_unlock(struct ff_weak_link_client * c);
++
++
++
++
++
++
+--- a/libavdevice/Makefile
++++ b/libavdevice/Makefile
+@@ -46,6 +46,9 @@ OBJS-$(CONFIG_SNDIO_OUTDEV)
+ OBJS-$(CONFIG_V4L2_INDEV)                += v4l2.o v4l2-common.o timefilter.o
+ OBJS-$(CONFIG_V4L2_OUTDEV)               += v4l2enc.o v4l2-common.o
+ OBJS-$(CONFIG_VFWCAP_INDEV)              += vfwcap.o
++OBJS-$(CONFIG_VOUT_DRM_OUTDEV)           += drm_vout.o
++OBJS-$(CONFIG_VOUT_EGL_OUTDEV)           += egl_vout.o
++OBJS-$(CONFIG_VOUT_RPI_OUTDEV)           += rpi_vout.o
+ OBJS-$(CONFIG_XCBGRAB_INDEV)             += xcbgrab.o
+ OBJS-$(CONFIG_XV_OUTDEV)                 += xv.o
+
+--- a/libavdevice/alldevices.c
++++ b/libavdevice/alldevices.c
+@@ -52,6 +52,9 @@ extern AVOutputFormat ff_sndio_muxer;
+ extern AVInputFormat  ff_v4l2_demuxer;
+ extern AVOutputFormat ff_v4l2_muxer;
+ extern AVInputFormat  ff_vfwcap_demuxer;
++extern AVOutputFormat ff_vout_drm_muxer;
++extern AVOutputFormat ff_vout_egl_muxer;
++extern AVOutputFormat ff_vout_rpi_muxer;
+ extern AVInputFormat  ff_xcbgrab_demuxer;
+ extern AVOutputFormat ff_xv_muxer;
+
+--- /dev/null
++++ b/libavdevice/drm_vout.c
+@@ -0,0 +1,643 @@
++/*
++ * Copyright (c) 2020 John Cox for Raspberry Pi Trading
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++
++// *** This module is a work in progress and its utility is strictly
++//     limited to testing.
++
++#include "libavutil/opt.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/hwcontext_drm.h"
++#include "libavformat/internal.h"
++#include "avdevice.h"
++
++#include "pthread.h"
++#include <semaphore.h>
++#include <unistd.h>
++
++#include <xf86drm.h>
++#include <xf86drmMode.h>
++
++#define TRACE_ALL 0
++
++#define DRM_MODULE "vc4"
++
++#define ERRSTR strerror(errno)
++
++struct drm_setup {
++   int conId;
++   uint32_t crtcId;
++   int crtcIdx;
++   uint32_t planeId;
++   unsigned int out_fourcc;
++   struct {
++       int x, y, width, height;
++   } compose;
++};
++
++typedef struct drm_aux_s {
++    unsigned int fb_handle;
++    uint32_t bo_handles[AV_DRM_MAX_PLANES];
++    AVFrame * frame;
++} drm_aux_t;
++
++// Aux size should only need to be 2, but on a few streams (Hobbit) under FKMS
++// we get initial flicker probably due to dodgy drm timing
++#define AUX_SIZE 3
++typedef struct drm_display_env_s
++{
++    AVClass *class;
++
++    int drm_fd;
++    uint32_t con_id;
++    struct drm_setup setup;
++    enum AVPixelFormat avfmt;
++    int show_all;
++
++    unsigned int ano;
++    drm_aux_t aux[AUX_SIZE];
++
++    pthread_t q_thread;
++    sem_t q_sem_in;
++    sem_t q_sem_out;
++    int q_terminate;
++    AVFrame * q_next;
++
++} drm_display_env_t;
++
++
++static int drm_vout_write_trailer(AVFormatContext *s)
++{
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
++#endif
++
++    return 0;
++}
++
++static int drm_vout_write_header(AVFormatContext *s)
++{
++    const AVCodecParameters * const par = s->streams[0]->codecpar;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
++#endif
++    if (   s->nb_streams > 1
++        || par->codec_type != AVMEDIA_TYPE_VIDEO
++        || par->codec_id   != AV_CODEC_ID_WRAPPED_AVFRAME) {
++        av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n");
++        return AVERROR(EINVAL);
++    }
++
++    return 0;
++}
++
++static int find_plane(struct AVFormatContext * const avctx,
++                      const int drmfd, const int crtcidx, const uint32_t format,
++                      uint32_t * const pplane_id)  /* 0 => *pplane_id set, -1 => no usable plane */
++{
++   drmModePlaneResPtr planes;
++   drmModePlanePtr plane;
++   unsigned int i;
++   unsigned int j;
++   int ret = 0;
++
++   planes = drmModeGetPlaneResources(drmfd);
++   if (!planes)
++   {
++       av_log(avctx, AV_LOG_WARNING, "drmModeGetPlaneResources failed: %s\n", ERRSTR);
++       return -1;
++   }
++
++   for (i = 0; i < planes->count_planes; ++i) {
++      plane = drmModeGetPlane(drmfd, planes->planes[i]);
++      if (!plane) {  /* check this plane, not the (already validated) resource list */
++          av_log(avctx, AV_LOG_WARNING, "drmModeGetPlane failed: %s\n", ERRSTR);
++          i = planes->count_planes;  /* force the "not found" result below */
++          break;
++      }
++
++      if (!(plane->possible_crtcs & (1 << crtcidx))) {
++         drmModeFreePlane(plane);
++         continue;
++      }
++
++      for (j = 0; j < plane->count_formats; ++j) {
++         if (plane->formats[j] == format)
++            break;
++      }
++
++      if (j == plane->count_formats) {
++         drmModeFreePlane(plane);
++         continue;
++      }
++
++      *pplane_id = plane->plane_id;
++      drmModeFreePlane(plane);
++      break;
++   }
++
++   if (i == planes->count_planes)  /* loop ran out (or was forced out) without a match */
++      ret = -1;
++
++   drmModeFreePlaneResources(planes);
++   return ret;
++}
++
++static void da_uninit(drm_display_env_t * const de, drm_aux_t * da)
++{
++    if (da->fb_handle != 0) {
++        drmModeRmFB(de->drm_fd, da->fb_handle);
++        da->fb_handle = 0;
++    }
++
++    for (unsigned int i = 0; i != AV_DRM_MAX_PLANES; ++i) {
++        if (da->bo_handles[i]) {
++            struct drm_gem_close gem_close = {.handle = da->bo_handles[i]};
++            drmIoctl(de->drm_fd, DRM_IOCTL_GEM_CLOSE, &gem_close);
++            da->bo_handles[i] = 0;
++        }
++    }
++    av_frame_free(&da->frame);
++}
++
++static int do_display(AVFormatContext * const s, drm_display_env_t * const de, AVFrame * frame)
++{   // Show one DRM PRIME frame on the output plane.  Takes ownership of frame.  Returns 0 on success.
++    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0];
++    drm_aux_t * da = de->aux + de->ano;
++    const uint32_t format = desc->layers[0].format;
++    int ret = 0;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "<<< %s: fd=%d\n", __func__, desc->objects[0].fd);
++#endif
++    // A plane is looked up again only when the frame format differs from the one already set up
++    if (de->setup.out_fourcc != format) {
++        if (find_plane(s, de->drm_fd, de->setup.crtcIdx, format, &de->setup.planeId)) {
++            av_frame_free(&frame);
++            av_log(s, AV_LOG_WARNING, "No plane for format: %#x\n", format);
++            return -1;
++        }
++        de->setup.out_fourcc = format;
++    }
++
++    {   // Wait for a vertical blank, retrying when the wait is interrupted by a signal
++        drmVBlank vbl = {
++            .request = {
++                .type = DRM_VBLANK_RELATIVE,
++                .sequence = 0
++            }
++        };
++
++        while (drmWaitVBlank(de->drm_fd, &vbl)) {
++            if (errno != EINTR) {
++                av_log(s, AV_LOG_WARNING, "drmWaitVBlank failed: %s\n", ERRSTR);
++                break;
++            }
++        }
++    }
++
++    da_uninit(de, da);   // drop whatever this aux slot still holds from an earlier frame
++
++    {
++        uint32_t pitches[4] = {0};
++        uint32_t offsets[4] = {0};
++        uint64_t modifiers[4] = {0};
++        uint32_t bo_handles[4] = {0};
++        int i, j, n;
++
++        da->frame = frame;   // the slot owns the frame from here on; da_uninit() frees it
++
++        for (i = 0; i < desc->nb_objects; ++i) {
++            if (drmPrimeFDToHandle(de->drm_fd, desc->objects[i].fd, da->bo_handles + i) != 0) {
++                av_log(s, AV_LOG_WARNING, "drmPrimeFDToHandle[%d](%d) failed: %s\n", i, desc->objects[i].fd, ERRSTR);
++                return -1;
++            }
++        }
++        // Flatten the planes of all layers into the 4-entry arrays; never write past their end
++        n = 0;
++        for (i = 0; i < desc->nb_layers; ++i) {
++            for (j = 0; j < desc->layers[i].nb_planes && n < (int)(sizeof(pitches) / sizeof(pitches[0])); ++j) {
++                const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j;
++                const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index;
++                pitches[n] = p->pitch;
++                offsets[n] = p->offset;
++                modifiers[n] = obj->format_modifier;
++                bo_handles[n] = da->bo_handles[p->object_index];
++                ++n;
++            }
++        }
++
++#if 1 && TRACE_ALL
++        av_log(s, AV_LOG_DEBUG, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d,"
++               " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n",
++               av_frame_cropped_width(frame),
++               av_frame_cropped_height(frame),
++               desc->layers[0].format,
++               bo_handles[0],
++               bo_handles[1],
++               bo_handles[2],
++               bo_handles[3],
++               pitches[0],
++               pitches[1],
++               pitches[2],
++               pitches[3],
++               offsets[0],
++               offsets[1],
++               offsets[2],
++               offsets[3],
++               (long long)modifiers[0],
++               (long long)modifiers[1],
++               (long long)modifiers[2],
++               (long long)modifiers[3]
++               );
++#endif
++
++        if (drmModeAddFB2WithModifiers(de->drm_fd,
++                                         av_frame_cropped_width(frame),
++                                         av_frame_cropped_height(frame),
++                                         desc->layers[0].format, bo_handles,
++                                         pitches, offsets, modifiers,
++                                         &da->fb_handle, DRM_MODE_FB_MODIFIERS /** 0 if no mods */) != 0) {
++            av_log(s, AV_LOG_WARNING, "drmModeAddFB2WithModifiers failed: %s\n", ERRSTR);
++            return -1;
++        }
++    }
++
++    ret = drmModeSetPlane(de->drm_fd, de->setup.planeId, de->setup.crtcId,
++                              da->fb_handle, 0,
++                de->setup.compose.x, de->setup.compose.y,
++                de->setup.compose.width,
++                de->setup.compose.height,
++                0, 0,
++                av_frame_cropped_width(frame) << 16,
++                av_frame_cropped_height(frame) << 16);
++
++    if (ret != 0) {
++        av_log(s, AV_LOG_WARNING, "drmModeSetPlane failed: %s\n", ERRSTR);
++    }
++    // Move on to the next aux slot, wrapping round at AUX_SIZE
++    de->ano = de->ano + 1 >= AUX_SIZE ? 0 : de->ano + 1;
++
++    return ret;
++}
++
++static int do_sem_wait(sem_t * const sem, const int nowait)
++{   // Take the semaphore (polling when nowait is set); 0 on success, -errno on failure
++    for (;;) {
++        const int rv = nowait ? sem_trywait(sem) : sem_wait(sem);
++        if (rv == 0) return 0;
++        if (errno != EINTR) return -errno;   // an interrupted wait is simply retried
++    }
++}
++
++static void * display_thread(void * v)
++{   // Display thread body: v is the AVFormatContext; shows the frames handed over through de->q_next
++    AVFormatContext * const s = v;
++    drm_display_env_t * const de = s->priv_data;
++    int i;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++#endif
++
++    sem_post(&de->q_sem_out);   // the single queue slot starts out free
++
++    for (;;) {
++        AVFrame * frame;
++
++        do_sem_wait(&de->q_sem_in, 0);   // block until a frame is queued or termination is requested
++
++        if (de->q_terminate)
++            break;
++
++        frame = de->q_next;
++        de->q_next = NULL;
++        sem_post(&de->q_sem_out);   // slot emptied: the writer may queue the next frame
++
++        do_display(s, de, frame);
++    }
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++#endif
++    // On exit release every aux slot and any frame still waiting in the queue
++    for (i = 0; i != AUX_SIZE; ++i)
++        da_uninit(de, de->aux + i);
++
++    av_frame_free(&de->q_next);
++
++    return NULL;
++}
++
++static int drm_vout_write_packet(AVFormatContext *s, AVPacket *pkt)
++{
++    // Hand a new reference to the AVFrame wrapped in pkt to the display thread.
++    // Returns 0 if queued, dropped or discarded as corrupt; a negative AVERROR otherwise.
++    const AVFrame * const src_frame = (AVFrame *)pkt->data;
++    AVFrame * frame;
++    drm_display_env_t * const de = s->priv_data;
++    int ret;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "%s\n", __func__);
++#endif
++
++    if ((src_frame->flags & AV_FRAME_FLAG_CORRUPT) != 0) {
++        av_log(s, AV_LOG_WARNING, "Discard corrupt frame: fmt=%d, ts=%" PRId64 "\n", src_frame->format, src_frame->pts);
++        return 0;
++    }
++
++    if (src_frame->format != AV_PIX_FMT_DRM_PRIME && src_frame->format != AV_PIX_FMT_VAAPI) {
++        av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format);
++        return AVERROR(EINVAL);
++    }
++    if ((frame = av_frame_alloc()) == NULL)
++        return AVERROR(ENOMEM);
++    if (src_frame->format == AV_PIX_FMT_DRM_PRIME) {
++        ret = av_frame_ref(frame, src_frame);
++    } else {
++        frame->format = AV_PIX_FMT_DRM_PRIME;   // VAAPI input: map it to a DRM PRIME frame
++        if ((ret = av_hwframe_map(frame, src_frame, 0)) != 0) {
++            av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format);
++            ret = AVERROR(EINVAL);
++        }
++    }
++    if (ret != 0) {
++        av_frame_free(&frame);
++        return ret;
++    }
++    // show_all: wait for the queue slot to empty; otherwise just try it and drop the frame if busy
++    if (do_sem_wait(&de->q_sem_out, !de->show_all) != 0) {
++        av_frame_free(&frame);
++    } else {
++        de->q_next = frame;
++        sem_post(&de->q_sem_in);
++    }
++    return 0;
++}
++
++static int drm_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe,
++                          unsigned flags)
++{
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "%s: idx=%d, flags=%#x\n", __func__, stream_index, flags);
++#endif
++
++    /* A query (AV_WRITE_UNCODED_FRAME_QUERY) and an actual write get the same
++     * answer here: success.  drm_vout_write_header() should have accepted
++     * only supported formats, so there is nothing to test for a query and
++     * this callback does nothing with *ppframe. */
++    return 0;
++}
++
++static int drm_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size)
++{
++#if TRACE_ALL
++    av_log(s, AV_LOG_DEBUG, "%s: %d\n", __func__, type);
++#endif
++
++    /* A repaint request is the only message acknowledged (as a no-op);
++     * every other message type is reported as not implemented. */
++    if (type == AV_APP_TO_DEV_WINDOW_REPAINT)
++        return 0;
++
++    return AVERROR(ENOSYS);
++}
++
++static int find_crtc(struct AVFormatContext * const avctx, int drmfd, struct drm_setup *s, uint32_t * const pConId)
++{
++   // Choose the connector/CRTC to use and store the CRTC id, index and geometry in *s;
++   // s->conId == 0 selects the first connector with a CRTC.  Returns 0 (and sets *pConId) or -1.
++   int ret = -1;
++   int i;
++   drmModeRes *res = drmModeGetResources(drmfd);
++   drmModeConnector *c;
++   drmModeCrtc *crtc;
++
++   if (!res) {
++      av_log(avctx, AV_LOG_WARNING, "drmModeGetResources failed: %s\n", ERRSTR);
++      return -1;
++   }
++
++   if (res->count_crtcs <= 0) {
++      av_log(avctx, AV_LOG_WARNING, "drm: no crts\n");
++      goto fail_res;
++   }
++
++   if (!s->conId) {
++      av_log(avctx, AV_LOG_INFO, "No connector ID specified.  Choosing default from list:\n");
++
++      for (i = 0; i < res->count_connectors; i++) {
++         drmModeConnector *con = drmModeGetConnector(drmfd, res->connectors[i]);
++         drmModeEncoder *enc = NULL;
++         drmModeCrtc *con_crtc = NULL;
++
++         if (!con)
++            continue;
++         if (con->encoder_id) {
++            enc = drmModeGetEncoder(drmfd, con->encoder_id);
++            if (enc && enc->crtc_id)
++               con_crtc = drmModeGetCrtc(drmfd, enc->crtc_id);
++         }
++
++         if (!s->conId && con_crtc) {
++            s->conId = con->connector_id;
++            s->crtcId = con_crtc->crtc_id;
++         }
++
++         av_log(avctx, AV_LOG_DEBUG, "Connector %d (crtc %d): type %d, %dx%d%s\n",
++                con->connector_id,
++                con_crtc ? con_crtc->crtc_id : 0,
++                con->connector_type,
++                con_crtc ? con_crtc->width : 0,
++                con_crtc ? con_crtc->height : 0,
++                (s->conId == (int)con->connector_id ? " (chosen)" : ""));
++         // These libdrm objects were only needed for this iteration
++         drmModeFreeCrtc(con_crtc);
++         drmModeFreeEncoder(enc);
++         drmModeFreeConnector(con);
++      }
++
++      if (!s->conId) {
++         av_log(avctx, AV_LOG_ERROR, "No suitable enabled connector found.\n");
++         goto fail_res;
++      }
++   }
++
++   s->crtcIdx = -1;
++
++   for (i = 0; i < res->count_crtcs; ++i) {
++      if (s->crtcId == res->crtcs[i]) {
++         s->crtcIdx = i;
++         break;
++      }
++   }
++
++   if (s->crtcIdx == -1) {
++       av_log(avctx, AV_LOG_WARNING, "drm: CRTC %u not found\n", s->crtcId);
++       goto fail_res;
++   }
++
++   if (res->count_connectors <= 0) {
++       av_log(avctx, AV_LOG_WARNING, "drm: no connectors\n");
++       goto fail_res;
++   }
++
++   c = drmModeGetConnector(drmfd, s->conId);
++   if (!c) {
++       av_log(avctx, AV_LOG_WARNING, "drmModeGetConnector failed: %s\n", ERRSTR);
++       goto fail_res;
++   }
++
++   if (!c->count_modes) {
++       av_log(avctx, AV_LOG_WARNING, "connector supports no mode\n");
++       goto fail_conn;
++   }
++
++   crtc = drmModeGetCrtc(drmfd, s->crtcId);
++   if (!crtc) {
++       av_log(avctx, AV_LOG_WARNING, "drmModeGetCrtc failed: %s\n", ERRSTR);
++       goto fail_conn;
++   }
++   s->compose.x = crtc->x;
++   s->compose.y = crtc->y;
++   s->compose.width = crtc->width;
++   s->compose.height = crtc->height;
++   drmModeFreeCrtc(crtc);
++
++   if (pConId)
++      *pConId = c->connector_id;
++   ret = 0;
++
++fail_conn:
++   drmModeFreeConnector(c);
++
++fail_res:
++   drmModeFreeResources(res);
++
++   return ret;
++}
++
++// deinit is called if init fails so no need to clean up explicitly here
++static int drm_vout_init(struct AVFormatContext * s)
++{   // Open the DRM device, choose connector/CRTC, then start the display thread.  0 or negative AVERROR.
++    drm_display_env_t * const de = s->priv_data;
++    int rv;
++    const char * drm_module = DRM_MODULE;
++
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++    de->drm_fd = -1;
++    de->con_id = 0;
++    de->setup = (struct drm_setup){0};
++    de->q_terminate = 0;
++
++    if ((de->drm_fd = drmOpen(drm_module, NULL)) < 0)
++    {
++        rv = AVERROR(errno);
++        av_log(s, AV_LOG_ERROR, "Failed to drmOpen %s: %s\n", drm_module, av_err2str(rv));
++        return rv;
++    }
++
++    if (find_crtc(s, de->drm_fd, &de->setup, &de->con_id) != 0)
++    {
++        av_log(s, AV_LOG_ERROR, "failed to find valid mode\n");
++        rv = AVERROR(EINVAL);
++        goto fail_close;
++    }
++
++    sem_init(&de->q_sem_in, 0, 0);
++    sem_init(&de->q_sem_out, 0, 0);
++    if ((rv = pthread_create(&de->q_thread, NULL, display_thread, s)) != 0) {
++        rv = AVERROR(rv);   // pthread_create() returns the error number; it does not set errno
++        av_log(s, AV_LOG_ERROR, "Failed to creatye display thread: %s\n", av_err2str(rv));
++        goto fail_close;
++    }
++
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++
++    return 0;
++    // Failure after the device was opened: close it again and leave drm_fd at -1
++fail_close:
++    close(de->drm_fd);
++    de->drm_fd = -1;
++    av_log(s, AV_LOG_DEBUG, ">>> %s: FAIL\n", __func__);
++
++    return rv;
++}
++
++static void drm_vout_deinit(struct AVFormatContext * s)
++{
++    drm_display_env_t * const de = s->priv_data;
++
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++    // drm_vout_init() leaves drm_fd < 0 whenever it fails, i.e. when no display thread is running
++    if (de->drm_fd >= 0) {
++        de->q_terminate = 1;
++        sem_post(&de->q_sem_in);   // wake the thread so that it sees q_terminate
++        pthread_join(de->q_thread, NULL);
++        sem_destroy(&de->q_sem_in);
++        sem_destroy(&de->q_sem_out);
++    }
++    for (unsigned int i = 0; i != AUX_SIZE; ++i)
++        da_uninit(de, de->aux + i);
++    av_frame_free(&de->q_next);
++    if (de->drm_fd >= 0) {
++        close(de->drm_fd);
++        de->drm_fd = -1;
++    }
++
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++}
++
++
++#define OFFSET(x) offsetof(drm_display_env_t, x)
++static const AVOption options[] = {   // private options of this device; the { NULL } entry ends the table
++    { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
++    { NULL }
++};
++
++static const AVClass drm_vout_class = {   // AVClass of the device's private context; exposes options[]
++    .class_name = "drm vid outdev",
++    .item_name  = av_default_item_name,
++    .option     = options,
++    .version    = LIBAVUTIL_VERSION_INT,
++    .category   = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT,
++};
++
++AVOutputFormat ff_vout_drm_muxer = {   // output device "vout_drm": wires the callbacks of this file together
++    .name           = "vout_drm",
++    .long_name      = NULL_IF_CONFIG_SMALL("Drm video output device"),
++    .priv_data_size = sizeof(drm_display_env_t),
++    .audio_codec    = AV_CODEC_ID_NONE,
++    .video_codec    = AV_CODEC_ID_WRAPPED_AVFRAME,   // frames arrive as AVFrames wrapped in packets
++    .write_header   = drm_vout_write_header,
++    .write_packet   = drm_vout_write_packet,
++    .write_uncoded_frame = drm_vout_write_frame,
++    .write_trailer  = drm_vout_write_trailer,
++    .control_message = drm_vout_control_message,
++    .flags          = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS,
++    .priv_class     = &drm_vout_class,
++    .init           = drm_vout_init,
++    .deinit         = drm_vout_deinit,
++};
++
+--- /dev/null
++++ b/libavdevice/egl_vout.c
+@@ -0,0 +1,825 @@
++/*
++ * Copyright (c) 2020 John Cox for Raspberry Pi Trading
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++
++// *** This module is a work in progress and its utility is strictly
++//     limited to testing.
++//     Amongst other issues it doesn't wait for the pic to be displayed before
++//     returning the buffer so flickering does occur.
++
++#include <epoxy/gl.h>
++#include <epoxy/egl.h>
++
++#include "libavutil/opt.h"
++#include "libavutil/avassert.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/imgutils.h"
++#include "libavutil/hwcontext_drm.h"
++#include "libavformat/internal.h"
++#include "avdevice.h"
++
++#include "pthread.h"
++#include <semaphore.h>
++#include <stdatomic.h>
++#include <unistd.h>
++
++#include "drm_fourcc.h"
++#include <drm.h>
++#include <drm_mode.h>
++#include <xf86drm.h>
++#include <xf86drmMode.h>
++#include <X11/Xlib.h>
++#include <X11/Xutil.h>
++#include <X11/Xlib-xcb.h>
++#include <xcb/xcb.h>
++#include <xcb/dri3.h>
++
++#include "libavutil/rpi_sand_fns.h"
++
++#define TRACE_ALL 0
++
++struct egl_setup {   // X11/EGL handles of the output window
++   int conId;
++
++   Display *dpy;         // X display opened by the display thread (XOpenDisplay)
++   EGLDisplay egl_dpy;   // EGL display obtained for dpy (eglGetDisplay)
++   EGLContext ctx;       // filled in through make_window()
++   EGLSurface surf;      // filled in through make_window(); swapped by do_display()
++   Window win;           // filled in through make_window()
++
++   uint32_t crtcId;      // NOTE(review): conId, crtc*, planeId and compose look unused by the EGL code - confirm
++   int crtcIdx;
++   uint32_t planeId;
++   struct {
++       int x, y, width, height;
++   } compose;
++};
++
++typedef struct egl_aux_s {   // one texture slot used by do_display()
++    int fd;           // fd of object 0 of the frame the texture was made from; do_display() treats -1 as free
++    GLuint texture;   // GL texture name; 0 while no texture exists for the slot
++
++} egl_aux_t;
++
++typedef struct egl_display_env_s   // private context of the EGL output device
++{
++    AVClass *class;
++
++    struct egl_setup setup;
++    enum AVPixelFormat avfmt;
++
++    int show_all;                      // when set, the writer waits for q_next to be taken instead of replacing it
++    int window_width, window_height;   // requested window size; 0 makes make_window() use 1280x720
++    int window_x, window_y;            // requested window position
++    int fullscreen;                    // when set, make_window() covers the whole screen without a border
++
++    egl_aux_t aux[32];                 // texture slots searched by do_display()
++
++    pthread_t q_thread;
++    pthread_mutex_t q_lock;            // held while q_next is exchanged
++    sem_t display_start_sem;           // posted by the display thread once set-up succeeded or failed
++    sem_t q_sem;                       // the display thread waits on this for a queued frame or termination
++    int q_terminate;                   // tells the display thread to leave its loop
++    AVFrame * q_this;                  // frame most recently passed to do_display(), kept until the next one
++    AVFrame * q_next;                  // frame waiting to be displayed, or NULL
++
++} egl_display_env_t;
++
++
++/**
++ * Remove window border/decorations.
++ */
++static void
++no_border( Display *dpy, Window w)
++{
++   static const unsigned MWM_HINTS_DECORATIONS = (1 << 1);
++   static const int PROP_MOTIF_WM_HINTS_ELEMENTS = 5;
++
++   typedef struct
++   {
++      unsigned long       flags;
++      unsigned long       functions;
++      unsigned long       decorations;
++      long                inputMode;
++      unsigned long       status;
++   } PropMotifWmHints;
++
++   PropMotifWmHints motif_hints = {0};   /* all five fields are sent below, so start from zero */
++   Atom prop, proptype;
++   unsigned long flags = 0;
++
++   /* setup the property: only the decorations field is marked valid, and it is 0 */
++   motif_hints.flags = MWM_HINTS_DECORATIONS;
++   motif_hints.decorations = flags;
++
++   /* get the atom for the property */
++   prop = XInternAtom( dpy, "_MOTIF_WM_HINTS", True );
++   if (!prop) {
++      /* no atom returned (only_if_exists is True): leave the window as it is */
++      return;
++   }
++
++   /* not sure this is correct, seems to work, XA_WM_HINTS didn't work */
++   proptype = prop;
++
++   XChangeProperty( dpy, w,                         /* display, window */
++                    prop, proptype,                 /* property, type */
++                    32,                             /* format: 32-bit datums */
++                    PropModeReplace,                /* mode */
++                    (unsigned char *) &motif_hints, /* data */
++                    PROP_MOTIF_WM_HINTS_ELEMENTS    /* nelements */
++                  );
++}
++
++
++/*
++ * Create and map an X window, then create an ES2 EGL context and a window
++ * surface for it and make them current.  Returns 0 (filling the *Ret args) or -1.
++ */
++static int
++make_window(struct AVFormatContext * const s,
++            egl_display_env_t * const de,
++            Display *dpy, EGLDisplay egl_dpy, const char *name,
++            Window *winRet, EGLContext *ctxRet, EGLSurface *surfRet)
++{
++   int scrnum = DefaultScreen( dpy );
++   XSetWindowAttributes attr;
++   unsigned long mask;
++   Window root = RootWindow( dpy, scrnum );
++   Window win;
++   EGLContext ctx;
++   const int fullscreen = de->fullscreen;
++   EGLConfig config;
++   int x = de->window_x;
++   int y = de->window_y;
++   int width = de->window_width ? de->window_width : 1280;
++   int height = de->window_height ? de->window_height : 720;
++
++   if (fullscreen) {
++      x = 0; y = 0;
++      width = DisplayWidth(dpy, scrnum);
++      height = DisplayHeight(dpy, scrnum);
++   }
++
++   {
++      EGLint num_configs;
++      static const EGLint attribs[] = {
++         EGL_RED_SIZE, 1,
++         EGL_GREEN_SIZE, 1,
++         EGL_BLUE_SIZE, 1,
++         EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
++         EGL_NONE
++      };
++
++      if (!eglChooseConfig(egl_dpy, attribs, &config, 1, &num_configs) || num_configs < 1) {
++         av_log(s, AV_LOG_ERROR, "Error: couldn't get an EGL visual config\n");
++         return -1;
++      }
++   }
++
++   {
++      EGLint vid;
++      if (!eglGetConfigAttrib(egl_dpy, config, EGL_NATIVE_VISUAL_ID, &vid)) {
++         av_log(s, AV_LOG_ERROR, "Error: eglGetConfigAttrib() failed\n");
++         return -1;
++      }
++
++      {
++         XVisualInfo visTemplate = {
++            .visualid = vid,
++         };
++         int num_visuals;
++         XVisualInfo *visinfo = XGetVisualInfo(dpy, VisualIDMask,
++                                               &visTemplate, &num_visuals);
++         if (!visinfo) {
++            av_log(s, AV_LOG_ERROR, "Error: couldn't get X visual\n");
++            return -1;
++         }
++
++         /* window attributes */
++         attr.background_pixel = 0;
++         attr.border_pixel = 0;
++         attr.colormap = XCreateColormap( dpy, root, visinfo->visual, AllocNone);
++         attr.event_mask = StructureNotifyMask | ExposureMask | KeyPressMask;
++         /* XXX this is a bad way to get a borderless window! */
++         mask = CWBackPixel | CWBorderPixel | CWColormap | CWEventMask;
++
++         win = XCreateWindow( dpy, root, x, y, width, height,
++                              0, visinfo->depth, InputOutput,
++                              visinfo->visual, mask, &attr );
++         XFree(visinfo);
++      }
++   }
++
++   if (fullscreen)
++      no_border(dpy, win);
++
++   /* set hints and properties */
++   {
++      XSizeHints sizehints;
++      sizehints.x = x;
++      sizehints.y = y;
++      sizehints.width  = width;
++      sizehints.height = height;
++      sizehints.flags = USSize | USPosition;
++      XSetNormalHints(dpy, win, &sizehints);
++      XSetStandardProperties(dpy, win, name, name,
++                              None, (char **)NULL, 0, &sizehints);
++   }
++
++   eglBindAPI(EGL_OPENGL_ES_API);
++
++   {
++      static const EGLint ctx_attribs[] = {
++         EGL_CONTEXT_CLIENT_VERSION, 2,
++         EGL_NONE
++      };
++      ctx = eglCreateContext(egl_dpy, config, EGL_NO_CONTEXT, ctx_attribs );
++      if (!ctx) {
++         av_log(s, AV_LOG_ERROR, "Error: eglCreateContext failed\n");
++         return -1;
++      }
++   }
++
++   XMapWindow(dpy, win);
++
++   {
++      EGLSurface surf = eglCreateWindowSurface(egl_dpy, config,
++                                               (void *)(uintptr_t)win, NULL);
++      if (!surf) {
++         av_log(s, AV_LOG_ERROR, "Error: eglCreateWindowSurface failed\n");
++         return -1;
++      }
++
++      if (!eglMakeCurrent(egl_dpy, surf, surf, ctx)) {
++         av_log(s, AV_LOG_ERROR, "Error: eglMakeCurrent failed\n");
++         return -1;
++      }
++
++      *winRet = win;
++      *ctxRet = ctx;
++      *surfRet = surf;
++   }
++
++   return 0;
++}
++
++static GLint
++compile_shader(struct AVFormatContext * const avctx, GLenum target, const char *source)
++{
++   // Compile source as a shader of type target.
++   // Returns the shader name, or 0 (after logging the compiler's info log) on failure.
++   GLuint s = glCreateShader(target);
++   GLint ok = 0;
++
++   if (s == 0) {
++      av_log(avctx, AV_LOG_ERROR, "Failed to create shader\n");
++      return 0;
++   }
++
++   glShaderSource(s, 1, (const GLchar **) &source, NULL);
++   glCompileShader(s);
++   glGetShaderiv(s, GL_COMPILE_STATUS, &ok);
++
++   if (!ok) {
++      GLint size = 0;
++      GLchar *info;
++
++      glGetShaderiv(s, GL_INFO_LOG_LENGTH, &size);
++      info = size > 0 ? malloc(size) : NULL;
++      if (info)
++         glGetShaderInfoLog(s, size, NULL, info);
++      av_log(avctx, AV_LOG_ERROR, "Failed to compile shader: %ssource:\n%s\n", info ? info : "", source);
++      free(info);
++      glDeleteShader(s);   // do not leak the failed shader object
++      return 0;
++   }
++
++   return s;
++}
++
++static GLuint link_program(struct AVFormatContext * const s, GLint vs, GLint fs)
++{
++   // Link the shaders vs and fs into a new program.
++   // Returns the program name, or 0 (after logging the linker's info log) on failure.
++   GLuint prog = glCreateProgram();
++   GLint ok = 0;
++
++   if (prog == 0) {
++      av_log(s, AV_LOG_ERROR, "Failed to create program\n");
++      return 0;
++   }
++
++   glAttachShader(prog, vs);
++   glAttachShader(prog, fs);
++   glLinkProgram(prog);
++   glGetProgramiv(prog, GL_LINK_STATUS, &ok);
++
++   if (!ok) {
++      /* Some drivers return a size of 1 for an empty log.  This is the size
++       * of a log that contains only a terminating NUL character.
++       */
++      GLint size = 0;
++      GLchar *info = NULL;
++
++      glGetProgramiv(prog, GL_INFO_LOG_LENGTH, &size);
++      if (size > 1 && (info = malloc(size)) != NULL)
++         glGetProgramInfoLog(prog, size, NULL, info);
++      av_log(s, AV_LOG_ERROR, "Failed to link: %s\n",
++              (info != NULL) ? info : "<empty log>");
++      free(info);
++      glDeleteProgram(prog);   // do not leak the failed program object
++      return 0;
++   }
++
++   return prog;
++}
++
++static int
++gl_setup(struct AVFormatContext * const s)
++{  // Build and select the shader program and set up the quad's vertex array.  Returns 0 or -1.
++   const char *vs =   // vertex shader: passes pos through and derives texcoord from it (y negated)
++      "attribute vec4 pos;\n"
++      "varying vec2 texcoord;\n"
++      "\n"
++      "void main() {\n"
++      "  gl_Position = pos;\n"
++      "  texcoord.x = (pos.x + 1.0) / 2.0;\n"
++      "  texcoord.y = (-pos.y + 1.0) / 2.0;\n"
++      "}\n";
++   const char *fs =   // fragment shader: samples the external (EGLImage) texture at texcoord
++      "#extension GL_OES_EGL_image_external : enable\n"
++      "precision mediump float;\n"
++      "uniform samplerExternalOES s;\n"
++      "varying vec2 texcoord;\n"
++      "void main() {\n"
++      "  gl_FragColor = texture2D(s, texcoord);\n"
++      "}\n";
++
++   GLuint vs_s;
++   GLuint fs_s;
++   GLuint prog;
++   // NOTE(review): shaders compiled before a later step fails are not deleted here - confirm intent
++   if (!(vs_s = compile_shader(s, GL_VERTEX_SHADER, vs)) ||
++       !(fs_s = compile_shader(s, GL_FRAGMENT_SHADER, fs)) ||
++       !(prog = link_program(s, vs_s, fs_s)))
++      return -1;
++
++   glUseProgram(prog);
++
++   {
++      static const float verts[] = {   // four corner positions; do_display() draws them as a triangle fan
++         -1, -1,
++         1, -1,
++         1, 1,
++         -1, 1,
++      };
++      glVertexAttribPointer(0, 2, GL_FLOAT, GL_FALSE, 0, verts);   // NOTE(review): assumes "pos" is attribute 0 - confirm
++   }
++
++   glEnableVertexAttribArray(0);
++   return 0;
++}
++
++static int egl_vout_write_trailer(AVFormatContext *s)
++{   // Nothing to do besides optional tracing; always returns 0
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s\n", __func__);
++#endif
++
++    return 0;
++}
++
++static int egl_vout_write_header(AVFormatContext *s)
++{   // Accept the stream layout only if stream 0 is a wrapped-AVFrame video stream and there is no second stream
++    const AVCodecParameters * const par = s->streams[0]->codecpar;   // NOTE(review): assumes nb_streams >= 1 - confirm caller guarantees it
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s\n", __func__);
++#endif
++    if (   s->nb_streams > 1
++        || par->codec_type != AVMEDIA_TYPE_VIDEO
++        || par->codec_id   != AV_CODEC_ID_WRAPPED_AVFRAME) {
++        av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n");
++        return AVERROR(EINVAL);
++    }
++
++    return 0;
++}
++
++
++static int do_display(AVFormatContext * const s, egl_display_env_t * const de, AVFrame * const frame)
++{   // Draw one DRM PRIME frame: import it as an external texture, draw the quad, swap buffers.  0 on success.
++    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor*)frame->data[0];
++    egl_aux_t * da = NULL;
++    unsigned int i;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "<<< %s\n", __func__);
++#endif
++    // Use the first slot that is free (fd == -1) or already holds this frame's first object fd
++    for (i = 0; i != 32; ++i) {
++        if (de->aux[i].fd == -1 || de->aux[i].fd == desc->objects[0].fd) {
++            da = de->aux + i;
++            break;
++        }
++    }
++
++    if (da == NULL) {
++        av_log(s, AV_LOG_INFO, "%s: Out of handles\n", __func__);
++        return AVERROR(EINVAL);
++    }
++
++    if (da->texture == 0) {
++        EGLint attribs[50];
++        EGLint * a = attribs;
++        int i, j;
++        static const EGLint anames[] = {
++           EGL_DMA_BUF_PLANE0_FD_EXT,
++           EGL_DMA_BUF_PLANE0_OFFSET_EXT,
++           EGL_DMA_BUF_PLANE0_PITCH_EXT,
++           EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT,
++           EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT,
++           EGL_DMA_BUF_PLANE1_FD_EXT,
++           EGL_DMA_BUF_PLANE1_OFFSET_EXT,
++           EGL_DMA_BUF_PLANE1_PITCH_EXT,
++           EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT,
++           EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT,
++           EGL_DMA_BUF_PLANE2_FD_EXT,
++           EGL_DMA_BUF_PLANE2_OFFSET_EXT,
++           EGL_DMA_BUF_PLANE2_PITCH_EXT,
++           EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT,
++           EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT,
++        };
++        const EGLint * b = anames;
++
++        *a++ = EGL_WIDTH;
++        *a++ = av_frame_cropped_width(frame);
++        *a++ = EGL_HEIGHT;
++        *a++ = av_frame_cropped_height(frame);
++        *a++ = EGL_LINUX_DRM_FOURCC_EXT;
++        *a++ = desc->layers[0].format;
++        // Five names per plane are taken from anames[]; stop once it is used up so neither array overruns
++        for (i = 0; i < desc->nb_layers; ++i) {
++            for (j = 0; j < desc->layers[i].nb_planes && b < anames + sizeof(anames) / sizeof(anames[0]); ++j) {
++                const AVDRMPlaneDescriptor * const p = desc->layers[i].planes + j;
++                const AVDRMObjectDescriptor * const obj = desc->objects + p->object_index;
++                *a++ = *b++;
++                *a++ = obj->fd;
++                *a++ = *b++;
++                *a++ = p->offset;
++                *a++ = *b++;
++                *a++ = p->pitch;
++                if (obj->format_modifier == 0) {
++                   b += 2;
++                }
++                else {
++                   *a++ = *b++;
++                   *a++ = (EGLint)(obj->format_modifier & 0xFFFFFFFF);
++                   *a++ = *b++;
++                   *a++ = (EGLint)(obj->format_modifier >> 32);
++                }
++            }
++        }
++
++        *a = EGL_NONE;
++
++#if TRACE_ALL
++        for (a = attribs, i = 0; *a != EGL_NONE; a += 2, ++i) {
++           av_log(s, AV_LOG_INFO, "[%2d] %4x: %d\n", i, a[0], a[1]);
++        }
++#endif
++        {
++           const EGLImage image = eglCreateImageKHR(de->setup.egl_dpy,
++                                              EGL_NO_CONTEXT,
++                                              EGL_LINUX_DMA_BUF_EXT,
++                                              NULL, attribs);
++           if (!image) {
++              av_log(s, AV_LOG_ERROR, "Failed to import fd %d\n", desc->objects[0].fd);
++              return -1;
++           }
++
++           glGenTextures(1, &da->texture);
++           glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture);
++           glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
++           glTexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
++           glEGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, image);
++
++           eglDestroyImageKHR(de->setup.egl_dpy, image);
++        }
++
++        da->fd = desc->objects[0].fd;
++
++#if 0
++        av_log(s, AV_LOG_INFO, "%dx%d, fmt: %x, boh=%d,%d,%d,%d, pitch=%d,%d,%d,%d,"
++               " offset=%d,%d,%d,%d, mod=%llx,%llx,%llx,%llx\n",
++               av_frame_cropped_width(frame),
++               av_frame_cropped_height(frame),
++               desc->layers[0].format,
++               bo_plane_handles[0],
++               bo_plane_handles[1],
++               bo_plane_handles[2],
++               bo_plane_handles[3],
++               pitches[0],
++               pitches[1],
++               pitches[2],
++               pitches[3],
++               offsets[0],
++               offsets[1],
++               offsets[2],
++               offsets[3],
++               (long long)modifiers[0],
++               (long long)modifiers[1],
++               (long long)modifiers[2],
++               (long long)modifiers[3]
++               );
++#endif
++    }
++
++    glClearColor(0.5, 0.5, 0.5, 0.5);
++    glClear(GL_COLOR_BUFFER_BIT);
++
++    glBindTexture(GL_TEXTURE_EXTERNAL_OES, da->texture);
++    glDrawArrays(GL_TRIANGLE_FAN, 0, 4);
++    eglSwapBuffers(de->setup.egl_dpy, de->setup.surf);
++    // The texture is deleted again straight away and the slot is marked free
++    glDeleteTextures(1, &da->texture);
++    da->texture = 0;
++    da->fd = -1;
++
++    return 0;
++}
++
++static void * display_thread(void * v)
++{   // Display thread body: v is the AVFormatContext.  Sets up X/EGL/GL, then draws each queued frame.
++    AVFormatContext * const s = v;
++    egl_display_env_t * const de = s->priv_data;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "<<< %s\n", __func__);
++#endif
++    {
++       EGLint egl_major, egl_minor;
++
++       de->setup.dpy = XOpenDisplay(NULL);
++       if (!de->setup.dpy) {
++          av_log(s, AV_LOG_ERROR, "Couldn't open X display\n");
++          goto fail;
++       }
++
++       de->setup.egl_dpy = eglGetDisplay(de->setup.dpy);
++       if (!de->setup.egl_dpy) {
++          av_log(s, AV_LOG_ERROR, "eglGetDisplay() failed\n");
++          goto fail;
++       }
++
++       if (!eglInitialize(de->setup.egl_dpy, &egl_major, &egl_minor)) {
++           av_log(s, AV_LOG_ERROR, "Error: eglInitialize() failed\n");
++           goto fail;
++       }
++
++       av_log(s, AV_LOG_INFO, "EGL version %d.%d\n", egl_major, egl_minor);
++
++       if (!epoxy_has_egl_extension(de->setup.egl_dpy, "EGL_KHR_image_base")) {
++          av_log(s, AV_LOG_ERROR, "Missing EGL KHR image extension\n");
++          goto fail;
++       }
++    }
++
++    if (!de->window_width || !de->window_height) {   // fall back to 1280x720 unless both dimensions are set
++       de->window_width = 1280;
++       de->window_height = 720;
++    }
++    if (make_window(s, de, de->setup.dpy, de->setup.egl_dpy, "ffmpeg-vout",
++                    &de->setup.win, &de->setup.ctx, &de->setup.surf)) {
++       av_log(s, AV_LOG_ERROR, "%s: make_window failed\n", __func__);
++       goto fail;
++    }
++
++    if (gl_setup(s)) {
++       av_log(s, AV_LOG_ERROR, "%s: gl_setup failed\n", __func__);
++       goto fail;
++    }
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "--- %s: Start done\n", __func__);
++#endif
++    sem_post(&de->display_start_sem);   // set-up succeeded (q_terminate is left untouched)
++
++    for (;;) {
++        AVFrame * frame;
++
++        while (sem_wait(&de->q_sem) != 0) {   // retry waits interrupted by a signal; anything else asserts
++            av_assert0(errno == EINTR);
++        }
++
++        if (de->q_terminate)
++            break;
++
++        pthread_mutex_lock(&de->q_lock);   // take the queued frame out of q_next under the lock
++        frame = de->q_next;
++        de->q_next = NULL;
++        pthread_mutex_unlock(&de->q_lock);
++
++        do_display(s, de, frame);
++
++        av_frame_free(&de->q_this);   // the previously displayed frame is released only now
++        de->q_this = frame;
++    }
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, ">>> %s\n", __func__);
++#endif
++
++    return NULL;
++
++fail:   // set-up failed: flag it through q_terminate and still post the start semaphore
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, ">>> %s: FAIL\n", __func__);
++#endif
++    de->q_terminate = 1;
++    sem_post(&de->display_start_sem);
++
++    return NULL;
++}
++
++static int egl_vout_write_packet(AVFormatContext *s, AVPacket *pkt)  // Queue the AVFrame wrapped in pkt for the display thread; returns 0 or a negative AVERROR
++{
++    const AVFrame * const src_frame = (AVFrame *)pkt->data;
++    AVFrame * frame;
++    egl_display_env_t * const de = s->priv_data;
++    int ret;
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s\n", __func__);
++#endif
++
++    if (src_frame->format != AV_PIX_FMT_DRM_PRIME && src_frame->format != AV_PIX_FMT_VAAPI) {
++        av_log(s, AV_LOG_WARNING, "Frame (format=%d) not DRM_PRiME\n", src_frame->format);
++        return AVERROR(EINVAL);
++    }
++    if ((frame = av_frame_alloc()) == NULL)  // was unchecked: a failed allocation got dereferenced below
++        return AVERROR(ENOMEM);
++    if (src_frame->format == AV_PIX_FMT_DRM_PRIME) {
++        ret = av_frame_ref(frame, src_frame);  // result was ignored, so an empty frame could be queued
++    } else {
++        frame->format = AV_PIX_FMT_DRM_PRIME;  // ask for the VAAPI frame to be mapped as DRM_PRIME
++        if ((ret = av_hwframe_map(frame, src_frame, 0)) != 0) {
++            av_log(s, AV_LOG_WARNING, "Failed to map frame (format=%d) to DRM_PRiME\n", src_frame->format);
++            ret = AVERROR(EINVAL);  // keep the error code this path has always returned
++        }
++    }
++    if (ret != 0) {
++        av_frame_free(&frame);
++        return ret;
++    }
++
++    // Really hacky sync: with show_all, poll until the display thread has taken the pending frame
++    while (de->show_all && de->q_next) {
++       usleep(3000);
++    }
++
++    pthread_mutex_lock(&de->q_lock);
++    {
++        AVFrame * const t = de->q_next;  // swap: our frame becomes pending, we get whatever was there
++        de->q_next = frame;
++        frame = t;
++    }
++    pthread_mutex_unlock(&de->q_lock);
++
++    if (frame == NULL)
++        sem_post(&de->q_sem);  // slot was empty: wake the display thread for the new frame
++    else
++        av_frame_free(&frame);  // slot was occupied: that frame is dropped, thread already has a wake-up pending
++    return 0;
++}
++
++static int egl_vout_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe,
++                          unsigned flags)
++{
++    /* egl_vout_write_header() should have accepted only supported formats, so
++     * an AV_WRITE_UNCODED_FRAME_QUERY succeeds.  A real frame is acknowledged
++     * with 0 as well; nothing is queued for display on this path. */
++    const int result = 0;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s: idx=%d, flags=%#x\n", __func__, stream_index, flags);
++#endif
++    return result;
++}
++
++static int egl_vout_control_message(AVFormatContext *s, int type, void *data, size_t data_size)
++{
++    // Only a repaint request is acknowledged; every other message is unsupported
++    int rv = AVERROR(ENOSYS);
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type);
++#endif
++
++    if (type == AV_APP_TO_DEV_WINDOW_REPAINT)
++        rv = 0;
++    return rv;
++}
++
++// deinit is called if init fails so no need to clean up explicitly here
++static int egl_vout_init(struct AVFormatContext * s)  // Create the queue primitives, start display_thread() and wait for its setup result
++{
++    egl_display_env_t * const de = s->priv_data;
++    unsigned int i;
++
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++    de->setup = (struct egl_setup){0};
++
++    for (i = 0; i != 32; ++i) {  // NOTE(review): assumes aux[] has 32 entries - confirm against the struct
++        de->aux[i].fd = -1;
++    }
++
++    de->q_terminate = 0;
++    pthread_mutex_init(&de->q_lock, NULL);
++    sem_init(&de->q_sem, 0, 0);
++    sem_init(&de->display_start_sem, 0, 0);
++    av_assert0(pthread_create(&de->q_thread, NULL, display_thread, s) == 0);
++
++    // Posted by display_thread() on setup success or failure; retry on EINTR so q_terminate is not read too early
++    while (sem_wait(&de->display_start_sem) != 0)
++        av_assert0(errno == EINTR);
++    if (de->q_terminate) {
++       av_log(s, AV_LOG_ERROR, "%s: Display startup failure\n", __func__);
++       return AVERROR_EXTERNAL;  // X/EGL/GL setup failed; a proper AVERROR code instead of a raw -1
++    }
++
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++    return 0;
++}
++
++static void egl_vout_deinit(struct AVFormatContext * s)  // Stop the display thread and release what egl_vout_init() created
++{
++    egl_display_env_t * const de = s->priv_data;
++
++    av_log(s, AV_LOG_DEBUG, "<<< %s\n", __func__);
++
++    de->q_terminate = 1;  // set before the post so the woken thread sees it and leaves its loop
++    sem_post(&de->q_sem);
++    pthread_join(de->q_thread, NULL);
++    sem_destroy(&de->q_sem);
++    sem_destroy(&de->display_start_sem);  // initialised in egl_vout_init() but previously never destroyed
++    pthread_mutex_destroy(&de->q_lock);
++
++    av_frame_free(&de->q_next);  // frame still waiting to be shown, if any
++    av_frame_free(&de->q_this);  // frame last shown, if any
++    av_log(s, AV_LOG_DEBUG, ">>> %s\n", __func__);
++}
++
++#define OFFSET(x) offsetof(egl_display_env_t, x)
++static const AVOption options[] = {  // User options; each entry stores into the named egl_display_env_t field
++   { "show_all", "show all frames", OFFSET(show_all), AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },  // makes egl_vout_write_packet() wait for the pending frame to be taken
++   { "window_size",  "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },  // NOTE(review): IMAGE_SIZE presumably also fills window_height, which must follow window_width - confirm
++   { "window_x",     "set window x offset",    OFFSET(window_x),     AV_OPT_TYPE_INT,    {.i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
++   { "window_y",     "set window y offset",    OFFSET(window_y),     AV_OPT_TYPE_INT,    {.i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
++   { "fullscreen",   "set fullscreen display", OFFSET(fullscreen),   AV_OPT_TYPE_BOOL,   {.i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
++    { NULL }  // terminator
++
++};
++
++static const AVClass egl_vout_class = {  // AVClass tying options[] to this device's private context
++    .class_name = "egl vid outdev",
++    .category   = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT,
++    .version    = LIBAVUTIL_VERSION_INT,
++    .option     = options,
++    .item_name  = av_default_item_name,
++};
++
++AVOutputFormat ff_vout_egl_muxer = {  // Registration record for the "vout_egl" output device
++    .name           = "vout_egl",
++    .long_name      = NULL_IF_CONFIG_SMALL("Egl video output device"),
++    .priv_class     = &egl_vout_class,
++    .priv_data_size = sizeof(egl_display_env_t),
++    .flags          = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS,
++    .video_codec    = AV_CODEC_ID_WRAPPED_AVFRAME,
++    .audio_codec    = AV_CODEC_ID_NONE,
++    .init           = egl_vout_init,
++    .write_header   = egl_vout_write_header,
++    .write_packet   = egl_vout_write_packet,
++    .write_uncoded_frame = egl_vout_write_frame,
++    .control_message = egl_vout_control_message,
++    .write_trailer  = egl_vout_write_trailer,
++    .deinit         = egl_vout_deinit,
++};
++
+--- /dev/null
++++ b/libavdevice/rpi_vout.c
+@@ -0,0 +1,534 @@
++/*
++ * Copyright (c) 2013 Jeff Moguillansky
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/**
++ * @file
++ * Raspberry Pi (MMAL) video output device
++ *
++ * TODO:
++ * - add support for more formats
++ */
++
++#include "libavutil/opt.h"
++#include "libavutil/avassert.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/imgutils.h"
++#include "libavformat/internal.h"
++#include "avdevice.h"
++
++#include <stdatomic.h>
++#include <unistd.h>
++
++#pragma GCC diagnostic push
++// Many many redundant decls in the header files
++#pragma GCC diagnostic ignored "-Wredundant-decls"
++#include <bcm_host.h>
++#include <interface/mmal/mmal.h>
++#include <interface/mmal/mmal_parameters_camera.h>
++#include <interface/mmal/mmal_buffer.h>
++#include <interface/mmal/mmal_port.h>
++#include <interface/mmal/util/mmal_util.h>
++#include <interface/mmal/util/mmal_default_components.h>
++#include <interface/mmal/util/mmal_connection.h>
++#include <interface/mmal/util/mmal_util_params.h>
++#pragma GCC diagnostic pop
++#include "libavutil/rpi_sand_fns.h"
++#include "libavcodec/rpi_zc.h"
++
++#define TRACE_ALL 0
++
++#define DISPLAY_PORT_DEPTH 4
++
++typedef struct rpi_display_env_s  // Private context of the vout_rpi output device
++{
++    AVClass *class;
++
++    MMAL_COMPONENT_T* display;  // video renderer component, created in xv_write_header()
++    MMAL_COMPONENT_T* isp;  // ISP component; only created by display_frame() when avfmt_needs_isp() says so
++    MMAL_PORT_T * port_in;  // Input port of either isp or display depending on pipe setup
++    MMAL_CONNECTION_T * conn;  // tunnelled connection from isp output[0] to display input[0], when the isp is in use
++
++    MMAL_POOL_T *rpi_pool;  // pool of DISPLAY_PORT_DEPTH buffer headers used to send frames
++    volatile int rpi_display_count;  // buffers currently in flight: incremented in display_frame(), decremented in buf_release_cb()
++
++    MMAL_FOURCC_T req_fmt;  // encoding last committed to port_in
++    MMAL_VIDEO_FORMAT_T req_vfmt;  // geometry last committed to port_in; a change makes display_frame() rebuild the pipe
++
++    AVZcEnvPtr zc;  // zero-copy environment, allocated in rpi_vout_init() and freed in rpi_vout_deinit()
++
++    int window_width, window_height;  // "window_size" option; 0 means use the stream's width/height
++    int window_x, window_y;  // "window_x" / "window_y" options
++    int layer, fullscreen;  // "display_layer" (0 falls back to layer 2) and "fullscreen" options
++    int show_all;  // "show_all" option: display_frame() waits for in-flight buffers to drain before sending
++} rpi_display_env_t;
++
++
++static void display_cb_input(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *hdr) {
++    mmal_buffer_header_release(hdr);  // callback given to mmal_port_enable() for the input port: just release the header
++}
++
++static void display_cb_control(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *hdr) {
++    mmal_buffer_header_release(hdr);  // control-port callback (display and isp): the event is not inspected, only released
++}
++
++
++static MMAL_FOURCC_T mmfmt_from_avfmt(const enum AVPixelFormat fmt)
++{
++    // Translate an FFmpeg pixel format into the MMAL encoding used for it.
++    // SAND128 and RPI4_8 share one encoding.  Any format without an entry
++    // below yields 0.
++    MMAL_FOURCC_T fourcc = 0;
++
++    if (fmt == AV_PIX_FMT_SAND128 || fmt == AV_PIX_FMT_RPI4_8)
++        fourcc = MMAL_ENCODING_YUVUV128;
++    else if (fmt == AV_PIX_FMT_RPI4_10)
++        fourcc = MMAL_ENCODING_YUV10_COL;
++    else if (fmt == AV_PIX_FMT_SAND64_10)
++        fourcc = MMAL_ENCODING_YUVUV64_10;
++    else if (fmt == AV_PIX_FMT_SAND64_16)
++        fourcc = MMAL_ENCODING_YUVUV64_16;
++    else if (fmt == AV_PIX_FMT_YUV420P)
++        fourcc = MMAL_ENCODING_I420;
++
++    return fourcc;
++}
++
++
++static void video_format_from_zc_frame(MMAL_ES_FORMAT_T* const es_fmt,
++                                       const AVFrame * const frame, const AVRpiZcRefPtr fr_ref)
++{
++    // Fill es_fmt (and the video format it points at) from the zero-copy
++    // buffer geometry of fr_ref and the crop / aspect fields of frame.
++    const AVRpiZcFrameGeometry * const g = av_rpi_zc_geometry(fr_ref);
++    MMAL_VIDEO_FORMAT_T * const vid = &es_fmt->es->video;
++    const int is_sand = av_rpi_is_sand_format(g->format);
++
++    // Stream-level description
++    es_fmt->type = MMAL_ES_TYPE_VIDEO;
++    es_fmt->encoding = mmfmt_from_avfmt(g->format);
++    es_fmt->encoding_variant = 0;
++    es_fmt->bitrate = 0;
++    es_fmt->flags = is_sand ? MMAL_ES_FORMAT_FLAG_COL_FMTS_WIDTH_IS_COL_STRIDE : 0;
++
++    // Buffer geometry.  Sand formats are a bit "special": stride1 is implicit
++    // in the format and the width field carries stride2 instead
++    if (!is_sand)
++        vid->width = g->stride_y / g->bytes_per_pel;
++    else if (g->stripe_is_yc)
++        vid->width = g->height_y + g->height_c;
++    else
++        vid->width = g->height_y;
++    vid->height = g->height_y;
++
++    // Visible region and pixel aspect come from the frame
++    vid->crop.x = frame->crop_left;
++    vid->crop.y = frame->crop_top;
++    vid->crop.width = av_frame_cropped_width(frame);
++    vid->crop.height = av_frame_cropped_height(frame);
++    vid->par.num = frame->sample_aspect_ratio.num;
++    vid->par.den = frame->sample_aspect_ratio.den;
++
++    // Not known at this point
++    vid->frame_rate.num = 0;
++    vid->frame_rate.den = 0;
++    vid->color_space = 0;
++}
++
++static MMAL_BOOL_T buf_release_cb(MMAL_BUFFER_HEADER_T * buf, void *userdata)
++{   // Pre-release hook: drop the frame reference kept in user_data and count one buffer fewer in flight
++    rpi_display_env_t * const de = userdata;
++    const AVRpiZcRefPtr held = (AVRpiZcRefPtr)buf->user_data;
++    if (held != NULL)
++        av_rpi_zc_unref(held);
++    buf->user_data = NULL;
++    atomic_fetch_sub(&de->rpi_display_count, 1);
++    return MMAL_FALSE;
++}
++
++static inline int avfmt_needs_isp(const enum AVPixelFormat avfmt)
++{
++    return (avfmt == AV_PIX_FMT_SAND64_10) ? 1 : 0;  // SAND64_10 is the only format routed through the ISP
++}
++
++static void isp_remove(AVFormatContext * const s, rpi_display_env_t * const de)
++{
++    // Tear down the optional ISP stage: disable its ports, then destroy the
++    // connection before the component itself.  Does nothing if neither exists.
++    MMAL_COMPONENT_T * const isp = de->isp;
++
++    if (isp != NULL && isp->input[0]->is_enabled)
++        mmal_port_disable(isp->input[0]);
++    if (isp != NULL && isp->control->is_enabled)
++        mmal_port_disable(isp->control);
++
++    if (de->conn != NULL)
++        mmal_connection_destroy(de->conn);
++    de->conn = NULL;
++    if (isp != NULL)
++        mmal_component_destroy(isp);
++    de->isp = NULL;
++}
++
++static void display_frame(AVFormatContext * const s, rpi_display_env_t * const de, const AVFrame* const fr)  // Send one frame to MMAL for display, rebuilding the port/ISP setup first if the format changed
++{
++    MMAL_BUFFER_HEADER_T* buf = NULL;
++    AVRpiZcRefPtr fr_buf = NULL;
++
++    if (de == NULL)
++        return;
++
++    if (atomic_load(&de->rpi_display_count) >= DISPLAY_PORT_DEPTH - 1) {  // too many buffers still in flight: drop this frame
++        av_log(s, AV_LOG_VERBOSE, "Frame dropped\n");
++        return;
++    }
++
++    if ((fr_buf = av_rpi_zc_ref(s, de->zc, fr, fr->format, 1)) == NULL) {  // NOTE(review): presumably takes a zero-copy reference to the frame data - confirm in rpi_zc
++        return;
++    }
++
++    buf = mmal_queue_get(de->rpi_pool->queue);
++    if (!buf) {
++        // Running too fast so drop the frame (unexpected)
++        goto fail;
++    }
++
++    buf->cmd = 0;
++    buf->offset = 0;
++    buf->flags = 0;
++    mmal_buffer_header_reset(buf);
++
++    atomic_fetch_add(&de->rpi_display_count, 1);  // Decremented on release (buf_release_cb)
++    mmal_buffer_header_pre_release_cb_set(buf, buf_release_cb, de);
++
++    buf->user_data = fr_buf;  // from here on buf owns fr_buf; buf_release_cb() unrefs it
++    buf->data = (uint8_t *)av_rpi_zc_vc_handle(fr_buf);  // Cast our handle to a pointer for mmal
++    buf->offset = av_rpi_zc_offset(fr_buf);
++    buf->length = av_rpi_zc_length(fr_buf);
++    buf->alloc_size = av_rpi_zc_numbytes(fr_buf);
++
++    while (de->show_all && atomic_load(&de->rpi_display_count) >= DISPLAY_PORT_DEPTH - 1) {  // show_all: poll until an in-flight buffer is released
++        usleep(5000);
++    }
++
++    {
++        MMAL_ES_SPECIFIC_FORMAT_T new_ess = {.video = {0}};
++        MMAL_ES_FORMAT_T new_es = {.es = &new_ess};
++		MMAL_VIDEO_FORMAT_T * const new_vfmt = &new_ess.video;
++
++        video_format_from_zc_frame(&new_es, fr, fr_buf);
++        if (de->req_fmt != new_es.encoding ||
++            de->req_vfmt.width       != new_vfmt->width ||
++            de->req_vfmt.height      != new_vfmt->height ||
++            de->req_vfmt.crop.x      != new_vfmt->crop.x ||
++            de->req_vfmt.crop.y      != new_vfmt->crop.y ||
++            de->req_vfmt.crop.width  != new_vfmt->crop.width ||
++            de->req_vfmt.crop.height != new_vfmt->crop.height) {
++            // Something has changed
++
++            // If we have an ISP tear it down
++            isp_remove(s, de);
++            de->port_in = de->display->input[0];
++
++            // If we still need an ISP create it now
++            if (avfmt_needs_isp(fr->format))
++            {
++                if (mmal_component_create("vc.ril.isp", &de->isp) != MMAL_SUCCESS)
++                {
++                    av_log(s, AV_LOG_ERROR, "ISP creation failed\n");
++                    goto fail;
++                }
++                de->port_in = de->isp->input[0];
++            }
++
++            mmal_format_copy(de->port_in->format, &new_es);
++
++            if (mmal_port_format_commit(de->port_in)) {
++                av_log(s, AV_LOG_ERROR, "Failed to commit input format\n");
++                goto fail;
++            }
++
++            // If we have an ISP then we must want to use it
++            if (de->isp != NULL) {
++                MMAL_PORT_T * const port_out = de->isp->output[0];
++                MMAL_VIDEO_FORMAT_T* vfmt_in = &de->port_in->format->es->video;
++                MMAL_VIDEO_FORMAT_T* vfmt_out = &port_out->format->es->video;
++
++                port_out->format->type = MMAL_ES_TYPE_VIDEO;
++                port_out->format->encoding  = MMAL_ENCODING_YUVUV128;
++                port_out->format->encoding_variant = 0;
++                port_out->format->bitrate = 0;
++                port_out->format->flags = 0;
++                port_out->format->extradata = NULL;
++                port_out->format->extradata_size = 0;
++
++                vfmt_out->width       = (vfmt_in->crop.width + 31) & ~31;  // round up to a multiple of 32
++                vfmt_out->height      = (vfmt_in->crop.height + 15) & ~15;  // round up to a multiple of 16
++                vfmt_out->crop.x      = 0;
++                vfmt_out->crop.y      = 0;
++                vfmt_out->crop.width  = vfmt_in->crop.width;
++                vfmt_out->crop.height = vfmt_in->crop.height;
++                vfmt_out->frame_rate  = vfmt_in->frame_rate;
++                vfmt_out->par         = vfmt_in->par;
++                vfmt_out->color_space = vfmt_in->color_space;
++
++                if (mmal_port_format_commit(port_out)) {
++                    av_log(s, AV_LOG_ERROR, "Failed to commit output format\n");
++                    goto fail;
++                }
++
++                if (mmal_connection_create(&de->conn, port_out, de->display->input[0], MMAL_CONNECTION_FLAG_TUNNELLING) != MMAL_SUCCESS) {
++                    av_log(s, AV_LOG_ERROR, "Failed to create connection\n");
++                    goto fail;
++                }
++                if (mmal_connection_enable(de->conn) != MMAL_SUCCESS) {
++                    av_log(s, AV_LOG_ERROR, "Failed to enable connection\n");
++                    goto fail;
++                }
++                mmal_port_enable(de->isp->control,display_cb_control);
++                mmal_component_enable(de->isp);
++            }
++
++            // Number of slots in my port Q
++            de->port_in->buffer_num = DISPLAY_PORT_DEPTH;
++            // Size to keep it happy - isn't used for anything other than error checking
++            de->port_in->buffer_size = buf->alloc_size;
++            if (!de->port_in->is_enabled)
++            {
++                mmal_port_parameter_set_boolean(de->port_in, MMAL_PARAMETER_ZERO_COPY, MMAL_TRUE); // Does this mark that the buffer contains a vc_handle?  Would have expected a vc_image?
++                if (mmal_port_enable(de->port_in, display_cb_input) != MMAL_SUCCESS) {
++                    av_log(s, AV_LOG_ERROR, "Failed to enable input port\n");
++                    goto fail;
++                }
++            }
++
++            de->req_fmt  = new_es.encoding;  // remembered only after a fully successful rebuild, so a failed one is retried on the next frame
++            de->req_vfmt = *new_vfmt;
++        }
++    }
++
++    if (mmal_port_send_buffer(de->port_in, buf) != MMAL_SUCCESS)
++    {
++        av_log(s, AV_LOG_ERROR, "mmal_port_send_buffer failed: depth=%d\n", de->rpi_display_count);
++        goto fail;
++    }
++    return;
++
++fail:
++    // If we have a buf then fr_buf is held by that
++    if (buf != NULL)
++        mmal_buffer_header_release(buf);  // the pre-release callback set above is buf_release_cb(), which unrefs fr_buf and decrements the count
++    else if (fr_buf != NULL)
++        av_rpi_zc_unref(fr_buf);
++}
++
++
++static int xv_write_trailer(AVFormatContext *s)
++{
++    // Pipeline teardown, also used as xv_write_header()'s failure cleanup; absent objects are skipped.  Returns 0.
++    rpi_display_env_t * const de = s->priv_data;
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s\n", __func__);
++#endif
++    if (de->port_in != NULL && de->port_in->is_enabled)
++        mmal_port_disable(de->port_in);
++
++    // The above disable should kick out all buffers - check that
++    if (atomic_load(&de->rpi_display_count) != 0) {
++        av_log(s, AV_LOG_WARNING, "Exiting with display count non-zero:%d\n", atomic_load(&de->rpi_display_count));
++    }
++
++    isp_remove(s, de);
++    if (de->rpi_pool != NULL) {
++        mmal_pool_destroy(de->rpi_pool);
++        de->rpi_pool = NULL;
++    }
++    if (de->display != NULL) {
++        mmal_component_destroy(de->display);
++        de->display = NULL;
++    }
++    de->port_in = NULL;  // it pointed into a component destroyed above; a repeated call must not dereference it
++    return 0;
++}
++
++static int xv_write_header(AVFormatContext *s)  // Validate the stream, create and enable the MMAL renderer, and create the buffer-header pool
++{
++    rpi_display_env_t * const de = s->priv_data;
++    const AVCodecParameters * const par = s->streams[0]->codecpar;
++    const unsigned int w = de->window_width ? de->window_width : par->width;  // window_size option, else the stream size
++    const unsigned int h = de->window_height ? de->window_height : par->height;
++    const unsigned int x = de->window_x;
++    const unsigned int y = de->window_y;
++    const int layer = de->layer ? de->layer : 2;  // an unset (0) display_layer option falls back to layer 2
++    const MMAL_BOOL_T fullscreen = de->fullscreen;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s: %dx%d\n", __func__, w, h);
++#endif
++    if (   s->nb_streams > 1
++        || par->codec_type != AVMEDIA_TYPE_VIDEO
++        || par->codec_id   != AV_CODEC_ID_WRAPPED_AVFRAME) {
++        av_log(s, AV_LOG_ERROR, "Only supports one wrapped avframe stream\n");
++        return AVERROR(EINVAL);
++    }
++
++    {
++        MMAL_DISPLAYREGION_T region =
++        {
++            .hdr = {MMAL_PARAMETER_DISPLAYREGION, sizeof(region)},
++            .set = MMAL_DISPLAY_SET_LAYER | MMAL_DISPLAY_SET_FULLSCREEN |
++                MMAL_DISPLAY_SET_DEST_RECT | MMAL_DISPLAY_SET_ALPHA,
++            .layer = layer,
++            .fullscreen = fullscreen,
++            .dest_rect = {x, y, w, h},
++            .alpha = !fullscreen ? 0xff : 0xff | MMAL_DISPLAY_ALPHA_FLAGS_DISCARD_LOWER_LAYERS,  // alpha 0xff either way; fullscreen adds the discard-lower-layers flag
++        };
++
++        bcm_host_init();  // Needs to be done by someone...
++
++        if (mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &de->display) != MMAL_SUCCESS)
++        {
++            av_log(s, AV_LOG_ERROR, "Failed to create display component\n");
++            goto fail;
++        }
++        de->port_in = de->display->input[0];  // default input is the renderer itself; display_frame() may switch it to the ISP
++
++        mmal_port_parameter_set(de->display->input[0], &region.hdr);  // result is not checked
++
++        if (mmal_component_enable(de->display) != MMAL_SUCCESS)
++        {
++            av_log(s, AV_LOG_ERROR, "Failed to enable display component\n");
++            goto fail;
++        }
++        if (mmal_port_enable(de->display->control,display_cb_control) != MMAL_SUCCESS)
++        {
++            av_log(s, AV_LOG_ERROR, "Failed to enable display control port\n");
++            goto fail;
++        }
++
++        if ((de->rpi_pool = mmal_pool_create(DISPLAY_PORT_DEPTH, 0)) == NULL)  // headers only: display_frame() points each one at the frame's own memory
++        {
++            av_log(s, AV_LOG_ERROR, "Failed to create pool\n");
++            goto fail;
++        }
++    }
++
++    return 0;
++
++fail:
++    xv_write_trailer(s);  // releases whatever was created so far; it skips objects that are still NULL
++    return AVERROR_UNKNOWN;
++}
++
++static int xv_write_packet(AVFormatContext *s, AVPacket *pkt)
++{
++    rpi_display_env_t * const de = s->priv_data;
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s\n", __func__);
++#endif
++    display_frame(s, de, (AVFrame *)pkt->data);  // the packet data is treated as the AVFrame itself
++    return 0;
++}
++
++static int xv_write_frame(AVFormatContext *s, int stream_index, AVFrame **ppframe,
++                          unsigned flags)
++{
++    const int query_only = (flags & AV_WRITE_UNCODED_FRAME_QUERY) != 0;
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s: idx=%d, flags=%#x\n", __func__, stream_index, flags);
++#endif
++    /* xv_write_header() should have accepted only supported formats, so a
++     * query is answered with success without touching *ppframe.  A real
++     * frame is handed to the display; the result is 0 either way. */
++    if (!query_only)
++        display_frame(s, s->priv_data, *ppframe);
++    return 0;
++}
++
++static int xv_control_message(AVFormatContext *s, int type, void *data, size_t data_size)
++{
++    // A repaint request needs no action and succeeds.
++    // Every other message type is reported as not implemented;
++    // data and data_size are never examined.
++    const int handled = (type == AV_APP_TO_DEV_WINDOW_REPAINT);
++
++#if TRACE_ALL
++    av_log(s, AV_LOG_INFO, "%s: %d\n", __func__, type);
++#endif
++
++    return handled ? 0 : AVERROR(ENOSYS);
++}
++
++// deinit is called if init fails so no need to clean up explicitly here
++static int rpi_vout_init(struct AVFormatContext * s)
++{
++    rpi_display_env_t * const de = s->priv_data;
++
++    // Get a ZC context in case we need one - has little overhead if unused
++    if ((de->zc = av_rpi_zc_int_env_alloc(s)) == NULL)
++        return AVERROR(ENOMEM);  // must be negative: returning 1 from init means "streams not fully configured", not failure
++
++    return 0;
++}
++
++static void rpi_vout_deinit(struct AVFormatContext * s)
++{
++    // Release the zero-copy environment obtained in rpi_vout_init()
++    rpi_display_env_t * const ctx = s->priv_data;
++    av_rpi_zc_int_env_freep(&ctx->zc);
++}
++
++
++#define OFFSET(x) offsetof(rpi_display_env_t, x)
++static const AVOption options[] = {  // User options; each entry stores into the named rpi_display_env_t field
++    { "show_all",     "show all frames",        OFFSET(show_all),     AV_OPT_TYPE_BOOL,   {.i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },  // display_frame() then waits for in-flight buffers to drain before sending
++    { "window_size",  "set window forced size", OFFSET(window_width), AV_OPT_TYPE_IMAGE_SIZE, {.str = NULL}, 0, 0, AV_OPT_FLAG_ENCODING_PARAM },  // window_height is declared directly after window_width; NOTE(review): IMAGE_SIZE presumably fills both - confirm
++    { "window_x",     "set window x offset",    OFFSET(window_x),     AV_OPT_TYPE_INT,    {.i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
++    { "window_y",     "set window y offset",    OFFSET(window_y),     AV_OPT_TYPE_INT,    {.i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },
++    { "display_layer","set display layer",      OFFSET(layer),        AV_OPT_TYPE_INT,    {.i64 = 0 }, -INT_MAX, INT_MAX, AV_OPT_FLAG_ENCODING_PARAM },  // 0 is treated as unset: xv_write_header() then uses layer 2
++    { "fullscreen",   "set fullscreen display", OFFSET(fullscreen),   AV_OPT_TYPE_BOOL,   {.i64 = 0 }, 0, 1, AV_OPT_FLAG_ENCODING_PARAM },
++    { NULL }  // terminator
++
++};
++
++static const AVClass xv_class = {  // AVClass tying options[] to this device's private context
++    .class_name = "rpi vid outdev",
++    .category   = AV_CLASS_CATEGORY_DEVICE_VIDEO_OUTPUT,
++    .version    = LIBAVUTIL_VERSION_INT,
++    .option     = options,
++    .item_name  = av_default_item_name,
++};
++
++AVOutputFormat ff_vout_rpi_muxer = {  // Registration record for the "vout_rpi" output device
++    .name           = "vout_rpi",
++    .long_name      = NULL_IF_CONFIG_SMALL("Rpi (mmal) video output device"),
++    .priv_class     = &xv_class,
++    .priv_data_size = sizeof(rpi_display_env_t),
++    .flags          = AVFMT_NOFILE | AVFMT_VARIABLE_FPS | AVFMT_NOTIMESTAMPS,
++    .video_codec    = AV_CODEC_ID_WRAPPED_AVFRAME,
++    .audio_codec    = AV_CODEC_ID_NONE,
++    .init           = rpi_vout_init,
++    .write_header   = xv_write_header,
++    .write_packet   = xv_write_packet,
++    .write_uncoded_frame = xv_write_frame,
++    .control_message = xv_control_message,
++    .write_trailer  = xv_write_trailer,
++    .deinit         = rpi_vout_deinit,
++};
+--- a/libavfilter/Makefile
++++ b/libavfilter/Makefile
+@@ -434,6 +434,7 @@ OBJS-$(CONFIG_TRANSPOSE_OPENCL_FILTER)
+ OBJS-$(CONFIG_TRANSPOSE_VAAPI_FILTER)        += vf_transpose_vaapi.o vaapi_vpp.o
+ OBJS-$(CONFIG_TRIM_FILTER)                   += trim.o
+ OBJS-$(CONFIG_UNPREMULTIPLY_FILTER)          += vf_premultiply.o framesync.o
++OBJS-$(CONFIG_UNSAND_FILTER)                 += vf_unsand.o
+ OBJS-$(CONFIG_UNSHARP_FILTER)                += vf_unsharp.o
+ OBJS-$(CONFIG_UNSHARP_OPENCL_FILTER)         += vf_unsharp_opencl.o opencl.o \
+                                                 opencl/unsharp.o
+--- a/libavfilter/allfilters.c
++++ b/libavfilter/allfilters.c
+@@ -414,6 +414,7 @@ extern AVFilter ff_vf_transpose_opencl;
+ extern AVFilter ff_vf_transpose_vaapi;
+ extern AVFilter ff_vf_trim;
+ extern AVFilter ff_vf_unpremultiply;
++extern AVFilter ff_vf_unsand;
+ extern AVFilter ff_vf_unsharp;
+ extern AVFilter ff_vf_unsharp_opencl;
+ extern AVFilter ff_vf_untile;
+--- a/libavfilter/avfiltergraph.c
++++ b/libavfilter/avfiltergraph.c
+@@ -32,6 +32,9 @@
+ #include "libavutil/internal.h"
+ #include "libavutil/opt.h"
+ #include "libavutil/pixdesc.h"
++#if CONFIG_UNSAND_FILTER
++#include "libavutil/rpi_sand_fns.h"
++#endif
+
+ #define FF_INTERNAL_FIELDS 1
+ #include "framequeue.h"
+@@ -427,6 +430,19 @@ static int can_merge_formats(AVFilterFor
+     }
+ }
+
++#if CONFIG_UNSAND_FILTER
++static int has_sand_format(const AVFilterFormats * const ff)
++{
++    // Returns 1 if any pixel format in the list is a sand format, else 0
++    int found = 0;
++    int n = 0;
++
++    while (!found && n != ff->nb_formats)
++        found = av_rpi_is_sand_format(ff->formats[n++]) ? 1 : 0;
++    return found;
++}
++#endif
++
+ /**
+  * Perform one round of query_formats() and merging formats lists on the
+  * filter graph.
+@@ -467,6 +483,7 @@ static int query_formats(AVFilterGraph *
+         for (j = 0; j < filter->nb_inputs; j++) {
+             AVFilterLink *link = filter->inputs[j];
+             int convert_needed = 0;
++            unsigned int extra_convert_tried = 0;
+
+             if (!link)
+                 continue;
+@@ -514,11 +531,14 @@ static int query_formats(AVFilterGraph *
+             )
+ #undef MERGE_DISPATCH
+
+-            if (convert_needed) {
++            while (convert_needed) {
+                 AVFilterContext *convert;
+                 const AVFilter *filter;
+                 AVFilterLink *inlink, *outlink;
+                 char inst_name[30];
++                int can_retry = 0;
++
++                convert_needed = 0;
+
+                 if (graph->disable_auto_convert) {
+                     av_log(log_ctx, AV_LOG_ERROR,
+@@ -531,19 +551,45 @@ static int query_formats(AVFilterGraph *
+                 /* couldn't merge format lists. auto-insert conversion filter */
+                 switch (link->type) {
+                 case AVMEDIA_TYPE_VIDEO:
+-                    if (!(filter = avfilter_get_by_name("scale"))) {
+-                        av_log(log_ctx, AV_LOG_ERROR, "'scale' filter "
+-                               "not present, cannot convert pixel formats.\n");
+-                        return AVERROR(EINVAL);
+-                    }
+-
+-                    snprintf(inst_name, sizeof(inst_name), "auto_scaler_%d",
+-                             scaler_count++);
++#if CONFIG_UNSAND_FILTER
++                    // Only try each extra conversion once
++                    // The unsand output pad should never trigger has_sand_format
++                    // but it is better to be safe
++                    if ((extra_convert_tried & 1) == 0 && has_sand_format(link->in_formats)) {
++                        if (!(filter = avfilter_get_by_name("unsand"))) {
++                            av_log(log_ctx, AV_LOG_ERROR, "'unsand' filter "
++                                   "not present, cannot convert pixel formats.\n");
++                            return AVERROR(EINVAL);
++                        }
++
++                        snprintf(inst_name, sizeof(inst_name), "auto_unsand_%d",
++                                 scaler_count++);
++
++                        if ((ret = avfilter_graph_create_filter(&convert, filter,
++                                                                inst_name, "", NULL,
++                                                                graph)) < 0)
++                            return ret;
+
+-                    if ((ret = avfilter_graph_create_filter(&convert, filter,
+-                                                            inst_name, graph->scale_sws_opts, NULL,
+-                                                            graph)) < 0)
+-                        return ret;
++                        extra_convert_tried |= 1;
++                        can_retry = 1;
++                    }
++                    else
++#endif
++                    {
++                        if (!(filter = avfilter_get_by_name("scale"))) {
++                            av_log(log_ctx, AV_LOG_ERROR, "'scale' filter "
++                                   "not present, cannot convert pixel formats.\n");
++                            return AVERROR(EINVAL);
++                        }
++
++                        snprintf(inst_name, sizeof(inst_name), "auto_scaler_%d",
++                                 scaler_count++);
++
++                        if ((ret = avfilter_graph_create_filter(&convert, filter,
++                                                                inst_name, graph->scale_sws_opts, NULL,
++                                                                graph)) < 0)
++                            return ret;
++                    }
+                     break;
+                 case AVMEDIA_TYPE_AUDIO:
+                     if (!(filter = avfilter_get_by_name("aresample"))) {
+@@ -585,9 +631,19 @@ static int query_formats(AVFilterGraph *
+                     av_assert0(outlink-> in_channel_layouts->refcount > 0);
+                     av_assert0(outlink->out_channel_layouts->refcount > 0);
+                 }
+-                if (!ff_merge_formats( inlink->in_formats,  inlink->out_formats,  inlink->type) ||
+-                    !ff_merge_formats(outlink->in_formats, outlink->out_formats, outlink->type))
++                // If we have added an extra filter we must merge the input
++                // side but we can have another go at the output
++                if (!ff_merge_formats( inlink->in_formats,  inlink->out_formats,  inlink->type))
+                     ret = AVERROR(ENOSYS);
++                else if (!ff_merge_formats(outlink->in_formats, outlink->out_formats, outlink->type))
++                {
++                    if (can_retry) {
++                        link = outlink;
++                        convert_needed = 1;
++                        continue;
++                    }
++                    ret = AVERROR(ENOSYS);
++                }
+                 if (inlink->type == AVMEDIA_TYPE_AUDIO &&
+                     (!ff_merge_samplerates(inlink->in_samplerates,
+                                            inlink->out_samplerates) ||
+--- a/libavfilter/buffersrc.c
++++ b/libavfilter/buffersrc.c
+@@ -210,7 +210,7 @@ static int av_buffersrc_add_frame_intern
+
+         switch (ctx->outputs[0]->type) {
+         case AVMEDIA_TYPE_VIDEO:
+-            CHECK_VIDEO_PARAM_CHANGE(ctx, s, frame->width, frame->height,
++            CHECK_VIDEO_PARAM_CHANGE(ctx, s, av_frame_cropped_width(frame), av_frame_cropped_height(frame),
+                                      frame->format, frame->pts);
+             break;
+         case AVMEDIA_TYPE_AUDIO:
+--- /dev/null
++++ b/libavfilter/vf_unsand.c
+@@ -0,0 +1,234 @@
++/*
++ * Copyright (c) 2007 Bobby Bingham
++ *
++ * This file is part of FFmpeg.
++ *
++ * FFmpeg is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU Lesser General Public
++ * License as published by the Free Software Foundation; either
++ * version 2.1 of the License, or (at your option) any later version.
++ *
++ * FFmpeg is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
++ * Lesser General Public License for more details.
++ *
++ * You should have received a copy of the GNU Lesser General Public
++ * License along with FFmpeg; if not, write to the Free Software
++ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
++ */
++
++/**
++ * @file
++ * unsand video filter: convert sand pixel formats to planar yuv
++ */
++
++#include <string.h>
++
++#include "libavutil/internal.h"
++#include "libavutil/mem.h"
++#include "libavutil/pixdesc.h"
++#include "libavutil/opt.h"
++#include "libavutil/rpi_sand_fns.h"
++
++#include "avfilter.h"
++#include "formats.h"
++#include "internal.h"
++#include "video.h"
++
++typedef struct UnsandContext {
++    const AVClass *class;   // only member: the filter keeps no other private state
++} UnsandContext;
++
++static av_cold void uninit(AVFilterContext *ctx)
++{
++    // Intentionally empty: this function releases nothing.
++}
++
++static av_cold int init(AVFilterContext *ctx)
++{
++    // No setup is performed here; the function always reports success.
++
++    return 0;
++}
++
++
++static int filter_frame(AVFilterLink *link, AVFrame *in)
++{
++    AVFilterLink * const outlink = link->dst->outputs[0];
++    AVFrame *out = NULL;
++    int rv = 0;
++    // Forward 'in' untouched when formats already agree, else convert it into a new buffer.
++    if (outlink->format == in->format) {
++        // Input already has the negotiated output format: pass it straight on
++        out = in;
++    }
++    else
++    {
++        if ((out = ff_get_video_buffer(outlink, av_frame_cropped_width(in), av_frame_cropped_height(in))) == NULL)
++        {
++            rv = AVERROR(ENOMEM);
++            goto fail;
++        }
++        if (av_rpi_sand_to_planar_frame(out, in) != 0)
++        {
++            rv = -1;   // NOTE(review): raw -1 rather than an AVERROR() code; helper's result is discarded
++            goto fail;
++        }
++        // Conversion succeeded, so the source frame is no longer needed
++        av_frame_free(&in);
++    }
++
++    return ff_filter_frame(outlink, out);
++    // Error exit (only reached from the conversion branch; out is NULL if allocation failed)
++fail:
++    av_frame_free(&out);
++    av_frame_free(&in);
++    return rv;
++}
++
++#if 0 /* compiled out: debug helper used only by the commented-out prints in query_formats() */
++static void dump_fmts(const AVFilterFormats * fmts)
++{
++    int i;
++    if (fmts== NULL) {
++        printf("NULL\n");
++        return;
++    }
++    for (i = 0; i < fmts->nb_formats; ++i) {
++        printf(" %d", fmts->formats[i]);   // raw numeric value of each list entry
++    }
++    printf("\n");
++}
++#endif
++
++static int query_formats(AVFilterContext *ctx)
++{
++    // Derives the output format list from the input's: sand formats map to planar YUV420P/P10
++    int ret;
++
++    // If we aren't connected at both ends then just do nothing
++    if (ctx->inputs[0] == NULL || ctx->outputs[0] == NULL)
++        return 0;
++
++//    printf("Unsand: %s in: ", __func__);
++//    dump_fmts(ctx->inputs[0]->in_formats);
++//    printf("Unsand: %s out: ", __func__);
++//    dump_fmts(ctx->outputs[0]->out_formats);
++
++    // Our output formats depend on our input formats and we can't/don't
++    // want to convert between bit depths so we need to wait for the source
++    // to have an opinion before we do
++    if (ctx->inputs[0]->in_formats == NULL)
++        return AVERROR(EAGAIN);
++
++    // Accept anything
++    if (ctx->inputs[0]->out_formats == NULL &&
++        (ret = ff_formats_ref(ctx->inputs[0]->in_formats, &ctx->inputs[0]->out_formats)) < 0)
++        return ret;
++
++    // Filter out sand formats
++
++    // Generate a container if we don't already have one
++    if (ctx->outputs[0]->in_formats == NULL)
++    {
++        // Somewhat rubbish way of ensuring we have a good structure
++        static const enum AVPixelFormat out_fmts[] = {AV_PIX_FMT_YUV420P10, AV_PIX_FMT_YUV420P, AV_PIX_FMT_NONE};
++        AVFilterFormats *formats = ff_make_format_list(out_fmts);
++
++        if (formats == NULL)
++            return AVERROR(ENOMEM);
++        if ((ret = ff_formats_ref(formats, &ctx->outputs[0]->in_formats)) < 0)
++            return ret;
++    }
++
++    // Replace old format list with new filtered list derived from what our
++    // input says it can do
++    {
++        const AVFilterFormats * const src_ff = ctx->inputs[0]->out_formats;
++        AVFilterFormats * const dst_ff = ctx->outputs[0]->in_formats;
++        enum AVPixelFormat *dst_fmts = av_malloc(sizeof(enum AVPixelFormat) * src_ff->nb_formats);
++        int i, n = 0, seen_420p = 0, seen_420p10 = 0;
++
++        if (dst_fmts == NULL)   // allocation failed: leave dst_ff exactly as it was
++            return AVERROR(ENOMEM);
++
++        for (i = 0; i < src_ff->nb_formats; ++i) {
++            const enum AVPixelFormat f = src_ff->formats[i];
++
++            switch (f){
++                case AV_PIX_FMT_YUV420P:
++                case AV_PIX_FMT_SAND128:
++                case AV_PIX_FMT_RPI4_8:
++                    if (!seen_420p) {
++                        seen_420p = 1;
++                        dst_fmts[n++] = AV_PIX_FMT_YUV420P;
++                    }
++                    break;
++                case AV_PIX_FMT_SAND64_10:
++                case AV_PIX_FMT_YUV420P10:
++                case AV_PIX_FMT_RPI4_10:
++                    if (!seen_420p10) {
++                        seen_420p10 = 1;
++                        dst_fmts[n++] = AV_PIX_FMT_YUV420P10;
++                    }
++                    break;
++                default:
++                    dst_fmts[n++] = f;
++                    break;
++            }
++        }
++
++        av_freep(&dst_ff->formats);
++        dst_ff->formats = dst_fmts;
++        dst_ff->nb_formats = n;
++    }
++
++//    printf("Unsand: %s calc: ", __func__);
++//    dump_fmts(ctx->outputs[0]->in_formats);
++
++    return 0;
++}
++
++
++#define OFFSET(x) offsetof(UnsandContext, x)
++static const AVOption unsand_options[] = {
++    { NULL }   // terminator only: the table defines no options
++};
++
++
++AVFILTER_DEFINE_CLASS(unsand);
++
++static const AVFilterPad avfilter_vf_unsand_inputs[] = {
++    {
++        .name             = "default",
++        .type             = AVMEDIA_TYPE_VIDEO,
++        .filter_frame = filter_frame,   // frames arriving on this pad go to filter_frame()
++    },
++    { NULL }   // list terminator
++};
++
++static const AVFilterPad avfilter_vf_unsand_outputs[] = {
++    {
++        .name = "default",
++        .type = AVMEDIA_TYPE_VIDEO   // single video output, no callbacks set
++    },
++    { NULL }   // list terminator
++};
++
++AVFilter ff_vf_unsand = {
++    .name          = "unsand",
++    .description   = NULL_IF_CONFIG_SMALL("Convert sand pix fmt to yuv"),
++    // init/uninit are placeholders that do no work
++    .init          = init,
++    .uninit        = uninit,
++    // format negotiation rewrites the output list from the input list
++    .query_formats = query_formats,
++    // private context holds only the AVClass pointer
++    .priv_size     = sizeof(UnsandContext),
++    .priv_class    = &unsand_class,
++    // one video input pad, one video output pad
++    .inputs        = avfilter_vf_unsand_inputs,
++    .outputs       = avfilter_vf_unsand_outputs,
++};
++
+--- a/libavformat/utils.c
++++ b/libavformat/utils.c
+@@ -3051,6 +3051,40 @@ static int has_codec_parameters(AVStream
+     return 1;
+ }
+
++#if CONFIG_HEVC_RPI_DECODER && CONFIG_HEVC_DECODER
++// HEVC only: close avctx (if open) and reopen it with the fallback decoder; limited to cases where we know it works.
++// Returns 0, AVERROR_DECODER_NOT_FOUND when there is no distinct fallback, or the avcodec_open2() error.
++static int try_fallback_decoder(AVCodecContext * const avctx, const AVCodec *const old_codec, AVDictionary ** const opts)
++{
++    // Only try fallback if we know it is supported (HEVC only)
++    const AVCodec *const new_codec = old_codec->id != AV_CODEC_ID_HEVC ? NULL :
++        avcodec_find_decoder_by_id_and_fmt(old_codec->id, AV_PIX_FMT_NONE);
++    int err;
++
++    // Failed to find fallback or we are already at the fallback
++    if (new_codec == NULL || new_codec == old_codec)
++    {
++        return AVERROR_DECODER_NOT_FOUND;
++    }
++
++    // * This may be dodgy - header says to not use this fn,
++    //   especially if we are going to reopen the context...
++    //   (but it does seem to work for our cases)
++    if (avcodec_is_open(avctx)) {
++        avcodec_close(avctx);
++    }
++
++    if ((err = avcodec_open2(avctx, new_codec, opts)) < 0)
++    {
++        return err;
++    }
++
++    return 0;
++}
++#else
++#define try_fallback_decoder(avctx, old_codec, opts) (AVERROR_DECODER_NOT_FOUND)
++#endif
++
+ /* returns 1 or 0 if or if not decoded data was returned, or a negative error */
+ static int try_decode_frame(AVFormatContext *s, AVStream *st,
+                             const AVPacket *avpkt, AVDictionary **options)
+@@ -3085,7 +3119,11 @@ static int try_decode_frame(AVFormatCont
+         av_dict_set(options ? options : &thread_opt, "threads", "1", 0);
+         if (s->codec_whitelist)
+             av_dict_set(options ? options : &thread_opt, "codec_whitelist", s->codec_whitelist, 0);
+-        ret = avcodec_open2(avctx, codec, options ? options : &thread_opt);
++        if ((ret = avcodec_open2(avctx, codec, options ? options : &thread_opt)) == AVERROR_DECODER_NOT_FOUND)
++        {
++            // Try fallback if it looks worth a try
++            ret = try_fallback_decoder(avctx, codec, options ? options : &thread_opt);
++        }
+         if (!options)
+             av_dict_free(&thread_opt);
+         if (ret < 0) {
+@@ -3116,6 +3154,14 @@ static int try_decode_frame(AVFormatCont
+         if (avctx->codec_type == AVMEDIA_TYPE_VIDEO ||
+             avctx->codec_type == AVMEDIA_TYPE_AUDIO) {
+             ret = avcodec_send_packet(avctx, &pkt);
++
++            // If we are going to want to fall back we should know here
++            if (ret == AVERROR_DECODER_NOT_FOUND) {
++                if ((ret = try_fallback_decoder(avctx, avctx->codec, options)) < 0)
++                    break;
++                continue;
++            }
++
+             if (ret < 0 && ret != AVERROR(EAGAIN) && ret != AVERROR_EOF)
+                 break;
+             if (ret >= 0)
+@@ -3726,9 +3772,20 @@ FF_ENABLE_DEPRECATION_WARNINGS
+         // Try to just open decoders, in case this is enough to get parameters.
+         if (!has_codec_parameters(st, NULL) && st->request_probe <= 0) {
+             if (codec && !avctx->codec)
+-                if (avcodec_open2(avctx, codec, options ? &options[i] : &thread_opt) < 0)
+-                    av_log(ic, AV_LOG_WARNING,
+-                           "Failed to open codec in %s\n",__FUNCTION__);
++            {
++                int err;
++
++                if ((err = avcodec_open2(avctx, codec, options ? &options[i] : &thread_opt)) < 0)
++                {
++                    if (err == AVERROR_DECODER_NOT_FOUND) {
++                        err = try_fallback_decoder(avctx, codec, options ? &options[i] : &thread_opt);
++                    }
++                    if (err < 0) {
++                        av_log(ic, AV_LOG_WARNING,
++                               "Failed to open codec in %s\n",__FUNCTION__);
++                    }
++                }
++            }
+         }
+         if (!options)
+             av_dict_free(&thread_opt);
+--- a/libavutil/Makefile
++++ b/libavutil/Makefile
+@@ -68,6 +68,7 @@ HEADERS = adler32.h
+           rational.h                                                    \
+           replaygain.h                                                  \
+           ripemd.h                                                      \
++	  rpi_sand_fns.h                                                \
+           samplefmt.h                                                   \
+           sha.h                                                         \
+           sha512.h                                                      \
+@@ -86,6 +87,7 @@ HEADERS = adler32.h
+           tx.h                                                          \
+
+ HEADERS-$(CONFIG_LZO)                   += lzo.h
++HEADERS-$(CONFIG-RPI)                   += rpi_sand_fn_pw.h
+
+ ARCH_HEADERS = bswap.h                                                  \
+                intmath.h                                                \
+@@ -180,6 +182,7 @@ OBJS-$(CONFIG_LZO)
+ OBJS-$(CONFIG_MEDIACODEC)               += hwcontext_mediacodec.o
+ OBJS-$(CONFIG_OPENCL)                   += hwcontext_opencl.o
+ OBJS-$(CONFIG_QSV)                      += hwcontext_qsv.o
++OBJS-$(CONFIG_SAND)                     += rpi_sand_fns.o
+ OBJS-$(CONFIG_VAAPI)                    += hwcontext_vaapi.o
+ OBJS-$(CONFIG_VIDEOTOOLBOX)             += hwcontext_videotoolbox.o
+ OBJS-$(CONFIG_VDPAU)                    += hwcontext_vdpau.o
+--- a/libavutil/aarch64/Makefile
++++ b/libavutil/aarch64/Makefile
+@@ -1,4 +1,6 @@
+ OBJS += aarch64/cpu.o                                                 \
+         aarch64/float_dsp_init.o                                      \
+
+-NEON-OBJS += aarch64/float_dsp_neon.o
++NEON-OBJS += aarch64/float_dsp_neon.o                                 \
++             aarch64/rpi_sand_neon.o                                  \
++
+--- /dev/null
++++ b/libavutil/aarch64/rpi_sand_neon.S
+@@ -0,0 +1,676 @@
++/*
++Copyright (c) 2021 Michael Eiler
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: Michael Eiler <eiler.mike@gmail.com>
++*/
++
++#include "asm.S"
++
++// void ff_rpi_sand8_lines_to_planar_y8(
++//   uint8_t * dest,            : x0
++//   unsigned int dst_stride,   : w1
++//   const uint8_t * src,       : x2
++//   unsigned int src_stride1,  : w3, always 128
++//   unsigned int src_stride2,  : w4
++//   unsigned int _x,           : w5, never read (w5 is reused as a loop counter)
++//   unsigned int y,            : w6, never read (w6 is reused as a scratch byte)
++//   unsigned int _w,           : w7
++//   unsigned int h);           : [sp, #0]
++// Copies 8-bit rows out of 128-byte wide source blocks; dest is only advanced by the stores.
++function ff_rpi_sand8_lines_to_planar_y8, export=1
++    // w15 contains the number of rows we need to process
++    ldr w15, [sp, #0]
++
++    // w8 will contain the number of blocks per row
++    // w8 = floor(w1 / stride1); NOTE(review): w1 is dst_stride, not _w (w7) - confirm intent
++    // stride1 is assumed to always be 128
++    mov w8, w1
++    lsr w8, w8, #7
++
++    // in case the width of the image is not a multiple of 128, there will
++    // be an incomplete block at the end of every row
++    // w9 contains the number of pixels stored within this block
++    // w9 = _w - w8 * 128
++    lsl w9, w8, #7
++    sub w9, w7, w9
++
++    // this is the value we have to add to the src pointer after reading a complete block
++    // it will move the address to the start of the next block
++    // w10 = stride2 * stride1 - stride1
++    mov w10, w4
++    lsl w10, w10, #7
++    sub w10, w10, #128
++
++    // w11 is the row offset, i.e. the byte offset of the current row from src
++    // this will be increased by stride1 on every iteration of the row_loop
++    eor w11, w11, w11
++
++    // w12 = 0, processed row count
++    eor w12, w12, w12
++row_loop:
++    // start of the first block within the current row
++    // x13 = row offset + src
++    mov x13, x2
++    add x13, x13, x11
++
++    // w14 = 0, processed block count
++    eor w14, w14, w14
++
++    cmp w8, #0
++    beq no_main_y8
++
++block_loop:
++    // copy 128 bytes (a full block) into the vector registers v0-v7 and increase the src address by 128
++    // v0-v7 are used without being saved or restored by this function
++    ld1 { v0.16b,  v1.16b,  v2.16b,  v3.16b}, [x13], #64
++    ld1 { v4.16b,  v5.16b,  v6.16b,  v7.16b}, [x13], #64
++
++    // write these registers back to the destination vector and increase the dst address by 128
++    st1 { v0.16b,  v1.16b,  v2.16b,  v3.16b }, [x0], #64
++    st1 { v4.16b,  v5.16b,  v6.16b,  v7.16b }, [x0], #64
++
++    // move the source register to the beginning of the next block (x13 = src + block offset)
++    add x13, x13, x10
++    // increase the block counter
++    add w14, w14, #1
++
++    // continue with the block_loop if we haven't copied all full blocks yet
++    cmp w8, w14
++    bgt block_loop
++
++    // handle the last block at the end of each row
++    // copies w9 bytes one at a time from src to dst (signed compare: nothing if w9 <= 0)
++no_main_y8:
++    eor w5, w5, w5 // i = 0
++incomplete_block_loop_y8:
++    cmp w5, w9
++    bge incomplete_block_loop_end_y8
++
++    ldrb w6, [x13]
++    strb w6, [x0]
++    add x13, x13, #1
++    add x0, x0, #1
++
++    add w5, w5, #1
++    b incomplete_block_loop_y8
++incomplete_block_loop_end_y8:
++
++
++    // increase the row offset by 128 (stride1)
++    add w11, w11, #128
++    // increment the row counter
++    add w12, w12, #1
++
++    // process the next row if we haven't finished yet
++    cmp w15, w12
++    bgt row_loop
++
++    ret
++endfunc
++
++
++
++// void ff_rpi_sand8_lines_to_planar_c8(
++//   uint8_t * dst_u,           : x0
++//   unsigned int dst_stride_u, : w1 == width
++//   uint8_t * dst_v,           : x2
++//   unsigned int dst_stride_v, : w3 == width (never read; dst_stride_u is applied to both planes)
++//   const uint8_t * src,       : x4
++//   unsigned int stride1,      : w5 == 128 (never read; w5 is reused as a counter)
++//   unsigned int stride2,      : w6
++//   unsigned int _x,           : w7 (never read; w7 is reloaded with _w)
++//   unsigned int y,            : [sp, #0] (never read)
++//   unsigned int _w,           : [sp, #8]
++//   unsigned int h);           : [sp, #16]
++
++function ff_rpi_sand8_lines_to_planar_c8, export=1
++    // w7 = width
++    ldr w7, [sp, #8]
++
++    // w15 contains the number of rows we need to process
++    // counts down
++    ldr w15, [sp, #16]
++
++    // number of full blocks, w8 = _w / (stride1 >> 1) == _w / 64 == _w >> 6
++    mov w8, w7
++    lsr w8, w8, #6
++
++    // number of pixels in block at the end of every row
++    // w9 = _w - (w8 * 64)
++    lsl w9, w8, #6
++    sub w9, w7, w9
++
++    // Skip at the end of the line to account for stride (w12 = dst_stride_u - _w)
++    sub w12, w1, w7
++
++    // address delta to the beginning of the next block
++    // w10 = (stride2 * stride1 - stride1) = stride2 * 128 - 128
++    lsl w10, w6, #7
++    sub w10, w10, #128
++
++    // w11 = row address start offset = 0
++    eor w11, w11, w11
++
++row_loop_c8:
++    // start of the first block within the current row
++    // x13 = row offset + src
++    mov x13, x4
++    add x13, x13, x11
++
++    // w14 = 0, processed block count
++    eor w14, w14, w14
++
++    cmp w8, #0
++    beq no_main_c8
++
++block_loop_c8:
++    // load the full block -> 128 bytes, the block contains 64 interleaved U and V values
++    ld2 { v0.16b,  v1.16b }, [x13], #32
++    ld2 { v2.16b,  v3.16b }, [x13], #32
++    ld2 { v4.16b,  v5.16b }, [x13], #32
++    ld2 { v6.16b,  v7.16b }, [x13], #32
++
++    // reorder registers so that each plane can be written out with a single instruction
++    mov v16.16b, v1.16b
++    mov v17.16b, v3.16b
++    mov v18.16b, v5.16b
++    mov v1.16b, v2.16b
++    mov v2.16b, v4.16b
++    mov v3.16b, v6.16b
++    mov v4.16b, v16.16b
++    mov v5.16b, v17.16b
++    mov v6.16b, v18.16b
++
++    st1 { v0.16b,  v1.16b,  v2.16b,  v3.16b }, [x0], #64
++    st1 { v4.16b,  v5.16b,  v6.16b,  v7.16b }, [x2], #64
++
++    // increment the block counter and move src to the beginning of the next block
++    add w14, w14, #1
++    add x13, x13, x10
++
++    // jump to block_loop_c8 iff the block count is smaller than the number of full blocks
++    cmp w8, w14
++    bgt block_loop_c8
++
++no_main_c8:
++    // handle incomplete block at the end of every row
++    eor w5, w5, w5 // w5 = 0, counts the U/V pairs copied from the partial block
++incomplete_block_loop_c8:
++    cmp w5, w9
++    bge incomplete_block_loop_end_c8
++
++    ldrb w1, [x13]
++    strb w1, [x0]
++    add x13, x13, #1
++
++    ldrb w1, [x13]
++    strb w1, [x2]
++    add x13, x13, #1
++
++    add x0, x0, #1
++    add x2, x2, #1
++
++    add w5, w5, #1
++    b incomplete_block_loop_c8
++incomplete_block_loop_end_c8:
++
++    // increase row_offset by stride1
++    add w11, w11, #128
++    add x0, x0, w12, sxtw
++    add x2, x2, w12, sxtw
++
++    // decrement the remaining row count and jump to row_loop_c8 while it is still positive
++    subs w15, w15, #1
++    bgt row_loop_c8
++
++    ret
++endfunc
++
++//void ff_rpi_sand30_lines_to_planar_y16(
++//  uint8_t * dest,             // [x0]
++//  unsigned int dst_stride,    // [w1] -> assumed to be equal to _w
++//  const uint8_t * src,        // [x2]
++//  unsigned int src_stride1,   // [w3] -> 128
++//  unsigned int src_stride2,   // [w4]
++//  unsigned int _x,            // [w5] (never read; w5 is reused for slice_inc)
++//  unsigned int y,             // [w6] (never read; w6 is reloaded with h)
++//  unsigned int _w,            // [w7]
++//  unsigned int h);            // [sp, #0]
++
++function ff_rpi_sand30_lines_to_planar_y16, export=1
++    stp x19, x20, [sp, #-48]!
++    stp x21, x22, [sp, #16]
++    stp x23, x24, [sp, #32]
++
++    // w6 = argument h (found at #48 because 48 bytes were just pushed)
++    ldr w6, [sp, #48]
++
++    // slice_inc = ((stride2 - 1) * stride1)
++    mov w5, w4
++    sub w5, w5, #1
++    lsl w5, w5, #7
++
++    // total number of bytes per row = (width / 3) * 4
++    mov w8, w7
++    mov w9, #3
++    udiv w8, w8, w9
++    lsl w8, w8, #2
++
++    // number of full 128 byte blocks to be processed
++    mov w9, #96
++    udiv w9, w7, w9 // = (width * 4) / (3*128) = width/96
++
++    // w10 = number of full integers to process (4 bytes)
++    // w11 = remaining zero to two 10bit values still to copy over
++    mov w12, #96
++    mul w12, w9, w12
++    sub w12, w7, w12  // width - blocks*96 = remaining points per row
++    mov w11, #3
++    udiv w10, w12, w11 // full integers to process = w12 / 3
++    mul w11, w10, w11  // #integers *3
++    sub w11, w12, w11  // remaining 0-2 points = remaining points - integers*3
++
++    // increase w9 by one if w10+w11 is not zero, and decrease the row count by one
++    // this is to efficiently copy incomplete blocks at the end of the rows
++    // the last row is handled explicitly to avoid writing out of bounds
++    add w22, w10, w11
++    cmp w22, #0
++    cset w22, ne // 1 iff w10+w11 not zero, 0 otherwise
++    add w9, w9, w22
++    sub w6, w6, #1
++
++    // w20 = dst_stride - 192 * w9: added to dst after every row (negative when
++    // the block loop wrote more bytes than one destination row holds)
++    mov w20, #96*2
++    mul w20, w20, w9
++    sub w20, w1, w20
++
++    mov w23, #0 // flag to check whether the last line had already been processed
++
++    // bitmask to clear the upper 6 bits of the result values
++    mov x19, #0x03ff03ff03ff03ff
++    dup v22.2d, x19
++
++    // row counter = 0
++    eor w12, w12, w12
++row_loop_y16:
++    cmp w12, w6               // jump to row_loop_y16_fin if we processed all rows
++    bge row_loop_y16_fin
++
++    mov x13, x2               // row src
++    eor w14, w14, w14         // full block counter
++block_loop_y16:
++    cmp w14, w9
++    bge block_loop_y16_fin
++
++    // load 64 bytes
++    ld1 { v0.4s,  v1.4s, v2.4s, v3.4s }, [x13], #64
++
++    // process v0 and v1
++    xtn v16.4h, v0.4s
++    ushr v0.4s, v0.4s, #10
++    xtn v17.4h, v0.4s
++    ushr v0.4s, v0.4s, #10
++    xtn v18.4h, v0.4s
++
++    xtn2 v16.8h, v1.4s
++    and v16.16b, v16.16b, v22.16b
++    ushr v1.4s, v1.4s, #10
++    xtn2 v17.8h, v1.4s
++    and v17.16b, v17.16b, v22.16b
++    ushr v1.4s, v1.4s, #10
++    xtn2 v18.8h, v1.4s
++    and v18.16b, v18.16b, v22.16b
++
++    st3 { v16.8h, v17.8h, v18.8h }, [x0], #48
++
++    // process v2 and v3
++    xtn v23.4h, v2.4s
++    ushr v2.4s, v2.4s, #10
++    xtn v24.4h, v2.4s
++    ushr v2.4s, v2.4s, #10
++    xtn v25.4h, v2.4s
++
++    xtn2 v23.8h, v3.4s
++    and v23.16b, v23.16b, v22.16b
++    ushr v3.4s, v3.4s, #10
++    xtn2 v24.8h, v3.4s
++    and v24.16b, v24.16b, v22.16b
++    ushr v3.4s, v3.4s, #10
++    xtn2 v25.8h, v3.4s
++    and v25.16b, v25.16b, v22.16b
++
++    st3 { v23.8h, v24.8h, v25.8h }, [x0], #48
++
++    // load the second half of the block -> 64 bytes into registers v4-v7
++    ld1 { v4.4s,  v5.4s,  v6.4s,  v7.4s }, [x13], #64
++
++    // process v4 and v5
++    xtn v16.4h, v4.4s
++    ushr v4.4s, v4.4s, #10
++    xtn v17.4h, v4.4s
++    ushr v4.4s, v4.4s, #10
++    xtn v18.4h, v4.4s
++
++    xtn2 v16.8h, v5.4s
++    and v16.16b, v16.16b, v22.16b
++    ushr v5.4s, v5.4s, #10
++    xtn2 v17.8h, v5.4s
++    and v17.16b, v17.16b, v22.16b
++    ushr v5.4s, v5.4s, #10
++    xtn2 v18.8h, v5.4s
++    and v18.16b, v18.16b, v22.16b
++
++    st3 { v16.8h, v17.8h, v18.8h }, [x0], #48
++
++    // v6 and v7
++    xtn v23.4h, v6.4s
++    ushr v6.4s, v6.4s, #10
++    xtn v24.4h, v6.4s
++    ushr v6.4s, v6.4s, #10
++    xtn v25.4h, v6.4s
++
++    xtn2 v23.8h, v7.4s
++    and v23.16b, v23.16b, v22.16b
++    ushr v7.4s, v7.4s, #10
++    xtn2 v24.8h, v7.4s
++    and v24.16b, v24.16b, v22.16b
++    ushr v7.4s, v7.4s, #10
++    xtn2 v25.8h, v7.4s
++    and v25.16b, v25.16b, v22.16b
++
++    st3 { v23.8h, v24.8h, v25.8h }, [x0], #48
++
++    add x13, x13, x5          // row src += slice_inc
++    add w14, w14, #1
++    b block_loop_y16
++block_loop_y16_fin:
++
++
++
++
++    add x2, x2, #128          // src += stride1 (start of the next row)
++    add x0, x0, w20, sxtw     // apply the per-row dst correction held in w20
++    add w12, w12, #1
++    b row_loop_y16
++row_loop_y16_fin:
++
++    // check whether we have incomplete blocks at the end of every row
++    // in that case decrease row block count by one
++    // change height back to its original value (meaning increase it by 1)
++    // and jump back to another iteration of row_loop_y16
++
++    cmp w23, #1
++    beq row_loop_y16_fin2 // don't continue here if we already processed the last row
++    add w6, w6, #1    // increase height to the original value
++    sub w9, w9, w22   // block count - 1 or 0, depending on the remaining bytes count
++    mov w23, #1
++    b row_loop_y16
++row_loop_y16_fin2:
++
++    sub x0, x0, w20, sxtw // undo the w20 correction applied after the last row so dst points just past its full blocks
++
++    // now we've got to handle the last block in the last row
++    eor w12, w12, w12 // w12 = 0 = counter
++integer_loop_y16:
++    cmp w12, w10
++    bge integer_loop_y16_fin
++    ldr w14, [x13], #4
++    and w15, w14, #0x3ff
++    strh w15, [x0], #2
++    lsr w14, w14, #10
++    and w15, w14, #0x3ff
++    strh w15, [x0], #2
++    lsr w14, w14, #10
++    and w15, w14, #0x3ff
++    strh w15, [x0], #2
++    add w12, w12, #1
++    b integer_loop_y16
++integer_loop_y16_fin:
++
++final_values_y16:
++    // remaining point count = w11 (the word is loaded before w11 is tested)
++    ldr w14, [x13], #4
++    cmp w11, #0
++    beq final_values_y16_fin
++    and w15, w14, #0x3ff
++    strh w15, [x0], #2
++    cmp w11, #1
++    beq final_values_y16_fin
++    lsr w14, w14, #10
++    and w15, w14, #0x3ff
++    strh w15, [x0], #2
++final_values_y16_fin:
++
++    ldp x23, x24, [sp, #32]
++    ldp x21, x22, [sp, #16]
++    ldp x19, x20, [sp], #48
++    ret
++endfunc
++
++//void ff_rpi_sand30_lines_to_planar_c16(
++//  uint8_t * dst_u,            // [x0]
++//  unsigned int dst_stride_u,  // [w1] == _w*2
++//  uint8_t * dst_v,            // [x2]
++//  unsigned int dst_stride_v,  // [w3] == _w*2
++//  const uint8_t * src,        // [x4]
++//  unsigned int stride1,       // [w5] == 128
++//  unsigned int stride2,       // [w6]
++//  unsigned int _x,            // [w7] == 0
++//  unsigned int y,             // [sp, #0] == 0
++//  unsigned int _w,            // [sp, #8] -> w3
++//  unsigned int h);            // [sp, #16] -> w7
++
++.macro rpi_sand30_lines_to_planar_c16_block_half
++    ld1 { v0.4s,  v1.4s, v2.4s, v3.4s }, [x13], #64 // read 64 source bytes, advancing x13
++
++    xtn v4.4h, v0.4s
++    ushr v0.4s, v0.4s, #10
++    xtn v5.4h, v0.4s
++    ushr v0.4s, v0.4s, #10
++    xtn v6.4h, v0.4s
++    xtn2 v4.8h, v1.4s
++    ushr v1.4s, v1.4s, #10
++    xtn2 v5.8h, v1.4s
++    ushr v1.4s, v1.4s, #10
++    xtn2 v6.8h, v1.4s
++    and v4.16b, v4.16b, v16.16b // v16 must already hold the mask; it is not set in this macro
++    and v5.16b, v5.16b, v16.16b
++    and v6.16b, v6.16b, v16.16b
++    st3 { v4.8h, v5.8h, v6.8h }, [sp], #48 // spill 48 bytes at sp, then step sp up by 48
++
++    xtn v4.4h, v2.4s
++    ushr v2.4s, v2.4s, #10
++    xtn v5.4h, v2.4s
++    ushr v2.4s, v2.4s, #10
++    xtn v6.4h, v2.4s
++    xtn2 v4.8h, v3.4s
++    ushr v3.4s, v3.4s, #10
++    xtn2 v5.8h, v3.4s
++    ushr v3.4s, v3.4s, #10
++    xtn2 v6.8h, v3.4s
++    and v4.16b, v4.16b, v16.16b
++    and v5.16b, v5.16b, v16.16b
++    and v6.16b, v6.16b, v16.16b
++    st3 { v4.8h, v5.8h, v6.8h }, [sp] // second 48 bytes, stored directly after the first
++    sub sp, sp, #48 // restore sp; the 96 bytes starting at sp now hold the results
++.endm
++
++function ff_rpi_sand30_lines_to_planar_c16, export=1
++    stp x19, x20, [sp, #-48]!   // save x19-x24 (48 bytes), restored before ret
++    stp x21, x22, [sp, #16]
++    stp x23, x24, [sp, #32]
++
++    ldr w3, [sp, #48+8]    // w3 = width
++    ldr w7, [sp, #48+16]   // w7 = height
++
++    // reserve space on the stack for intermediate results
++    sub sp, sp, #256
++
++    // number of 128byte blocks per row, w8 = width / 48
++    mov w9, #48
++    udiv w8, w3, w9
++
++    // remaining pixels (rem_pix) per row, w9 = width - w8 * 48
++    mul w9, w8, w9
++    sub w9, w3, w9
++
++    // row offset, the beginning of the next row to process
++    eor w10, w10, w10
++
++    // offset to the beginning of the next block, w11 = stride2 * 128 - 128
++    lsl w11, w6, #7
++    sub w11, w11, #128
++
++    // decrease the height by one and in case of remaining pixels increase the block count by one
++    sub w7, w7, #1
++    cmp w9, #0
++    cset w19, ne    // w19 == 1 iff remaining pixels != 0
++    add w8, w8, w19
++
++    // bytes we have to move dst back by at the end of every row
++    mov w21, #48*2      // one block yields 48 halfwords (96 bytes) per destination plane
++    mul w21, w21, w8
++    sub w21, w1, w21    // w21 = dst_stride_u - 96 * block count (also applied to dst_v)
++
++    mov w20, #0     // w20 = flag, last row processed
++
++    mov x12, #0x03ff03ff03ff03ff
++    dup v16.2d, x12     // v16 = 0x03ff in every 16-bit lane, the 10-bit sample mask
++
++    // iterate through rows, row counter = w12 = 0
++    eor w12, w12, w12
++row_loop_c16:
++    cmp w12, w7
++    bge row_loop_c16_fin
++
++    // address of row data = src + row_offset
++    mov x13, x4
++    add x13, x13, x10
++
++    eor w14, w14, w14   // w14 = block counter for this row
++block_loop_c16:
++    cmp w14, w8
++    bge block_loop_c16_fin
++
++    rpi_sand30_lines_to_planar_c16_block_half
++
++    ld2 { v0.8h, v1.8h }, [sp], #32     // de-interleave the stack buffer: even halfwords -> v0/v2/v4, odd -> v1/v3/v5
++    ld2 { v2.8h, v3.8h }, [sp], #32
++    ld2 { v4.8h, v5.8h }, [sp]
++    sub sp, sp, #64     // undo the two post-increments above
++
++    st1 { v0.8h }, [x0], #16    // even halfwords go to dst_u (x0)
++    st1 { v2.8h }, [x0], #16
++    st1 { v4.8h }, [x0], #16
++    st1 { v1.8h }, [x2], #16    // odd halfwords go to dst_v (x2)
++    st1 { v3.8h }, [x2], #16
++    st1 { v5.8h }, [x2], #16
++
++    rpi_sand30_lines_to_planar_c16_block_half
++
++    ld2 { v0.8h, v1.8h }, [sp], #32
++    ld2 { v2.8h, v3.8h }, [sp], #32
++    ld2 { v4.8h, v5.8h }, [sp]
++    sub sp, sp, #64
++
++    st1 { v0.8h }, [x0], #16
++    st1 { v2.8h }, [x0], #16
++    st1 { v4.8h }, [x0], #16
++    st1 { v1.8h }, [x2], #16
++    st1 { v3.8h }, [x2], #16
++    st1 { v5.8h }, [x2], #16
++
++    add x13, x13, x11 // offset to next block
++    add w14, w14, #1
++    b block_loop_c16
++block_loop_c16_fin:
++
++    add w10, w10, #128  // next row starts 128 bytes further into the source
++    add w12, w12, #1
++    add x0, x0, w21, sxtw  // move dst pointers back by x21
++    add x2, x2, w21, sxtw
++    b row_loop_c16
++row_loop_c16_fin:
++
++    cmp w20, #1
++    beq row_loop_c16_fin2
++    mov w20, #1     // second pass: run the row loop once more for the last row only
++    sub w8, w8, w19 // decrease block count by w19
++    add w7, w7, #1 // increase height
++    b row_loop_c16
++
++row_loop_c16_fin2:
++    sub x0, x0, w21, sxtw // readd x21 in case of the last row
++    sub x2, x2, w21, sxtw // so that we can write out the few remaining pixels
++
++    // last incomplete block to be finished
++    // read operations are fine, stride2 is more than large enough even if rem_pix is 0
++    rpi_sand30_lines_to_planar_c16_block_half
++    ld2 { v0.8h, v1.8h }, [sp], #32     // loaded values are not used; the post-increments step sp past this half
++    ld2 { v2.8h, v3.8h }, [sp], #32
++    ld2 { v4.8h, v5.8h }, [sp], #32
++    rpi_sand30_lines_to_planar_c16_block_half
++    ld2 { v0.8h, v1.8h }, [sp], #32
++    ld2 { v2.8h, v3.8h }, [sp], #32
++    ld2 { v4.8h, v5.8h }, [sp]
++    sub sp, sp, #160    // undo the five post-increments (5 * 32)
++
++    mov x4, sp          // x4 = read pointer into the stack buffer
++    eor w20, w20, w20   // w20 reused as the remaining-pixel counter
++rem_pix_c16_loop:
++    cmp w20, w9
++    bge rem_pix_c16_fin
++
++    ldr w22, [x4], #4   // one 32-bit word: low halfword is stored to dst_u, high halfword to dst_v
++    strh w22, [x0], #2  // halfword store: a 32-bit str here also wrote the 2 bytes after the pixel
++    lsr w22, w22, #16
++    strh w22, [x2], #2  // halfword store, so nothing is written past the end of the row
++
++    add w20, w20, #1
++    b rem_pix_c16_loop
++rem_pix_c16_fin:
++
++    add sp, sp, #256    // release the intermediate buffer
++
++    ldp x23, x24, [sp, #32]
++    ldp x21, x22, [sp, #16]
++    ldp x19, x20, [sp], #48
++    ret
++endfunc
++
++
++
++//void ff_rpi_sand30_lines_to_planar_p010(
++//  uint8_t * dest,
++//  unsigned int dst_stride,
++//  const uint8_t * src,
++//  unsigned int src_stride1,
++//  unsigned int src_stride2,
++//  unsigned int _x,
++//  unsigned int y,
++//  unsigned int _w,
++//  unsigned int h);
++
+--- /dev/null
++++ b/libavutil/aarch64/rpi_sand_neon.h
+@@ -0,0 +1,55 @@
++/*
++Copyright (c) 2021 Michael Eiler
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: Michael Eiler <eiler.mike@gmail.com>
++*/
++
++#pragma once
++
++#include <stdint.h>  /* uint8_t in the prototypes below; keeps this header self-contained */
++#ifdef __cplusplus
++extern "C" {
++#endif
++
++void ff_rpi_sand8_lines_to_planar_y8(uint8_t * dest, unsigned int dst_stride,
++  const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
++  unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
++
++void ff_rpi_sand8_lines_to_planar_c8(uint8_t * dst_u, unsigned int dst_stride_u,
++  uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src,
++  unsigned int stride1, unsigned int stride2, unsigned int _x, unsigned int y,
++  unsigned int _w, unsigned int h);
++
++void ff_rpi_sand30_lines_to_planar_y16(uint8_t * dest, unsigned int dst_stride,
++  const uint8_t * src, unsigned int src_stride1, unsigned int src_stride2,
++  unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
++
++void ff_rpi_sand30_lines_to_planar_c16(uint8_t * dst_u, unsigned int dst_stride_u,
++  uint8_t * dst_v, unsigned int dst_stride_v, const uint8_t * src, unsigned int stride1,
++  unsigned int stride2, unsigned int _x, unsigned int y, unsigned int _w, unsigned int h);
++
++#ifdef __cplusplus
++}
++#endif
+--- a/libavutil/arm/Makefile
++++ b/libavutil/arm/Makefile
+@@ -6,3 +6,4 @@ VFP-OBJS += arm/float_dsp_init_vfp.o
+
+ NEON-OBJS += arm/float_dsp_init_neon.o                                  \
+              arm/float_dsp_neon.o                                       \
++             arm/rpi_sand_neon.o                                        \
+--- /dev/null
++++ b/libavutil/arm/rpi_sand_neon.S
+@@ -0,0 +1,768 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++#include "libavutil/arm/asm.S"
++
++
++@ General notes:
++@ Having done some timing on this in sand8->y8 (Pi4)
++@  vst1 (680fps) is a bit faster than vstm (660fps)
++@  vldm (680fps) is noticeably faster than vld1 (480fps)
++@  (or it might be that a mix is what is required)
++@
++@ At least on a Pi4 it is no more expensive to have a single auto-inc register
++@ for dest address than it is to have 2 used alternately (On Pi3 Ben asserted
++@ the latter was better)
++@
++@ vstm will bus error on unaligned access (so will vldm), vst1 is safe unless
++@ the memory is uncached.
++@ As these are Sand -> planar we can assume that src is going to be aligned but
++@ it is possible that dest isn't (converting to .yuv or other packed format).
++@ Luckily vst1 is faster than vstm :-) so all is well
++@ vst1 has alignment requirements of el size so maybe splitting vst1.32 into 4
++@ .8 stores would let us do non-word aligned stores into uncached but it
++@ probably isn't worth it.
++
++
++
++
++@ void ff_rpi_sand128b_stripe_to_8_10(
++@   uint8_t * dest,             // [r0]
++@   const uint8_t * src1,       // [r1]
++@   const uint8_t * src2,       // [r2]
++@   unsigned int lines);        // [r3]
++
++.macro  stripe2_to_8, bit_depth
++        vpush    {q4-q7}                 @ q4-q7 are overwritten below; restored by the vpop before return
++1:
++        vldm     r1!, {q0-q7}            @ 128 bytes from src1, r1 post-incremented
++        subs     r3, #1                  @ lines counter; these flags are tested by the bne at the loop end
++        vldm     r2!, {q8-q15}           @ 128 bytes from src2, r2 post-incremented
++        vqrshrn.u16 d0,  q0,  #\bit_depth - 8
++        vqrshrn.u16 d1,  q1,  #\bit_depth - 8
++        vqrshrn.u16 d2,  q2,  #\bit_depth - 8
++        vqrshrn.u16 d3,  q3,  #\bit_depth - 8
++        vqrshrn.u16 d4,  q4,  #\bit_depth - 8
++        vqrshrn.u16 d5,  q5,  #\bit_depth - 8
++        vqrshrn.u16 d6,  q6,  #\bit_depth - 8
++        vqrshrn.u16 d7,  q7,  #\bit_depth - 8
++        vqrshrn.u16 d8,  q8,  #\bit_depth - 8
++        vqrshrn.u16 d9,  q9,  #\bit_depth - 8
++        vqrshrn.u16 d10, q10, #\bit_depth - 8
++        vqrshrn.u16 d11, q11, #\bit_depth - 8
++        vqrshrn.u16 d12, q12, #\bit_depth - 8
++        vqrshrn.u16 d13, q13, #\bit_depth - 8
++        vqrshrn.u16 d14, q14, #\bit_depth - 8
++        vqrshrn.u16 d15, q15, #\bit_depth - 8
++        vstm     r0!, {q0-q7}            @ 128 narrowed bytes (d0-d15) to dest, r0 post-incremented
++        bne      1b                      @ loop until the lines counter in r3 reaches zero
++        vpop     {q4-q7}
++        bx       lr
++.endm
++
++function ff_rpi_sand128b_stripe_to_8_10, export=1
++        stripe2_to_8     10              @ 10-bit input: each 16-bit value is shifted right by 2 with rounding and saturation
++endfunc
++
++@ void ff_rpi_sand8_lines_to_planar_y8(
++@   uint8_t * dest,             // [r0]
++@   unsigned int dst_stride,    // [r1]
++@   const uint8_t * src,        // [r2]
++@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++@   unsigned int src_stride2,   // [sp, #0]  -> r3
++@   unsigned int _x,            // [sp, #4]  Ignored - 0
++@   unsigned int y,             // [sp, #8]  (r7 in prefix)
++@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++@   unsigned int h);            // [sp, #16] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand8_lines_to_planar_y8, export=1
++                push            {r4-r8, lr}     @ +24            L
++                ldr             r3,  [sp, #24]  @ src_stride2
++                ldr             r6,  [sp, #36]  @ _w
++                ldr             r7,  [sp, #32]  @ y
++                lsl             r3,  #7         @ r3 = src_stride2 * 128
++                sub             r1,  r6         @ r1 = dst_stride - _w, added to dest after each line
++                add             r8,  r2,  r7,  lsl #7  @ r8 = src + y * 128
++                ldr             r7,  [sp, #40]  @ h, used as the line counter
++
++10:
++                mov             r2,  r8         @ r2 = source pointer for this line
++                add             r4,  r0,  #24   @ NOTE(review): r4 is not read anywhere in this function
++                mov             r5,  r6         @ r5 = bytes still to write on this line
++                mov             lr,  #0         @ NOTE(review): lr is not read anywhere in this function
++1:
++                vldm            r2,  {q8-q15}   @ load 128 source bytes
++                add             r2,  r3         @ advance the source by src_stride2 * 128
++                subs            r5,  #128
++                blt             2f              @ fewer than 128 bytes left: partial write
++                vst1.8          {d16, d17, d18, d19}, [r0]!
++                vst1.8          {d20, d21, d22, d23}, [r0]!
++                vst1.8          {d24, d25, d26, d27}, [r0]!
++                vst1.8          {d28, d29, d30, d31}, [r0]!
++                bne             1b              @ flags are still those of the subs above; zero means the line is done
++11:
++                subs            r7,  #1
++                add             r0,  r1         @ dest to the start of the next line
++                add             r8,  #128       @ next source line is 128 bytes on
++                bne             10b             @ flags from the subs on r7
++
++                pop             {r4-r8, pc}
++
++@ Partial final write
++2:
++                cmp             r5,  #64-128    @ r5 = remaining - 128 here; ge means at least 64 bytes remain
++                blt             1f
++                vst1.8          {d16, d17, d18, d19}, [r0]!
++                vst1.8          {d20, d21, d22, d23}, [r0]!
++                beq             11b             @ exactly 64 remained: line done
++                vmov            q8,  q12        @ move the unwritten upper 64 bytes down to q8-q11
++                vmov            q9,  q13
++                sub             r5,  #64
++                vmov            q10, q14
++                vmov            q11, q15
++1:
++                cmp             r5,  #32-128    @ at least 32 bytes remain?
++                blt             1f
++                vst1.8          {d16, d17, d18, d19}, [r0]!
++                beq             11b
++                vmov            q8,  q10
++                sub             r5,  #32
++                vmov            q9,  q11
++1:
++                cmp             r5,  #16-128    @ at least 16 bytes remain?
++                blt             1f
++                vst1.8          {d16, d17}, [r0]!
++                beq             11b
++                sub             r5,  #16
++                vmov            q8,  q9
++1:
++                cmp             r5,  #8-128     @ at least 8 bytes remain?
++                blt             1f
++                vst1.8          {d16}, [r0]!
++                beq             11b
++                sub             r5,  #8
++                vmov            d16, d17
++1:
++                cmp             r5,  #4-128     @ at least 4 bytes remain?
++                blt             1f
++                vst1.32         {d16[0]}, [r0]!
++                beq             11b
++                sub             r5,  #4
++                vshr.u64        d16, #32        @ bring the next 4 bytes down to the low lanes
++1:
++                cmp             r5,  #2-128     @ at least 2 bytes remain?
++                blt             1f
++                vst1.16         {d16[0]}, [r0]!
++                beq             11b
++                vst1.8          {d16[2]}, [r0]! @ third and last byte
++                b               11b
++1:
++                vst1.8          {d16[0]}, [r0]! @ single remaining byte
++                b               11b
++endfunc
++
++@ void ff_rpi_sand8_lines_to_planar_c8(
++@   uint8_t * dst_u,            // [r0]
++@   unsigned int dst_stride_u,  // [r1]
++@   uint8_t * dst_v,            // [r2]
++@   unsigned int dst_stride_v,  // [r3]
++@   const uint8_t * src,        // [sp, #0]  -> r4, r5
++@   unsigned int stride1,       // [sp, #4]  128
++@   unsigned int stride2,       // [sp, #8]  -> r8
++@   unsigned int _x,            // [sp, #12] 0
++@   unsigned int y,             // [sp, #16] (r7 in prefix)
++@   unsigned int _w,            // [sp, #20] -> r12, r6
++@   unsigned int h);            // [sp, #24] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand8_lines_to_planar_c8, export=1
++                push            {r4-r8, lr}     @ +24
++
++                ldr             r5,  [sp, #24]  @ src
++                ldr             r8,  [sp, #32]  @ stride2
++                ldr             r7,  [sp, #40]  @ y
++                ldr             r6,  [sp, #44]  @ _w
++                lsl             r8,  #7         @ r8 = stride2 * 128
++                add             r5,  r5,  r7,  lsl #7  @ r5 = src + y * 128
++                sub             r1,  r1,  r6    @ r1 = dst_stride_u - _w, added to dst_u after each line
++                sub             r3,  r3,  r6    @ r3 = dst_stride_v - _w, added to dst_v after each line
++                ldr             r7,  [sp, #48]  @ h, used as the line counter
++                vpush           {q4-q7}         @ q4-q7 are overwritten below; restored before return
++
++10:
++                mov             r4,  r5         @ r4 = source pointer for this line
++                mov             r12, r6         @ r12 = bytes still to write per plane on this line
++1:
++                subs            r12, #64
++                vldm            r4,  {q0-q7}    @ 128 source bytes
++                add             r4,  r8
++                it              gt
++                vldmgt          r4,  {q8-q15}   @ second 128 bytes only if more than 64 output bytes remained
++                add             r4,  r8
++
++                vuzp.8          q0,  q1         @ de-interleave: even bytes to the first reg, odd bytes to the second
++                vuzp.8          q2,  q3
++                vuzp.8          q4,  q5
++                vuzp.8          q6,  q7
++
++                vuzp.8          q8,  q9
++                vuzp.8          q10, q11
++                vuzp.8          q12, q13
++                vuzp.8          q14, q15
++                subs            r12, #64        @ r12 = remaining - 128; flags used by the blt and bne below
++
++                @ Rearrange regs so we can use vst1 with 4 regs
++                vswp            q1,  q2
++                vswp            q5,  q6
++                vswp            q9,  q10
++                vswp            q13, q14
++                blt             2f              @ fewer than 128 bytes per plane left: partial write
++
++                vst1.8          {d0,  d1,  d2,  d3 }, [r0]!  @ even source bytes go to dst_u
++                vst1.8          {d8,  d9,  d10, d11}, [r0]!
++                vst1.8          {d16, d17, d18, d19}, [r0]!
++                vst1.8          {d24, d25, d26, d27}, [r0]!
++
++                vst1.8          {d4,  d5,  d6,  d7 }, [r2]!  @ odd source bytes go to dst_v
++                vst1.8          {d12, d13, d14, d15}, [r2]!
++                vst1.8          {d20, d21, d22, d23}, [r2]!
++                vst1.8          {d28, d29, d30, d31}, [r2]!
++                bne             1b              @ flags still from the second subs; zero means the line is done
++11:
++                subs            r7,  #1
++                add             r5,  #128       @ next source line is 128 bytes on
++                add             r0,  r1
++                add             r2,  r3
++                bne             10b             @ flags from the subs on r7
++                vpop            {q4-q7}
++                pop             {r4-r8,pc}
++
++2:
++                cmp             r12, #64-128    @ r12 = remaining - 128 here; ge means at least 64 bytes remain
++                blt             1f
++                vst1.8          {d0,  d1,  d2,  d3 }, [r0]!
++                vst1.8          {d8,  d9,  d10, d11}, [r0]!
++                vst1.8          {d4,  d5,  d6,  d7 }, [r2]!
++                vst1.8          {d12, d13, d14, d15}, [r2]!
++                beq             11b             @ exactly 64 remained: line done
++                sub             r12, #64
++                vmov            q0,  q8         @ move the data from the second load down to q0-q7
++                vmov            q1,  q9
++                vmov            q2,  q10
++                vmov            q3,  q11
++                vmov            q4,  q12
++                vmov            q5,  q13
++                vmov            q6,  q14
++                vmov            q7,  q15
++1:
++                cmp             r12, #32-128    @ at least 32 bytes remain?
++                blt             1f
++                vst1.8          {d0,  d1,  d2,  d3 }, [r0]!
++                vst1.8          {d4,  d5,  d6,  d7 }, [r2]!
++                beq             11b
++                sub             r12, #32
++                vmov            q0,  q4
++                vmov            q1,  q5
++                vmov            q2,  q6
++                vmov            q3,  q7
++1:
++                cmp             r12, #16-128    @ at least 16 bytes remain?
++                blt             1f
++                vst1.8          {d0,  d1 }, [r0]!
++                vst1.8          {d4,  d5 }, [r2]!
++                beq             11b
++                sub             r12, #16
++                vmov            q0,  q1
++                vmov            q2,  q3
++1:
++                cmp             r12, #8-128     @ at least 8 bytes remain?
++                blt             1f
++                vst1.8          {d0}, [r0]!
++                vst1.8          {d4}, [r2]!
++                beq             11b
++                sub             r12, #8
++                vmov            d0,  d1
++                vmov            d4,  d5
++1:
++                cmp             r12, #4-128     @ at least 4 bytes remain?
++                blt             1f
++                vst1.32         {d0[0]}, [r0]!
++                vst1.32         {d4[0]}, [r2]!
++                beq             11b
++                sub             r12, #4
++                vmov            s0,  s1         @ bring the next 4 bytes of d0 down
++                vmov            s8,  s9         @ same for d4
++1:
++                cmp             r12, #2-128     @ at least 2 bytes remain?
++                blt             1f
++                vst1.16         {d0[0]}, [r0]!
++                vst1.16         {d4[0]}, [r2]!
++                beq             11b
++                vst1.8          {d0[2]}, [r0]!  @ third and last byte
++                vst1.8          {d4[2]}, [r2]!
++                b               11b
++1:
++                vst1.8          {d0[0]}, [r0]!  @ single remaining byte
++                vst1.8          {d4[0]}, [r2]!
++                b               11b
++endfunc
++
++
++
++@ void ff_rpi_sand30_lines_to_planar_y16(
++@   uint8_t * dest,             // [r0]
++@   unsigned int dst_stride,    // [r1]
++@   const uint8_t * src,        // [r2]
++@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++@   unsigned int src_stride2,   // [sp, #0]  -> r3
++@   unsigned int _x,            // [sp, #4]  Ignored - 0
++@   unsigned int y,             // [sp, #8]  (r7 in prefix)
++@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++@   unsigned int h);            // [sp, #16] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand30_lines_to_planar_y16, export=1
++                push            {r4-r8, lr}     @ +24
++                ldr             r3,  [sp, #24]  @ src_stride2
++                ldr             r6,  [sp, #36]  @ _w
++                ldr             r7,  [sp, #32]  @ y
++                mov             r12, #48        @ post-increment step for the paired vst3 stores
++                vmov.u16        q15, #0x3ff     @ 10-bit mask in every 16-bit lane
++                sub             r3,  #1
++                lsl             r3,  #7         @ r3 = (src_stride2 - 1) * 128
++                sub             r1,  r1,  r6,  lsl #1  @ r1 = dst_stride - 2 * _w
++                add             r8,  r2,  r7,  lsl #7  @ r8 = src + y * 128
++                ldr             r7,  [sp, #40]  @ h, used as the line counter
++
++10:
++                mov             r2,  r8         @ r2 = source pointer for this line
++                add             r4,  r0,  #24   @ r4 = second store pointer, 24 bytes ahead of r0
++                mov             r5,  r6         @ r5 = samples still to write on this line
++                mov             lr,  #0         @ lr = byte offset within the current 128-byte source row
++1:
++                vldm            r2!, {q10-q13}  @ 64 bytes = 16 words, three 10-bit samples per word
++                add             lr,  #64
++
++                vshr.u32        q14, q10, #20    @ Cannot vshrn.u32 #20!
++                ands            lr,  #127       @ zero after every second 64-byte load
++                vshrn.u32       d2,  q10, #10   @ bits 10 and up, narrowed to 16 bits (masked below)
++                vmovn.u32       d0,  q10        @ low 16 bits (masked below)
++                vmovn.u32       d4,  q14        @ bits 20 and up
++
++                vshr.u32        q14, q11, #20
++                it              eq
++                addeq           r2,  r3         @ 128 bytes consumed: skip ahead by (src_stride2 - 1) * 128
++                vshrn.u32       d3,  q11, #10
++                vmovn.u32       d1,  q11
++                vmovn.u32       d5,  q14
++
++                subs            r5,  #48        @ 48 samples per iteration; flags used by the blt and bne below
++                vand            q0,  q15
++                vand            q1,  q15
++                vand            q2,  q15
++
++                vshr.u32        q14, q12, #20
++                vshrn.u32       d18, q12, #10
++                vmovn.u32       d16, q12
++                vmovn.u32       d20, q14        @ overwrites q10, which has already been consumed
++
++                vshr.u32        q14, q13, #20
++                vshrn.u32       d19, q13, #10
++                vmovn.u32       d17, q13
++                vmovn.u32       d21, q14
++
++                vand            q8,  q15
++                vand            q9,  q15
++                vand            q10, q15
++                blt             2f              @ fewer than 48 samples left: partial write
++
++                vst3.16         {d0,  d2,  d4},  [r0], r12  @ 12 interleaved samples (24 bytes), then r0 += 48
++                vst3.16         {d1,  d3,  d5},  [r4], r12  @ next 12 samples through the second pointer
++                vst3.16         {d16, d18, d20}, [r0], r12
++                vst3.16         {d17, d19, d21}, [r4], r12
++
++                bne             1b              @ flags still from the subs above; zero means the line is done
++
++11:
++                subs            r7,  #1
++                add             r0,  r1         @ dest to the start of the next line
++                add             r8,  #128       @ next source line is 128 bytes on
++                bne             10b
++
++                pop             {r4-r8, pc}
++
++@ Partial final write
++2:
++                cmp             r5,  #24-48     @ r5 = remaining - 48 here; ge means at least 24 samples remain
++                blt             1f
++                vst3.16         {d0,  d2,  d4},  [r0], r12
++                vst3.16         {d1,  d3,  d5},  [r4]
++                beq             11b             @ exactly 24 remained: line done
++                vmov            q0,  q8         @ move the second 24 samples down
++                sub             r5,  #24
++                vmov            q1,  q9
++                vmov            q2,  q10
++1:
++                cmp             r5,  #12-48     @ at least 12 samples remain?
++                blt             1f
++                vst3.16         {d0,  d2,  d4},  [r0]!
++                beq             11b
++                vmov            d0, d1
++                sub             r5, #12
++                vmov            d2, d3
++                vmov            d4, d5
++1:
++                cmp             r5,  #6-48      @ at least 6 samples remain?
++                add             r4,  r0,  #6    @ avoid [r0]! on sequential instructions
++                blt             1f
++                vst3.16         {d0[0], d2[0], d4[0]}, [r0]
++                vst3.16         {d0[1], d2[1], d4[1]}, [r4]
++                add             r0,  #12
++                beq             11b
++                vmov            s0,  s1
++                sub             r5,  #6
++                vmov            s4,  s5
++                vmov            s8,  s9
++1:
++                cmp             r5, #3-48       @ at least 3 samples remain?
++                blt             1f
++                vst3.16         {d0[0], d2[0], d4[0]}, [r0]!
++                beq             11b
++                sub             r5, #3
++                vshr.u32        d0, #16         @ bring lane 1 down to lane 0
++                vshr.u32        d2, #16
++1:
++                cmp             r5, #2-48       @ 2 samples remain?
++                blt             1f
++                vst2.16         {d0[0], d2[0]}, [r0]!
++                b               11b
++1:
++                vst1.16         {d0[0]}, [r0]!  @ single remaining sample
++                b               11b
++
++endfunc
++
++
++@ void ff_rpi_sand30_lines_to_planar_c16(
++@   uint8_t * dst_u,            // [r0]
++@   unsigned int dst_stride_u,  // [r1]
++@   uint8_t * dst_v,            // [r2]
++@   unsigned int dst_stride_v,  // [r3]
++@   const uint8_t * src,        // [sp, #0]  -> r4, r5
++@   unsigned int stride1,       // [sp, #4]  128
++@   unsigned int stride2,       // [sp, #8]  -> r8
++@   unsigned int _x,            // [sp, #12] 0
++@   unsigned int y,             // [sp, #16] (r7 in prefix)
++@   unsigned int _w,            // [sp, #20] -> r6, r9
++@   unsigned int h);            // [sp, #24] -> r7
++@
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand30_lines_to_planar_c16, export=1
++                push            {r4-r10, lr}    @ +32
++                ldr             r5,  [sp, #32]  @ src
++                ldr             r8,  [sp, #40]  @ stride2
++                ldr             r7,  [sp, #48]  @ y
++                ldr             r9,  [sp, #52]  @ _w
++                mov             r12, #48        @ post-increment step for the paired vst3 stores
++                vmov.u16        q15, #0x3ff     @ 10-bit mask in every 16-bit lane
++                sub             r8,  #1
++                lsl             r8,  #7         @ r8 = (stride2 - 1) * 128
++                add             r5,  r5,  r7,  lsl #7  @ r5 = src + y * 128
++                sub             r1,  r1,  r9,  lsl #1  @ r1 = dst_stride_u - 2 * _w
++                sub             r3,  r3,  r9,  lsl #1  @ r3 = dst_stride_v - 2 * _w
++                ldr             r7,  [sp, #56]  @ h, used as the line counter
++10:
++                mov             lr,  #0         @ lr = byte offset within the current 128-byte source row
++                mov             r4,  r5         @ r4 = source pointer for this line
++                mov             r6,  r9         @ r6 = samples still to write per plane on this line
++1:
++                vldm            r4!, {q0-q3}    @ 64 bytes = 16 words, three 10-bit samples per word
++                add             lr,  #64
++
++                @ N.B. unpack [0,1,2] -> (reg order) 1, 0, 2
++                vshr.u32        q14, q0,  #20
++                vshrn.u32       d16, q0,  #10
++                vmovn.u32       d18, q0
++                ands            lr,  #127       @ zero after every second 64-byte load; tested by the it eq below
++                vmovn.u32       d20, q14
++
++                vshr.u32        q14, q1,  #20
++                vshrn.u32       d17, q1,  #10
++                vmovn.u32       d19, q1
++                vmovn.u32       d21, q14
++
++                vshr.u32        q14, q2,  #20
++                vshrn.u32       d22, q2,  #10
++                vmovn.u32       d24, q2
++                vmovn.u32       d26, q14
++
++                vshr.u32        q14, q3,  #20
++                vshrn.u32       d23, q3,  #10
++                vmovn.u32       d25, q3
++                add             r10, r0,  #24   @ r10 = second dst_u store pointer, 24 bytes ahead
++                vmovn.u32       d27, q14
++
++                it              eq
++                addeq           r4,  r8         @ 128 bytes consumed: skip ahead by (stride2 - 1) * 128
++                vuzp.16         q8,  q11
++                vuzp.16         q9,  q12
++                vuzp.16         q10, q13
++
++                @ q8   V0, V3,.. -> q0
++                @ q9   U0, U3...
++                @ q10  U1, U4...
++                @ q11  U2, U5,..
++                @ q12  V1, V4,.. -> q1
++                @ q13  V2, V5,.. -> q2
++
++                subs            r6,  #24        @ 24 samples per plane per iteration; flags used by the blt and bne below
++                vand            q11, q15
++                vand            q9,  q15
++                vand            q10, q15
++                vand            q0,  q8,  q15
++                vand            q1,  q12, q15
++                vand            q2,  q13, q15
++
++                blt             2f              @ fewer than 24 samples left: partial write
++
++                vst3.16         {d18, d20, d22}, [r0],  r12  @ 12 U samples (24 bytes), then r0 += 48
++                vst3.16         {d19, d21, d23}, [r10]
++                add             r10, r2,  #24   @ r10 = second dst_v store pointer
++                vst3.16         {d0,  d2,  d4},  [r2],  r12  @ 12 V samples, then r2 += 48
++                vst3.16         {d1,  d3,  d5},  [r10]
++
++                bne             1b              @ flags still from the subs above; zero means the line is done
++
++11:
++                subs            r7,  #1
++                add             r5,  #128       @ next source line is 128 bytes on
++                add             r0,  r1
++                add             r2,  r3
++                bne             10b             @ flags from the subs on r7
++
++                pop             {r4-r10, pc}
++
++@ Partial final write
++2:
++                cmp             r6,  #-12       @ r6 = remaining - 24 here; ge means at least 12 samples remain
++                blt             1f
++                vst3.16         {d18, d20, d22}, [r0]!
++                vst3.16         {d0,  d2,  d4},  [r2]!
++                beq             11b             @ exactly 12 remained: line done
++                vmov            d18, d19        @ move the upper halves down
++                vmov            d20, d21
++                vmov            d22, d23
++                sub             r6,  #12
++                vmov            d0,  d1
++                vmov            d2,  d3
++                vmov            d4,  d5
++1:
++                cmp             r6,  #-18       @ at least 6 samples remain? flags are tested after the vzips
++                @ Rezip here as it makes the remaining tail handling easier
++                vzip.16         d0,  d18
++                vzip.16         d2,  d20
++                vzip.16         d4,  d22
++                blt             1f
++                vst3.16         {d0[1],  d2[1],  d4[1]},  [r0]!  @ after the rezip, odd lanes go to dst_u
++                vst3.16         {d0[0],  d2[0],  d4[0]},  [r2]!  @ and even lanes go to dst_v
++                vst3.16         {d0[3],  d2[3],  d4[3]},  [r0]!
++                vst3.16         {d0[2],  d2[2],  d4[2]},  [r2]!
++                beq             11b
++                vmov            d0,  d18
++                vmov            d2,  d20
++                sub             r6,  #6
++                vmov            d4,  d22
++1:
++                cmp             r6,  #-21       @ at least 3 samples remain?
++                blt             1f
++                vst3.16         {d0[1], d2[1], d4[1]}, [r0]!
++                vst3.16         {d0[0], d2[0], d4[0]}, [r2]!
++                beq             11b
++                vmov            s4,  s5
++                sub             r6,  #3
++                vmov            s0,  s1
++1:
++                cmp             r6,  #-22       @ 2 samples remain?
++                blt             1f
++                vst2.16         {d0[1], d2[1]}, [r0]!
++                vst2.16         {d0[0], d2[0]}, [r2]!
++                b               11b
++1:
++                vst1.16         {d0[1]}, [r0]!  @ single remaining sample per plane
++                vst1.16         {d0[0]}, [r2]!
++                b               11b
++
++endfunc
++
++@ void ff_rpi_sand30_lines_to_planar_p010(
++@   uint8_t * dest,             // [r0]
++@   unsigned int dst_stride,    // [r1]
++@   const uint8_t * src,        // [r2]
++@   unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++@   unsigned int src_stride2,   // [sp, #0]  -> r3
++@   unsigned int _x,            // [sp, #4]  Ignored - 0
++@   unsigned int y,             // [sp, #8]  (r7 in prefix)
++@   unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++@   unsigned int h);            // [sp, #16] -> r7
++@ Unpacks each 32-bit source word into three 16-bit samples, value kept in the top 10 bits
++@ Assumes that we are starting on a stripe boundary and that overreading
++@ within the stripe is OK. However it does respect the dest size for writing
++
++function ff_rpi_sand30_lines_to_planar_p010, export=1
++                push            {r4-r8, lr}     @ +24
++                ldr             r3,  [sp, #24]  @ src_stride2
++                ldr             r6,  [sp, #36]  @ _w
++                ldr             r7,  [sp, #32]  @ y
++                mov             r12, #48        @ post-increment for the paired vst3 stores
++                vmov.u16        q15, #0xffc0    @ mask: top 10 bits of every 16-bit lane
++                sub             r3,  #1
++                lsl             r3,  #7         @ r3 = (src_stride2 - 1) * 128
++                sub             r1,  r1,  r6,  lsl #1   @ r1 = dst_stride - _w * 2
++                add             r8,  r2,  r7,  lsl #7   @ r8 = src + y * 128
++                ldr             r7,  [sp, #40]  @ h
++
++10:                                             @ start of one output line
++                mov             r2,  r8
++                add             r4,  r0,  #24   @ second store pointer, 24 bytes ahead of r0
++                mov             r5,  r6         @ r5 = samples still to write on this line
++                mov             lr,  #0         @ lr = bytes read so far, modulo 128
++1:
++                vldm            r2!, {q10-q13}  @ 64 bytes = 16 source words
++                add             lr,  #64
++
++                vshl.u32        q14, q10, #6
++                ands            lr,  #127       @ Z set after every 128 bytes read
++                vshrn.u32       d4,  q10, #14
++                vshrn.u32       d2,  q10, #4
++                vmovn.u32       d0,  q14
++
++                vshl.u32        q14, q11, #6
++                it              eq
++                addeq           r2,  r3         @ with the 128 just read: advance by src_stride2 * 128
++                vshrn.u32       d5,  q11, #14
++                vshrn.u32       d3,  q11, #4
++                vmovn.u32       d1,  q14
++
++                subs            r5,  #48        @ 48 samples per pass; flags consumed by blt/bne below
++                vand            q2,  q15
++                vand            q1,  q15
++                vand            q0,  q15
++
++                vshl.u32        q14, q12, #6
++                vshrn.u32       d20, q12, #14
++                vshrn.u32       d18, q12, #4
++                vmovn.u32       d16, q14
++
++                vshl.u32        q14, q13, #6
++                vshrn.u32       d21, q13, #14
++                vshrn.u32       d19, q13, #4
++                vmovn.u32       d17, q14
++
++                vand            q10, q15
++                vand            q9,  q15
++                vand            q8,  q15
++                blt             2f              @ fewer than 48 samples were left
++
++                vst3.16         {d0,  d2,  d4},  [r0], r12
++                vst3.16         {d1,  d3,  d5},  [r4], r12
++                vst3.16         {d16, d18, d20}, [r0], r12
++                vst3.16         {d17, d19, d21}, [r4], r12
++
++                bne             1b              @ loop until the line width is used up
++
++11:                                             @ end of line
++                subs            r7,  #1
++                add             r0,  r1         @ r0 -> start of next destination line
++                add             r8,  #128       @ next source line within the stripe
++                bne             10b
++
++                pop             {r4-r8, pc}
++
++@ Partial final write
++2:
++                cmp             r5,  #24-48     @ r5 is (samples left - 48); at least 24 left?
++                blt             1f
++                vst3.16         {d0,  d2,  d4},  [r0], r12
++                vst3.16         {d1,  d3,  d5},  [r4]
++                beq             11b             @ exactly 24: line complete
++                vmov            q0,  q8         @ move the second half down for the steps below
++                sub             r5,  #24
++                vmov            q1,  q9
++                vmov            q2,  q10
++1:
++                cmp             r5,  #12-48     @ at least 12 left?
++                blt             1f
++                vst3.16         {d0,  d2,  d4},  [r0]!
++                beq             11b
++                vmov            d0, d1
++                sub             r5, #12
++                vmov            d2, d3
++                vmov            d4, d5
++1:
++                cmp             r5,  #6-48      @ at least 6 left?
++                add             r4,  r0,  #6    @ avoid [r0]! on sequential instructions
++                blt             1f
++                vst3.16         {d0[0], d2[0], d4[0]}, [r0]
++                vst3.16         {d0[1], d2[1], d4[1]}, [r4]
++                add             r0,  #12
++                beq             11b
++                vmov            s0,  s1
++                sub             r5,  #6
++                vmov            s4,  s5
++                vmov            s8,  s9
++1:
++                cmp             r5, #3-48       @ at least 3 left?
++                blt             1f
++                vst3.16         {d0[0], d2[0], d4[0]}, [r0]!
++                beq             11b
++                sub             r5, #3
++                vshr.u32        d0, #16         @ bring lane 1 down to lane 0
++                vshr.u32        d2, #16
++1:
++                cmp             r5, #2-48       @ two left, otherwise one
++                blt             1f
++                vst2.16         {d0[0], d2[0]}, [r0]!
++                b               11b
++1:
++                vst1.16         {d0[0]}, [r0]!
++                b               11b
++
++endfunc
++
++
++
+--- /dev/null
++++ b/libavutil/arm/rpi_sand_neon.h
+@@ -0,0 +1,99 @@
++/*
++Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++#ifndef AVUTIL_ARM_SAND_NEON_H
++#define AVUTIL_ARM_SAND_NEON_H
++
++void ff_rpi_sand128b_stripe_to_8_10(
++  uint8_t * dest,             // [r0]
++  const uint8_t * src1,       // [r1]
++  const uint8_t * src2,       // [r2]
++  unsigned int lines);        // [r3]
++
++void ff_rpi_sand8_lines_to_planar_y8(
++  uint8_t * dest,             // [r0]
++  unsigned int dst_stride,    // [r1]
++  const uint8_t * src,        // [r2]
++  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++  unsigned int src_stride2,   // [sp, #0]  -> r3
++  unsigned int _x,            // [sp, #4]  Ignored - 0
++  unsigned int y,             // [sp, #8]  (r7 in prefix)
++  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++  unsigned int h);            // [sp, #16] -> r7
++
++void ff_rpi_sand8_lines_to_planar_c8(
++  uint8_t * dst_u,            // [r0]
++  unsigned int dst_stride_u,  // [r1]
++  uint8_t * dst_v,            // [r2]
++  unsigned int dst_stride_v,  // [r3]
++  const uint8_t * src,        // [sp, #0]  -> r4, r5
++  unsigned int stride1,       // [sp, #4]  128
++  unsigned int stride2,       // [sp, #8]  -> r8
++  unsigned int _x,            // [sp, #12] 0
++  unsigned int y,             // [sp, #16] (r7 in prefix)
++  unsigned int _w,            // [sp, #20] -> r12, r6
++  unsigned int h);            // [sp, #24] -> r7
++
++void ff_rpi_sand30_lines_to_planar_y16(
++  uint8_t * dest,             // [r0]
++  unsigned int dst_stride,    // [r1]
++  const uint8_t * src,        // [r2]
++  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++  unsigned int src_stride2,   // [sp, #0]  -> r3
++  unsigned int _x,            // [sp, #4]  Ignored - 0
++  unsigned int y,             // [sp, #8]  (r7 in prefix)
++  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++  unsigned int h);            // [sp, #16] -> r7
++
++void ff_rpi_sand30_lines_to_planar_c16(
++  uint8_t * dst_u,            // [r0]
++  unsigned int dst_stride_u,  // [r1]
++  uint8_t * dst_v,            // [r2]
++  unsigned int dst_stride_v,  // [r3]
++  const uint8_t * src,        // [sp, #0]  -> r4, r5
++  unsigned int stride1,       // [sp, #4]  128
++  unsigned int stride2,       // [sp, #8]  -> r8
++  unsigned int _x,            // [sp, #12] 0
++  unsigned int y,             // [sp, #16] (r7 in prefix)
++  unsigned int _w,            // [sp, #20] -> r6, r9
++  unsigned int h);            // [sp, #24] -> r7
++
++void ff_rpi_sand30_lines_to_planar_p010(
++  uint8_t * dest,             // [r0]
++  unsigned int dst_stride,    // [r1]
++  const uint8_t * src,        // [r2]
++  unsigned int src_stride1,   // [r3]      Ignored - assumed 128
++  unsigned int src_stride2,   // [sp, #0]  -> r3
++  unsigned int _x,            // [sp, #4]  Ignored - 0
++  unsigned int y,             // [sp, #8]  (r7 in prefix)
++  unsigned int _w,            // [sp, #12] -> r6 (cur r5)
++  unsigned int h);            // [sp, #16] -> r7
++
++#endif // AVUTIL_ARM_SAND_NEON_H
++
+--- a/libavutil/frame.c
++++ b/libavutil/frame.c
+@@ -16,6 +16,8 @@
+  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+  */
+
++#include "config.h"
++
+ #include "channel_layout.h"
+ #include "avassert.h"
+ #include "buffer.h"
+@@ -26,6 +28,9 @@
+ #include "mem.h"
+ #include "samplefmt.h"
+ #include "hwcontext.h"
++#if CONFIG_SAND
++#include "rpi_sand_fns.h"
++#endif
+
+ #if FF_API_FRAME_GET_SET
+ MAKE_ACCESSORS(AVFrame, frame, int64_t, best_effort_timestamp)
+@@ -902,6 +907,12 @@ int av_frame_apply_cropping(AVFrame *fra
+         (frame->crop_top + frame->crop_bottom) >= frame->height)
+         return AVERROR(ERANGE);
+
++#if CONFIG_SAND
++    // Sand cannot be cropped - do not try
++    if (av_rpi_is_sand_format(frame->format))
++        return 0;
++#endif
++
+     desc = av_pix_fmt_desc_get(frame->format);
+     if (!desc)
+         return AVERROR_BUG;
+--- a/libavutil/frame.h
++++ b/libavutil/frame.h
+@@ -968,6 +968,16 @@ int av_frame_apply_cropping(AVFrame *fra
+  */
+ const char *av_frame_side_data_name(enum AVFrameSideDataType type);
+
++/** Width of the frame once its crop_left and crop_right margins are removed. */
++static inline int av_frame_cropped_width(const AVFrame * const frame)
++{
++    return frame->width - (frame->crop_left + frame->crop_right);
++}
++static inline int av_frame_cropped_height(const AVFrame * const frame)
++{
++    return frame->height - (frame->crop_top + frame->crop_bottom);  // height less the top and bottom crop margins
++}
++
+ /**
+  * @}
+  */
+--- a/libavutil/hwcontext_drm.c
++++ b/libavutil/hwcontext_drm.c
+@@ -19,8 +19,10 @@
+ #include <fcntl.h>
+ #include <sys/mman.h>
+ #include <unistd.h>
++#include <sys/ioctl.h>
+
+ #include <drm.h>
++#include <libdrm/drm_fourcc.h>
+ #include <xf86drm.h>
+
+ #include "avassert.h"
+@@ -28,6 +30,11 @@
+ #include "hwcontext_drm.h"
+ #include "hwcontext_internal.h"
+ #include "imgutils.h"
++#include "libavutil/rpi_sand_fns.h"
++
++#include <linux/mman.h>
++#include <linux/dma-buf.h>
++#include <linux/dma-heap.h>
+
+
+ static void drm_device_free(AVHWDeviceContext *hwdev)
+@@ -43,6 +50,11 @@ static int drm_device_create(AVHWDeviceC
+     AVDRMDeviceContext *hwctx = hwdev->hwctx;
+     drmVersionPtr version;
+
++    if (device == NULL) {
++      hwctx->fd = -1;
++      return 0;
++    }
++
+     hwctx->fd = open(device, O_RDWR);
+     if (hwctx->fd < 0)
+         return AVERROR(errno);
+@@ -85,18 +97,37 @@ static int drm_get_buffer(AVHWFramesCont
+ typedef struct DRMMapping {
+     // Address and length of each mmap()ed region.
+     int nb_regions;
++    unsigned int dmaflags;
+     void *address[AV_DRM_MAX_PLANES];
+     size_t length[AV_DRM_MAX_PLANES];
++    int fds[AV_DRM_MAX_PLANES];
+ } DRMMapping;
+
++static int dmasync(const int fd, const unsigned int flags)
++{
++    // Issue DMA_BUF_IOCTL_SYNC on fd, retrying while the call is interrupted (EINTR).
++    // Returns 0 on success, otherwise the negated errno of the failing ioctl.
++    struct dma_buf_sync sync = { .flags = flags };
++    int err;
++    do {
++        err = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync) == -1 ? errno : 0;
++    } while (err == EINTR);
++    if (err == 0)
++        return 0;
++    av_log(NULL, AV_LOG_WARNING, "%s: ioctl failed: flags=%#x\n", __func__, flags);
++    return -err;
++}
++
+ static void drm_unmap_frame(AVHWFramesContext *hwfc,
+                             HWMapDescriptor *hwmap)
+ {
+     DRMMapping *map = hwmap->priv;
+     int i;
+
+-    for (i = 0; i < map->nb_regions; i++)
++    for (i = 0; i < map->nb_regions; i++) {
+         munmap(map->address[i], map->length[i]);
++        dmasync(map->fds[i], DMA_BUF_SYNC_END | map->dmaflags);
++    }
+
+     av_free(map);
+ }
+@@ -114,15 +145,28 @@ static int drm_map_frame(AVHWFramesConte
+     if (!map)
+         return AVERROR(ENOMEM);
+
++    for (i = 0; i < AV_DRM_MAX_PLANES; i++)
++        map->fds[i] = -1;
++
+     mmap_prot = 0;
+-    if (flags & AV_HWFRAME_MAP_READ)
++    if (flags & AV_HWFRAME_MAP_READ) {
++        map->dmaflags |= DMA_BUF_SYNC_READ;
+         mmap_prot |= PROT_READ;
+-    if (flags & AV_HWFRAME_MAP_WRITE)
++    }
++    if (flags & AV_HWFRAME_MAP_WRITE) {
++        map->dmaflags |= DMA_BUF_SYNC_WRITE;
+         mmap_prot |= PROT_WRITE;
++    }
++
++    if (dst->format == AV_PIX_FMT_NONE)
++        dst->format = hwfc->sw_format;
+
+     av_assert0(desc->nb_objects <= AV_DRM_MAX_PLANES);
+     for (i = 0; i < desc->nb_objects; i++) {
+-        addr = mmap(NULL, desc->objects[i].size, mmap_prot, MAP_SHARED,
++        dmasync(desc->objects[i].fd, DMA_BUF_SYNC_START | map->dmaflags);
++        map->fds[i] = desc->objects[i].fd;
++
++        addr = mmap(NULL, desc->objects[i].size, mmap_prot, MAP_SHARED | MAP_POPULATE,
+                     desc->objects[i].fd, 0);
+         if (addr == MAP_FAILED) {
+             err = AVERROR(errno);
+@@ -151,6 +195,23 @@ static int drm_map_frame(AVHWFramesConte
+
+     dst->width  = src->width;
+     dst->height = src->height;
++    dst->crop_top    = src->crop_top;
++    dst->crop_bottom = src->crop_bottom;
++    dst->crop_left   = src->crop_left;
++    dst->crop_right  = src->crop_right;
++
++#if CONFIG_SAND
++    // Rework for sand frames
++    if (av_rpi_is_sand_frame(dst)) {
++        // As it stands the sand formats hold stride2 in linesize[3]
++        // linesize[0] & [1] contain stride1 which is always 128 for everything we do
++        // * Arguably this should be reworked s.t. stride2 is in linesize[0] & [1]
++        dst->linesize[3] = fourcc_mod_broadcom_param(desc->objects[0].format_modifier);
++        dst->linesize[0] = 128;
++        dst->linesize[1] = 128;
++        // *** Are we sure src->height is actually what we want ???
++    }
++#endif
+
+     err = ff_hwframe_map_create(src->hw_frames_ctx, dst, src,
+                                 &drm_unmap_frame, map);
+@@ -160,7 +221,9 @@ static int drm_map_frame(AVHWFramesConte
+     return 0;
+
+ fail:
+-    for (i = 0; i < desc->nb_objects; i++) {
++    for (i = 0; i < AV_DRM_MAX_PLANES; i++) {
++        if (map->fds[i] != -1)
++            dmasync(map->fds[i], DMA_BUF_SYNC_END | map->dmaflags);
+         if (map->address[i])
+             munmap(map->address[i], map->length[i]);
+     }
+@@ -178,7 +241,15 @@ static int drm_transfer_get_formats(AVHW
+     if (!pix_fmts)
+         return AVERROR(ENOMEM);
+
+-    pix_fmts[0] = ctx->sw_format;
++    // **** Offer native sand too ????
++    pix_fmts[0] =
++#if CONFIG_SAND
++        ctx->sw_format == AV_PIX_FMT_RPI4_8 || ctx->sw_format == AV_PIX_FMT_SAND128 ?
++            AV_PIX_FMT_YUV420P :
++        ctx->sw_format == AV_PIX_FMT_RPI4_10 ?
++            AV_PIX_FMT_YUV420P10LE :
++#endif
++            ctx->sw_format;
+     pix_fmts[1] = AV_PIX_FMT_NONE;
+
+     *formats = pix_fmts;
+@@ -197,18 +268,80 @@ static int drm_transfer_data_from(AVHWFr
+     map = av_frame_alloc();
+     if (!map)
+         return AVERROR(ENOMEM);
+-    map->format = dst->format;
+
++    // Map to default
++    map->format = AV_PIX_FMT_NONE;
+     err = drm_map_frame(hwfc, map, src, AV_HWFRAME_MAP_READ);
+     if (err)
+         goto fail;
+
+-    map->width  = dst->width;
+-    map->height = dst->height;
++#if 0
++    av_log(hwfc, AV_LOG_INFO, "%s: src fmt=%d (%d), dst fmt=%d (%d) s=%dx%d l=%d/%d/%d/%d, d=%dx%d l=%d/%d/%d\n", __func__,
++           map->hwfc_format, AV_PIX_FMT_RPI4_8, dst->format, AV_PIX_FMT_YUV420P10LE,
++           map->width, map->height,
++           map->linesize[0],
++           map->linesize[1],
++           map->linesize[2],
++           map->linesize[3],
++           dst->width, dst->height,
++           dst->linesize[0],
++           dst->linesize[1],
++           dst->linesize[2]);
++#endif
++#if CONFIG_SAND
++    if (av_rpi_is_sand_frame(map)) {
++        // Preserve crop - later ffmpeg code assumes that we have in that it
++        // overwrites any crop that we create with the old values
++        unsigned int stride2 = map->linesize[3];
++        const unsigned int w = FFMIN(dst->width, map->width);
++        const unsigned int h = FFMIN(dst->height, map->height);
++
++        if (map->format == AV_PIX_FMT_RPI4_8 && dst->format == AV_PIX_FMT_YUV420P) {
++            av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
++                                     map->data[0],
++                                     128, stride2,
++                                     0, 0, w, h);
++            av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1],
++                                     dst->data[2], dst->linesize[2],
++                                     map->data[1],
++                                     128, stride2,
++                                     0, 0, w / 2, h / 2);
++        }
++        else if (map->format == AV_PIX_FMT_RPI4_10 && dst->format == AV_PIX_FMT_YUV420P10LE) {
++            av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0],
++                                     map->data[0],
++                                     128, stride2,
++                                     0, 0, w, h);
++            av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1],
++                                     dst->data[2], dst->linesize[2],
++                                     map->data[1],
++                                     128, stride2,
++                                     0, 0, w / 2, h / 2);
++        }
++        else
++        {
++            av_log(hwfc, AV_LOG_ERROR, "%s: Incompatible output pixfmt for sand\n", __func__);
++            err = AVERROR(EINVAL);
++            goto fail;
++        }
++
++        dst->width = w;
++        dst->height = h;
++    }
++    else
++#endif
++    {
++        // Kludge mapped h/w s.t. frame_copy works
++        map->width  = dst->width;
++        map->height = dst->height;
++        err = av_frame_copy(dst, map);
++    }
+
+-    err = av_frame_copy(dst, map);
+     if (err)
++    {
++        av_log(hwfc, AV_LOG_ERROR, "%s: Copy fail\n", __func__);
+         goto fail;
++    }
+
+     err = 0;
+ fail:
+@@ -223,7 +356,10 @@ static int drm_transfer_data_to(AVHWFram
+     int err;
+
+     if (src->width > hwfc->width || src->height > hwfc->height)
++    {
++        av_log(hwfc, AV_LOG_ERROR, "%s: H/w mismatch: %d/%d, %d/%d\n", __func__, dst->width, hwfc->width, dst->height, hwfc->height);
+         return AVERROR(EINVAL);
++    }
+
+     map = av_frame_alloc();
+     if (!map)
+--- a/libavutil/pixdesc.c
++++ b/libavutil/pixdesc.c
+@@ -2371,6 +2371,38 @@ static const AVPixFmtDescriptor av_pix_f
+         .name = "vulkan",
+         .flags = AV_PIX_FMT_FLAG_HWACCEL,
+     },
++    [AV_PIX_FMT_SAND128] = {
++        .name = "sand128",
++        .nb_components = 3,
++        .log2_chroma_w = 1,
++        .log2_chroma_h = 1,
++        .comp = {
++            { 0, 1, 0, 0, 8, 0, 7, 1 },        /* Y */
++            { 1, 2, 0, 0, 8, 1, 7, 1 },        /* U */
++            { 1, 2, 1, 0, 8, 1, 7, 2 },        /* V */
++        },
++        .flags = 0,
++    },
++    [AV_PIX_FMT_SAND64_10] = {
++        .name = "sand64_10",
++        .nb_components = 3,
++        .log2_chroma_w = 1,
++        .log2_chroma_h = 1,
++        .comp = {
++            { 0, 2, 0, 0, 10, 0, 9, 1 },        /* Y */
++            { 1, 4, 0, 0, 10, 1, 9, 1 },        /* U */
++            { 1, 4, 1, 0, 10, 1, 9, 2 },        /* V */
++        },
++        .flags = 0,
++    },
++    [AV_PIX_FMT_RPI4_8] = {
++        .name = "rpi",
++        .flags = AV_PIX_FMT_FLAG_HWACCEL,
++    },
++    [AV_PIX_FMT_RPI4_10] = {
++        .name = "rpi",
++        .flags = AV_PIX_FMT_FLAG_HWACCEL,
++    },
+ };
+ #if FF_API_PLUS1_MINUS1
+ FF_ENABLE_DEPRECATION_WARNINGS
+--- a/libavutil/pixfmt.h
++++ b/libavutil/pixfmt.h
+@@ -357,6 +357,12 @@ enum AVPixelFormat {
+
+     AV_PIX_FMT_Y210BE,    ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, big-endian
+     AV_PIX_FMT_Y210LE,    ///< packed YUV 4:2:2 like YUYV422, 20bpp, data in the high bits, little-endian
++// RPI - not on ifdef so can be got at by calling progs
++    AV_PIX_FMT_SAND128,    ///< 4:2:0  8-bit 128x*Y stripe, 64x*UV stripe, then next x stripe, mysterious padding
++    AV_PIX_FMT_SAND64_10,  ///< 4:2:0 10-bit  64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding
++    AV_PIX_FMT_SAND64_16,  ///< 4:2:0 16-bit  64x*Y stripe, 32x*UV stripe, then next x stripe, mysterious padding
++    AV_PIX_FMT_RPI4_8,
++    AV_PIX_FMT_RPI4_10,
+
+     AV_PIX_FMT_NB         ///< number of pixel formats, DO NOT USE THIS if you want to link with shared libav* because the number of formats might differ between versions
+ };
+--- /dev/null
++++ b/libavutil/rpi_sand_fn_pw.h
+@@ -0,0 +1,227 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++// * Included twice from rpi_sand_fns.c with different PW (1 = uint8_t pixels, 2 = uint16_t pixels)
++
++#define STRCAT(x,y) x##y
++
++#if PW == 1
++#define pixel uint8_t
++#define FUNC(f) STRCAT(f, 8)
++#elif PW == 2
++#define pixel uint16_t
++#define FUNC(f) STRCAT(f, 16)
++#else
++#error Unexpected PW
++#endif
++
++// Fetches a single w x h patch from a sand plane into a linear buffer - offscreen fixup not done here
++// w <= stride1 per the original note (the two+ stripe path below also loops over whole middle stripes)
++// unclipped; with PW == 1, HAVE_SAND_ASM and _x == 0 the work goes to ff_rpi_sand8_lines_to_planar_y8
++void FUNC(av_rpi_sand_to_planar_y)(uint8_t * dst, const unsigned int dst_stride,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h)
++{
++    const unsigned int x = _x;
++    const unsigned int w = _w;
++    const unsigned int mask = stride1 - 1;  // assumes stride1 is a power of two - TODO confirm
++
++#if PW == 1 && HAVE_SAND_ASM
++    if (_x == 0) {
++        ff_rpi_sand8_lines_to_planar_y8(dst, dst_stride,
++                                     src, stride1, stride2, _x, y, _w, h);
++        return;
++    }
++#endif
++
++    if ((x & ~mask) == ((x + w) & ~mask)) {
++        // All in one sand stripe
++        const uint8_t * p = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++        for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p += stride1) {
++            memcpy(dst, p, w);
++        }
++    }
++    else
++    {
++        // Two+ stripe
++        const unsigned int sstride = stride1 * stride2;  // bytes from one stripe to the next
++        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++        const uint8_t * p2 = p1 + sstride - (x & mask);  // same row, start of the following stripe
++        const unsigned int w1 = stride1 - (x & mask);    // bytes up to the end of the first stripe
++        const unsigned int w3 = (x + w) & mask;          // bytes taken from the last stripe
++        const unsigned int w2 = w - (w1 + w3);           // bytes in the whole stripes in between
++
++        for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p1 += stride1, p2 += stride1) {
++            unsigned int j;
++            const uint8_t * p = p2;
++            uint8_t * d = dst;
++            memcpy(d, p1, w1);
++            d += w1;
++            for (j = 0; j < w2; j += stride1, d += stride1, p += sstride) {
++                memcpy(d, p, stride1);
++            }
++            memcpy(d, p, w3);
++        }
++    }
++}
++
++// Splits an interleaved UV sand plane into separate U and V destination planes
++// x & w in bytes but not of interleave (i.e. offset = x*2 for U&V)
++void FUNC(av_rpi_sand_to_planar_c)(uint8_t * dst_u, const unsigned int dst_stride_u,
++                             uint8_t * dst_v, const unsigned int dst_stride_v,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h)
++{
++    const unsigned int x = _x * 2;
++    const unsigned int w = _w * 2;
++    const unsigned int mask = stride1 - 1;  // assumes stride1 is a power of two - TODO confirm
++
++#if PW == 1 && HAVE_SAND_ASM
++    if (_x == 0) {
++        ff_rpi_sand8_lines_to_planar_c8(dst_u, dst_stride_u, dst_v, dst_stride_v,
++                                     src, stride1, stride2, _x, y, _w, h);
++        return;
++    }
++#endif
++
++    if ((x & ~mask) == ((x + w) & ~mask)) {
++        // All in one sand stripe
++        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++        for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1) {
++            pixel * du = (pixel *)dst_u;
++            pixel * dv = (pixel *)dst_v;
++            const pixel * p = (const pixel *)p1;
++            for (unsigned int k = 0; k < w; k += 2 * PW) {  // one U and one V pixel per pass
++                *du++ = *p++;
++                *dv++ = *p++;
++            }
++        }
++    }
++    else
++    {
++        // Two+ stripe
++        const unsigned int sstride = stride1 * stride2;  // bytes from one stripe to the next
++        const unsigned int sstride_p = (sstride - stride1) / PW;  // stripe step in pixels, less the stride1 bytes the inner loop has walked
++
++        const uint8_t * p1 = src + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++        const uint8_t * p2 = p1 + sstride - (x & mask);  // same row, start of the following stripe
++        const unsigned int w1 = stride1 - (x & mask);    // bytes up to the end of the first stripe
++        const unsigned int w3 = (x + w) & mask;          // bytes taken from the last stripe
++        const unsigned int w2 = w - (w1 + w3);           // bytes in the whole stripes in between
++
++        for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p1 += stride1, p2 += stride1) {
++            unsigned int j;
++            const pixel * p = (const pixel *)p1;
++            pixel * du = (pixel *)dst_u;
++            pixel * dv = (pixel *)dst_v;
++            for (unsigned int k = 0; k < w1; k += 2 * PW) {
++                *du++ = *p++;
++                *dv++ = *p++;
++            }
++            for (j = 0, p = (const pixel *)p2; j < w2; j += stride1, p += sstride_p) {
++                for (unsigned int k = 0; k < stride1; k += 2 * PW) {
++                    *du++ = *p++;
++                    *dv++ = *p++;
++                }
++            }
++            for (unsigned int k = 0; k < w3; k += 2 * PW) {
++                *du++ = *p++;
++                *dv++ = *p++;
++            }
++        }
++    }
++}
++// Interleaves separate U and V source planes into a UV sand plane; _x & _w are scaled by 2 as in av_rpi_sand_to_planar_c
++void FUNC(av_rpi_planar_to_sand_c)(uint8_t * dst_c,
++                             unsigned int stride1, unsigned int stride2,
++                             const uint8_t * src_u, const unsigned int src_stride_u,
++                             const uint8_t * src_v, const unsigned int src_stride_v,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h)
++{
++    const unsigned int x = _x * 2;
++    const unsigned int w = _w * 2;
++    const unsigned int mask = stride1 - 1;  // assumes stride1 is a power of two - TODO confirm
++    if ((x & ~mask) == ((x + w) & ~mask)) {
++        // All in one sand stripe
++        uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2;
++        for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1) {
++            const pixel * su = (const pixel *)src_u;
++            const pixel * sv = (const pixel *)src_v;
++            pixel * p = (pixel *)p1;
++            for (unsigned int k = 0; k < w; k += 2 * PW) {  // one U and one V pixel per pass
++                *p++ = *su++;
++                *p++ = *sv++;
++            }
++        }
++    }
++    else
++    {
++        // Two+ stripe
++        const unsigned int sstride = stride1 * stride2;  // bytes from one stripe to the next
++        const unsigned int sstride_p = (sstride - stride1) / PW;  // stripe step in pixels, less the stride1 bytes the inner loop has walked
++
++        const uint8_t * p1 = dst_c + (x & mask) + y * stride1 + (x & ~mask) * stride2;  // destination pointer; const is cast away below
++        const uint8_t * p2 = p1 + sstride - (x & mask);  // same row, start of the following stripe
++        const unsigned int w1 = stride1 - (x & mask);    // bytes up to the end of the first stripe
++        const unsigned int w3 = (x + w) & mask;          // bytes written to the last stripe
++        const unsigned int w2 = w - (w1 + w3);           // bytes in the whole stripes in between
++
++        for (unsigned int i = 0; i != h; ++i, src_u += src_stride_u, src_v += src_stride_v, p1 += stride1, p2 += stride1) {
++            unsigned int j;
++            const pixel * su = (const pixel *)src_u;
++            const pixel * sv = (const pixel *)src_v;
++            pixel * p = (pixel *)p1;
++            for (unsigned int k = 0; k < w1; k += 2 * PW) {
++                *p++ = *su++;
++                *p++ = *sv++;
++            }
++            for (j = 0, p = (pixel *)p2; j < w2; j += stride1, p += sstride_p) {
++                for (unsigned int k = 0; k < stride1; k += 2 * PW) {
++                    *p++ = *su++;
++                    *p++ = *sv++;
++                }
++            }
++            for (unsigned int k = 0; k < w3; k += 2 * PW) {
++                *p++ = *su++;
++                *p++ = *sv++;
++            }
++        }
++    }
++}
++
++
++#undef pixel
++#undef STRCAT
++#undef FUNC
++
+--- /dev/null
++++ b/libavutil/rpi_sand_fns.c
+@@ -0,0 +1,356 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++#include "config.h"
++#include <stdint.h>
++#include <string.h>
++#include "rpi_sand_fns.h"
++#include "avassert.h"
++#include "frame.h"
++
++#if ARCH_ARM && HAVE_NEON
++#include "arm/rpi_sand_neon.h"
++#define HAVE_SAND_ASM 1
++#elif ARCH_AARCH64 && HAVE_NEON
++#include "aarch64/rpi_sand_neon.h"
++#define HAVE_SAND_ASM 1
++#else
++#define HAVE_SAND_ASM 0
++#endif
++
++#define PW 1
++#include "rpi_sand_fn_pw.h"
++#undef PW
++
++#define PW 2
++#include "rpi_sand_fn_pw.h"
++#undef PW
++
++#if 1
++// Simple round: narrows n 16-bit samples to 8 bits, adding half of (1 << shr) before shifting right by shr
++static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr)
++{
++    const unsigned int rnd = (1 << shr) >> 1; // Rounding bias; 0 when shr == 0
++    const uint16_t * src = (const uint16_t *)_src; // Source bytes are read as native uint16_t samples
++
++    for (; n != 0; --n) {
++        *dst++ = (*src++ + rnd) >> shr; // NOTE(review): no clamp - a result above 255 is truncated by the uint8_t store
++    }
++}
++#else
++// Dithered variation (compiled out by the "#if 1" above): the bits shifted out of one sample are added to the next
++static void cpy16_to_8(uint8_t * dst, const uint8_t * _src, unsigned int n, const unsigned int shr)
++{
++    unsigned int rnd = (1 << shr) >> 1; // Starts as the plain rounding bias, then holds the running sum
++    const unsigned int mask = ((1 << shr) - 1); // Selects the low shr bits that the shift discards
++    const uint16_t * src = (const uint16_t *)_src;
++
++    for (; n != 0; --n) {
++        rnd = *src++ + (rnd & mask); // Sample plus the residual carried over from the previous sample
++        *dst++ = rnd >> shr;
++    }
++}
++#endif
++
++// Fetches a single luma patch from a sand30 plane (three 10-bit samples per
++// 32-bit word) into 16-bit planar samples - offscreen fixup not done here
++// w <= stride1, unclipped
++// _x & _w in pixels, strides in bytes
++void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h)
++{
++    const unsigned int x0 = (_x / 3) * 4; // Byte offset of the word holding pixel _x
++    const unsigned int xskip0 = _x - (x0 >> 2) * 3; // Samples to skip in the first word (_x % 3)
++    const unsigned int x1 = ((_x + _w) / 3) * 4; // Byte offset of the word holding pixel _x + _w
++    const unsigned int xrem1 = _x + _w - (x1 >> 2) * 3; // Samples wanted from the last word ((_x + _w) % 3)
++    const unsigned int mask = stride1 - 1; // NOTE(review): used as a bit mask, so assumes stride1 is a power of 2
++    const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
++    const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
++
++#if HAVE_SAND_ASM
++    if (_x == 0) { // The asm version is only used for patches that start at x == 0
++        ff_rpi_sand30_lines_to_planar_y16(dst, dst_stride, src, stride1, stride2, _x, y, _w, h);
++        return;
++    }
++#endif
++
++    if (x0 == x1) {
++        // ******************* Not implemented:
++        // Partial single word xfer - returns without writing anything to dst
++        return;
++    }
++
++    for (unsigned int i = 0; i != h; ++i, dst += dst_stride, p0 += stride1)
++    {
++        unsigned int x = x0;
++        const uint32_t * p = (const uint32_t *)p0;
++        uint16_t * d = (uint16_t *)dst;
++        // Leading partial word: drop the first xskip0 samples
++        if (xskip0 != 0) {
++            const uint32_t p3 = *p++;
++
++            if (xskip0 == 1)
++                *d++ = (p3 >> 10) & 0x3ff;
++            *d++ = (p3 >> 20) & 0x3ff;
++
++            if (((x += 4) & mask) == 0) // Stepped off the end of a stripe row
++                p += slice_inc;
++        }
++        // Whole words: three samples each
++        while (x != x1) {
++            const uint32_t p3 = *p++;
++            *d++ = p3 & 0x3ff;
++            *d++ = (p3 >> 10) & 0x3ff;
++            *d++ = (p3 >> 20) & 0x3ff;
++
++            if (((x += 4) & mask) == 0) // Stepped off the end of a stripe row
++                p += slice_inc;
++        }
++        // Trailing partial word: only the first xrem1 samples are wanted
++        if (xrem1 != 0) {
++            const uint32_t p3 = *p;
++
++            *d++ = p3 & 0x3ff;
++            if (xrem1 == 2)
++                *d++ = (p3 >> 10) & 0x3ff;
++        }
++    }
++}
++
++// Chroma version of the sand30 fetch: splits interleaved 10-bit U/V samples into separate 16-bit U and V planes
++void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
++                             uint8_t * dst_v, const unsigned int dst_stride_v,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h)
++{
++    const unsigned int x0 = (_x / 3) * 8; // Byte offset of the word pair holding chroma pel _x
++    const unsigned int xskip0 = _x - (x0 >> 3) * 3; // U/V pairs to skip in the first word pair (_x % 3)
++    const unsigned int x1 = ((_x + _w) / 3) * 8; // Byte offset of the word pair holding chroma pel _x + _w
++    const unsigned int xrem1 = _x + _w - (x1 >> 3) * 3; // U/V pairs wanted from the last word pair
++    const unsigned int mask = stride1 - 1; // NOTE(review): used as a bit mask, so assumes stride1 is a power of 2
++    const uint8_t * p0 = src + (x0 & mask) + y * stride1 + (x0 & ~mask) * stride2;
++    const unsigned int slice_inc = ((stride2 - 1) * stride1) >> 2;  // RHS of a stripe to LHS of next in words
++
++#if HAVE_SAND_ASM
++    if (_x == 0) { // The asm version is only used for patches that start at x == 0
++        ff_rpi_sand30_lines_to_planar_c16(dst_u, dst_stride_u, dst_v, dst_stride_v,
++                                       src, stride1, stride2, _x, y, _w, h);
++        return;
++    }
++#endif
++
++    if (x0 == x1) {
++        // ******************* Not implemented:
++        // Partial single word xfer - returns without writing anything to dst_u / dst_v
++        return;
++    }
++
++    for (unsigned int i = 0; i != h; ++i, dst_u += dst_stride_u, dst_v += dst_stride_v, p0 += stride1)
++    {
++        unsigned int x = x0;
++        const uint32_t * p = (const uint32_t *)p0;
++        uint16_t * du = (uint16_t *)dst_u;
++        uint16_t * dv = (uint16_t *)dst_v;
++        // Two words hold U0 V0 U1 | V1 U2 V2; leading partial pair drops the first xskip0 U/V pairs
++        if (xskip0 != 0) {
++            const uint32_t p3a = *p++;
++            const uint32_t p3b = *p++;
++
++            if (xskip0 == 1)
++            {
++                *du++ = (p3a >> 20) & 0x3ff;
++                *dv++ = (p3b >>  0) & 0x3ff;
++            }
++            *du++ = (p3b >> 10) & 0x3ff;
++            *dv++ = (p3b >> 20) & 0x3ff;
++
++            if (((x += 8) & mask) == 0) // Stepped off the end of a stripe row
++                p += slice_inc;
++        }
++        // Whole word pairs: three U/V pairs each
++        while (x != x1) {
++            const uint32_t p3a = *p++;
++            const uint32_t p3b = *p++;
++
++            *du++ = p3a & 0x3ff;
++            *dv++ = (p3a >> 10) & 0x3ff;
++            *du++ = (p3a >> 20) & 0x3ff;
++            *dv++ = p3b & 0x3ff;
++            *du++ = (p3b >> 10) & 0x3ff;
++            *dv++ = (p3b >> 20) & 0x3ff;
++
++            if (((x += 8) & mask) == 0) // Stepped off the end of a stripe row
++                p += slice_inc;
++        }
++        // Trailing partial pair: only the first xrem1 U/V pairs are wanted
++        if (xrem1 != 0) {
++            const uint32_t p3a = *p++;
++            const uint32_t p3b = *p++; // Read even when xrem1 == 1, where it is not used
++
++            *du++ = p3a & 0x3ff;
++            *dv++ = (p3a >> 10) & 0x3ff;
++            if (xrem1 == 2)
++            {
++                *du++ = (p3a >> 20) & 0x3ff;
++                *dv++ = p3b & 0x3ff;
++            }
++        }
++    }
++}
++
++
++// Narrows a 16-bit sand plane to an 8-bit sand plane, shifting each sample right by shr; w/h in pixels
++void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
++                         const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2,
++                         unsigned int w, unsigned int h, const unsigned int shr)
++{
++    const unsigned int n = dst_stride1 / 2; // Samples copied per cpy16_to_8 call: half a dst stripe row
++    unsigned int j; // Dest x position of the current stripe; also read after the loops
++
++    // This is true for our current layouts
++    av_assert0(dst_stride1 == src_stride1);
++
++    // As we have the same stride1 for src & dest and src is wider than dest
++    // then if we loop on src we can always write contiguously to dest
++    // We make no effort to copy an exact width - round up to nearest src stripe
++    // as we will always have storage in dest for that
++
++#if ARCH_ARM && HAVE_NEON
++    if (shr == 3 && src_stride1 == 128) { // Asm path is only used for this shift / stride combination
++        for (j = 0; j + n < w; j += dst_stride1) {
++            uint8_t * d = dst + j * dst_stride2;
++            const uint8_t * s1 = src + j * 2 * src_stride2;
++            const uint8_t * s2 = s1 + src_stride1 * src_stride2;
++
++            ff_rpi_sand128b_stripe_to_8_10(d, s1, s2, h);
++        }
++    }
++    else
++#endif
++    {
++        for (j = 0; j + n < w; j += dst_stride1) { // Each pass fills one dest stripe from two src stripes
++            uint8_t * d = dst + j * dst_stride2;
++            const uint8_t * s1 = src + j * 2 * src_stride2; // Src stripe for the left half of the dest stripe
++            const uint8_t * s2 = s1 + src_stride1 * src_stride2; // Next src stripe, for the right half
++
++            for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, s2 += src_stride1, d += dst_stride1) {
++                cpy16_to_8(d, s1, n, shr);
++                cpy16_to_8(d + n, s2, n, shr);
++            }
++        }
++    }
++
++    // Fix up a trailing dest half stripe
++    if (j < w) { // At most n pixels remain, so only one src stripe is read
++        uint8_t * d = dst + j * dst_stride2;
++        const uint8_t * s1 = src + j * 2 * src_stride2;
++
++        for (unsigned int i = 0; i != h; ++i, s1 += src_stride1, d += dst_stride1) {
++            cpy16_to_8(d, s1, n, shr);
++        }
++    }
++}
++// Converts the cropped area of sand frame src into planar frame dst; returns -1 for an unsupported src/dst format pair
++int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src)
++{
++    const int w = av_frame_cropped_width(src);
++    const int h = av_frame_cropped_height(src);
++    const int x = src->crop_left;
++    const int y = src->crop_top;
++
++    // We will crop as part of the conversion
++    dst->crop_top = 0;
++    dst->crop_left = 0;
++    dst->crop_bottom = 0;
++    dst->crop_right = 0;
++
++    switch (src->format){
++        case AV_PIX_FMT_SAND128:
++        case AV_PIX_FMT_RPI4_8:
++            switch (dst->format){
++                case AV_PIX_FMT_YUV420P:
++                    av_rpi_sand_to_planar_y8(dst->data[0], dst->linesize[0],
++                                             src->data[0],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x, y, w, h);
++                    av_rpi_sand_to_planar_c8(dst->data[1], dst->linesize[1],
++                                             dst->data[2], dst->linesize[2],
++                                             src->data[1],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x/2, y/2,  w/2, h/2); // NOTE(review): the /2 truncates - confirm odd crop values cannot occur
++                    break;
++                default:
++                    return -1;
++            }
++            break;
++        case AV_PIX_FMT_SAND64_10:
++            switch (dst->format){
++                case AV_PIX_FMT_YUV420P10:
++                    av_rpi_sand_to_planar_y16(dst->data[0], dst->linesize[0],
++                                             src->data[0],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x*2, y, w*2, h); // x and w are doubled for the 16-bit fns
++                    av_rpi_sand_to_planar_c16(dst->data[1], dst->linesize[1],
++                                             dst->data[2], dst->linesize[2],
++                                             src->data[1],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x, y/2,  w, h/2); // x and w are passed unhalved here, unlike the 8-bit chroma call
++                    break;
++                default:
++                    return -1;
++            }
++            break;
++        case AV_PIX_FMT_RPI4_10:
++            switch (dst->format){
++                case AV_PIX_FMT_YUV420P10:
++                    av_rpi_sand30_to_planar_y16(dst->data[0], dst->linesize[0],
++                                             src->data[0],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x, y, w, h);
++                    av_rpi_sand30_to_planar_c16(dst->data[1], dst->linesize[1],
++                                             dst->data[2], dst->linesize[2],
++                                             src->data[1],
++                                             av_rpi_sand_frame_stride1(src), av_rpi_sand_frame_stride2(src),
++                                             x/2, y/2, w/2, h/2); // NOTE(review): the /2 truncates - confirm odd crop values cannot occur
++                    break;
++                default:
++                    return -1;
++            }
++            break;
++        default:
++            return -1;
++    }
++    // NOTE(review): av_frame_copy_props() may copy src's crop_* fields over the zeros set above - confirm
++    return av_frame_copy_props(dst, src);
++}
+--- /dev/null
++++ b/libavutil/rpi_sand_fns.h
+@@ -0,0 +1,183 @@
++/*
++Copyright (c) 2018 Raspberry Pi (Trading) Ltd.
++All rights reserved.
++
++Redistribution and use in source and binary forms, with or without
++modification, are permitted provided that the following conditions are met:
++    * Redistributions of source code must retain the above copyright
++      notice, this list of conditions and the following disclaimer.
++    * Redistributions in binary form must reproduce the above copyright
++      notice, this list of conditions and the following disclaimer in the
++      documentation and/or other materials provided with the distribution.
++    * Neither the name of the copyright holder nor the
++      names of its contributors may be used to endorse or promote products
++      derived from this software without specific prior written permission.
++
++THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
++ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
++WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY
++DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
++(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
++LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
++ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
++(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++
++Authors: John Cox
++*/
++
++#ifndef AVUTIL_RPI_SAND_FNS
++#define AVUTIL_RPI_SAND_FNS
++
++#include "libavutil/frame.h"
++
++// For all these fns _x & _w are measured as coord * PW
++// For the C fns coords are in chroma pels (so luma / 2)
++// Strides are in bytes
++
++void av_rpi_sand_to_planar_y8(uint8_t * dst, const unsigned int dst_stride,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++void av_rpi_sand_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++
++void av_rpi_sand_to_planar_c8(uint8_t * dst_u, const unsigned int dst_stride_u,
++                             uint8_t * dst_v, const unsigned int dst_stride_v,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++void av_rpi_sand_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
++                             uint8_t * dst_v, const unsigned int dst_stride_v,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++
++void av_rpi_planar_to_sand_c8(uint8_t * dst_c,
++                             unsigned int stride1, unsigned int stride2,
++                             const uint8_t * src_u, const unsigned int src_stride_u,
++                             const uint8_t * src_v, const unsigned int src_stride_v,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++void av_rpi_planar_to_sand_c16(uint8_t * dst_c,
++                             unsigned int stride1, unsigned int stride2,
++                             const uint8_t * src_u, const unsigned int src_stride_u,
++                             const uint8_t * src_v, const unsigned int src_stride_v,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++
++void av_rpi_sand30_to_planar_y16(uint8_t * dst, const unsigned int dst_stride,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++void av_rpi_sand30_to_planar_c16(uint8_t * dst_u, const unsigned int dst_stride_u,
++                             uint8_t * dst_v, const unsigned int dst_stride_v,
++                             const uint8_t * src,
++                             unsigned int stride1, unsigned int stride2,
++                             unsigned int _x, unsigned int y,
++                             unsigned int _w, unsigned int h);
++
++
++// w/h in pixels
++void av_rpi_sand16_to_sand8(uint8_t * dst, const unsigned int dst_stride1, const unsigned int dst_stride2,
++                         const uint8_t * src, const unsigned int src_stride1, const unsigned int src_stride2,
++                         unsigned int w, unsigned int h, const unsigned int shr);
++
++
++// dst must contain required pixel format & allocated data buffers
++// Cropping on the src buffer will be honoured and dst crop will be set to zero
++int av_rpi_sand_to_planar_frame(AVFrame * const dst, const AVFrame * const src);
++
++// Returns stride1 for a sand frame: linesize[0], or the constant 128 when RPI_ZC_SAND128_ONLY is defined
++static inline unsigned int av_rpi_sand_frame_stride1(const AVFrame * const frame)
++{
++#ifdef RPI_ZC_SAND128_ONLY
++    // If we are sure we only support 128 byte sand formats replace the
++    // var with a constant which should allow for better optimisation
++    return 128;
++#else
++    return frame->linesize[0];
++#endif
++}
++// Returns stride2 for a sand frame, which is read from linesize[3]
++static inline unsigned int av_rpi_sand_frame_stride2(const AVFrame * const frame)
++{
++    return frame->linesize[3];
++}
++
++// Non-zero when format lies in the enum range AV_PIX_FMT_SAND128..AV_PIX_FMT_RPI4_10 inclusive
++static inline int av_rpi_is_sand_format(const int format)
++{
++    return (format >= AV_PIX_FMT_SAND128 && format <= AV_PIX_FMT_RPI4_10); // assumes every value in this range is a sand format - verify against pixfmt.h
++}
++// Non-zero when the frame's pixel format passes av_rpi_is_sand_format()
++static inline int av_rpi_is_sand_frame(const AVFrame * const frame)
++{
++    return av_rpi_is_sand_format(frame->format);
++}
++// Non-zero when the frame's format is AV_PIX_FMT_SAND128 or AV_PIX_FMT_RPI4_8
++static inline int av_rpi_is_sand8_frame(const AVFrame * const frame)
++{
++    return (frame->format == AV_PIX_FMT_SAND128 || frame->format == AV_PIX_FMT_RPI4_8);
++}
++// Non-zero when the frame's format lies in the enum range AV_PIX_FMT_SAND64_10..AV_PIX_FMT_SAND64_16 inclusive
++static inline int av_rpi_is_sand16_frame(const AVFrame * const frame)
++{
++    return (frame->format >= AV_PIX_FMT_SAND64_10 && frame->format <= AV_PIX_FMT_SAND64_16);
++}
++// Non-zero when the frame's format is AV_PIX_FMT_RPI4_10
++static inline int av_rpi_is_sand30_frame(const AVFrame * const frame)
++{
++    return (frame->format == AV_PIX_FMT_RPI4_10);
++}
++// Left shift applied to an x coordinate by the offset helpers: 0 for 8-bit sand frames, 1 for everything else
++static inline int av_rpi_sand_frame_xshl(const AVFrame * const frame)
++{
++    return av_rpi_is_sand8_frame(frame) ? 0 : 1; // NOTE(review): RPI4_10 (sand30) frames also get 1 - confirm callers expect that
++}
++
++// If x is measured in bytes (not pixels) then this works for sand64_16 as
++// well as sand128 - but in the general case we work that out
++// Offset of luma position (x_y, y), as added to data[0] by av_rpi_sand_frame_pos_y()
++static inline unsigned int av_rpi_sand_frame_off_y(const AVFrame * const frame, const unsigned int x_y, const unsigned int y)
++{
++    const unsigned int stride1 = av_rpi_sand_frame_stride1(frame);
++    const unsigned int stride2 = av_rpi_sand_frame_stride2(frame);
++    const unsigned int x = x_y << av_rpi_sand_frame_xshl(frame); // Scale x by the per-format shift
++    const unsigned int x1 = x & (stride1 - 1); // Part of x below stride1 (assumes stride1 is a power of 2)
++    const unsigned int x2 = x ^ x1; // Remaining high part of x, a multiple of stride1 under that assumption
++
++    return x1 + stride1 * y + stride2 * x2;
++}
++// Offset of chroma position (x_c, y_c), as added to data[1] by av_rpi_sand_frame_pos_c()
++static inline unsigned int av_rpi_sand_frame_off_c(const AVFrame * const frame, const unsigned int x_c, const unsigned int y_c)
++{
++    const unsigned int stride1 = av_rpi_sand_frame_stride1(frame);
++    const unsigned int stride2 = av_rpi_sand_frame_stride2(frame);
++    const unsigned int x = x_c << (av_rpi_sand_frame_xshl(frame) + 1); // One more doubling than the luma version
++    const unsigned int x1 = x & (stride1 - 1); // Part of x below stride1 (assumes stride1 is a power of 2)
++    const unsigned int x2 = x ^ x1; // Remaining high part of x, a multiple of stride1 under that assumption
++
++    return x1 + stride1 * y_c + stride2 * x2;
++}
++// Pointer into data[0] at the offset av_rpi_sand_frame_off_y() gives for (x, y)
++static inline uint8_t * av_rpi_sand_frame_pos_y(const AVFrame * const frame, const unsigned int x, const unsigned int y)
++{
++    return frame->data[0] + av_rpi_sand_frame_off_y(frame, x, y);
++}
++// Pointer into data[1] at the offset av_rpi_sand_frame_off_c() gives for (x, y)
++static inline uint8_t * av_rpi_sand_frame_pos_c(const AVFrame * const frame, const unsigned int x, const unsigned int y)
++{
++    return frame->data[1] + av_rpi_sand_frame_off_c(frame, x, y);
++}
++
++#endif
++
+--- /dev/null
++++ b/pi-util/BUILD.txt
+@@ -0,0 +1,29 @@
++Building Pi FFmpeg
++==================
++
++Configuration:
++=============
++
++These instructions work for cross compiles from Ubuntu 16.04 & Ubuntu
++18.04. I would expect most other linux environments to work but I haven't
++tried them.
++
++pi-util/conf_pi2.sh
++
++contains suitable options to build the code for Pi2/3.  It expects to find
++git clones of
++
++https://github.com/raspberrypi/tools
++https://github.com/raspberrypi/firmware
++
++in the parent of the FFmpeg directory.  I recommend using --depth 1 to avoid a
++lot of history you don't want.
++
++If you have a copy of qasm.py in ../local/bin then the .qasm sources will be
++rebuilt.  Otherwise the prebuilt .c & .h files will be used.
++Likewise ../local/bin/vasmvidcore_std will enable VPU code rebuild
++
++pi-util/conf_p1.sh should configure for Pi1.  Beware that as of this time
++H265 QPU acceleration is broken on Pi1 and so it is disabled.
++
++
+--- /dev/null
++++ b/pi-util/NOTES.txt
+@@ -0,0 +1,69 @@
++Notes on the hevc_rpi decoder & associated support code
++-------------------------------------------------------
++
++There are 3 main parts to the existing code:
++
++1) The decoder - this is all in libavcodec as rpi_hevc*.
++
++2) A few filters to deal with Sand frames and a small patch to
++automatically select the sand->i420 converter when required.
++
++3) A kludge in ffmpeg.c to display the decoded video. This could & should
++be converted into a proper ffmpeg display module.
++
++
++Decoder
++-------
++
++The decoder is a modified version of the existing ffmpeg hevc decoder.
++Generally it is ~100% faster than the existing ffmpeg hevc s/w decoder.
++More complex bitstreams can be up to ~200% faster but particularly easy
++streams can cut its advantage down to ~50%.  This means that a Pi3+ can
++display nearly all 8-bit 1080p30 streams and with some overclocking it can
++display most lower bitrate 10-bit 1080p30 streams - this latter case is
++not helped by the requirement to downsample to 8-bit before display on a
++Pi.
++
++It has had co-processor offload added for inter-pred and large block
++residual transform.  Various parts have had optimized ARM NEON assembler
++added and the existing ARM asm sections have been profiled and
++re-optimized for A53. The main C code has been substantially reworked at
++its lower levels in an attempt to optimize it and minimize memory
++bandwidth. To some extent code paths that deal with frame types that it
++doesn't support have been pruned.
++
++It outputs frames in Broadcom Sand format. This is a somewhat annoying
++layout that doesn't fit into ffmpeg's standard frame descriptions. It has
++vertical stripes of 128 horizontal pixels (64 in 10 bit forms) with Y for
++the stripe followed by interleaved U & V, that is then followed by the Y
++for the next stripe, etc. The final stripe is always padded to
++stripe-width. This is used in an attempt to help with cache locality and
++cut down on the number of dram bank switches. It is annoying to use for
++inter-pred with conventional processing but the way the Pi QPU (which is
++used for inter-pred) works means that it has negligible downsides here and
++the improved memory performance exceeds the overhead of the increased
++complexity in the rest of the code.
++
++Frames must be allocated out of GPU memory (as otherwise they can't be
++accessed by the co-processors). Utility functions (in rpi_zc.c) have been
++written to make this easier. As the frames are already in GPU memory they
++can be displayed by the Pi h/w without any further copying.
++
++
++Known non-features
++------------------
++
++Frame allocation should probably be done in some other way in order to fit
++into the standard framework better.
++
++Sand frames are currently declared as software frames, there is an
++argument that they should be hardware frames but they aren't really.
++
++There must be a better way of auto-selecting the hevc_rpi decoder over the
++normal s/w hevc decoder, but I became confused by the existing h/w
++acceleration framework and what I wanted to do didn't seem to fit in
++neatly.
++
++Display should be a proper device rather than a kludge in ffmpeg.c
++
++
+--- /dev/null
++++ b/pi-util/TESTMESA.txt
+@@ -0,0 +1,82 @@
++# Setup & Build instructions for testing Argon30 mesa support (on Pi4)
++
++# These assume that the drm_mmal test for Sand8 has been built on this Pi
++# as build relies on many of the same files
++
++# 1st get everything required to build ffmpeg
++# If sources aren't already enabled on your Pi then enable them
++sudo su
++sed "s/#deb-src/deb-src/" /etc/apt/sources.list > /tmp/sources.list
++sed "s/#deb-src/deb-src/" /etc/apt/sources.list.d/raspi.list > /tmp/raspi.list
++mv /tmp/sources.list /etc/apt/
++mv /tmp/raspi.list /etc/apt/sources.list.d/
++apt update
++
++# Get dependencies
++sudo apt build-dep ffmpeg
++
++sudo apt install meson libepoxy-dev libxcb-dri3-dev libxcb1-dev libx11-dev libx11-xcb-dev libdrm-dev
++
++# Enable H265 V4L2 request decoder
++sudo su
++echo dtoverlay=rpivid-v4l2 >> /boot/config.txt
++# You may also want to add more CMA if you are going to try 4k videos
++# Change the dtoverlay=vc4-fkms-v3d line in config.txt to read
++# dtoverlay=vc4-fkms-v3d,cma-512
++reboot
++# Check it has turned up
++ls -la /dev/video*
++# This should include video19
++# crw-rw----+ 1 root video 81, 7 Aug  4 17:25 /dev/video19
++
++# Currently on the Pi the linux headers from the debian distro don't match
++# the kernel that we ship and we need to update them - hopefully this step
++# will be unneeded in the future
++sudo apt install git bc bison flex libssl-dev make
++git clone --depth=1 https://github.com/raspberrypi/linux --branch rpi-5.10.y
++cd linux
++KERNEL=kernel7l
++make bcm2711_defconfig
++make headers_install
++sudo cp -r usr/include/linux /usr/include
++cd ..
++
++# Config - this builds a statically linked ffmpeg which is easier for testing
++pi-util/conf_native.sh --noshared
++
++# Build (this is a bit dull)
++# If you want to poke at the source, libavdevice/egl_vout.c contains the
++# output code
++cd out/armv7-static-rel
++
++# Check that you have actually configured V4L2 request
++grep HEVC_V4L2REQUEST config.h
++# You are hoping for
++# #define CONFIG_HEVC_V4L2REQUEST_HWACCEL 1
++# if you get 0 then the config has failed
++
++make -j6
++
++# Grab test streams
++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-h264.mkv
++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc.mkv
++wget http://www.jell.yfish.us/media/jellyfish-3-mbps-hd-hevc-10bit.mkv
++
++# Test i420 output (works currently)
++./ffmpeg -no_cvt_hw -vcodec h264_v4l2m2m -i jellyfish-3-mbps-hd-h264.mkv -f vout_egl -
++
++# Test Sand8 output - doesn't currently work but should once you have
++# Sand8 working in drm_mmal. I can't guarantee that this will work as
++# I can't test this path with a known working format, but the debug looks
++# good.  If this doesn't work & drm_mmal does with sand8 then come back to me
++# The "show_all 1" forces vout to display every frame otherwise it drops any
++# frame that would cause it to block
++./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc.mkv -show_all 1 -f vout_egl -
++
++# Test Sand30 - doesn't currently work
++# (Beware that when FFmpeg errors out it often leaves your terminal window
++# in a state where you need to reset it)
++./ffmpeg -no_cvt_hw -hwaccel drm -vcodec hevc -i jellyfish-3-mbps-hd-hevc-10bit.mkv -f vout_egl -
++
++
++
+--- /dev/null
++++ b/pi-util/clean_usr_libs.sh
+@@ -0,0 +1,23 @@
++set -e # Deletes the libav* libraries from the three library directories below; stop on the first failing command
++U=/usr/lib/arm-linux-gnueabihf # First library directory
++rm -f $U/libavcodec.*
++rm -f $U/libavdevice.*
++rm -f $U/libavfilter.*
++rm -f $U/libavformat.*
++rm -f $U/libavresample.*
++rm -f $U/libavutil.*
++U=/usr/lib/arm-linux-gnueabihf/neon/vfp # Second library directory (neon/vfp subdirectory of the first)
++rm -f $U/libavcodec.*
++rm -f $U/libavdevice.*
++rm -f $U/libavfilter.*
++rm -f $U/libavformat.*
++rm -f $U/libavresample.*
++rm -f $U/libavutil.*
++U=/usr/lib/aarch64-linux-gnu # Third library directory
++rm -f $U/libavcodec.*
++rm -f $U/libavdevice.*
++rm -f $U/libavfilter.*
++rm -f $U/libavformat.*
++rm -f $U/libavresample.*
++rm -f $U/libavutil.*
++
+--- /dev/null
++++ b/pi-util/conf_arm64_native.sh
+@@ -0,0 +1,45 @@
++echo "Configure for ARM64 native build"
++# Configures a native ARM64 build in a directory under out/; pass --noshared as $1 for static libraries
++#RPI_KEEPS="-save-temps=obj"
++
++SHARED_LIBS="--enable-shared"
++if [ "$1" = "--noshared" ]; then # "=" rather than "==" so the test also works when run by a POSIX sh
++  SHARED_LIBS="--disable-shared"
++  echo Static libs
++  OUT=out/arm64-static-rel
++else
++  echo Shared libs
++  OUT=out/arm64-shared-rel
++fi
++
++mkdir -p "$OUT" || exit 1 # Do not run configure from the wrong directory if the build dir is unusable
++cd "$OUT" || exit 1
++
++A=aarch64-linux-gnu
++USR_PREFIX=`pwd`/install # Install tree lives inside the build directory
++LIB_PREFIX=$USR_PREFIX/lib/$A
++INC_PREFIX=$USR_PREFIX/include/$A
++
++../../configure \
++ --prefix=$USR_PREFIX\
++ --libdir=$LIB_PREFIX\
++ --incdir=$INC_PREFIX\
++ --disable-stripping\
++ --disable-thumb\
++ --disable-mmal\
++ --enable-sand\
++ --enable-v4l2-request\
++ --enable-libdrm\
++ --enable-epoxy\
++ --enable-libudev\
++ --enable-vout-drm\
++ --enable-vout-egl\
++ $SHARED_LIBS\
++ --extra-cflags="-ggdb"
++
++# --enable-decoder=hevc_rpi\
++# --enable-extra-warnings\
++# --arch=armv71\
++
++# gcc option for getting asm listing
++# -Wa,-ahls
+--- /dev/null
++++ b/pi-util/conf_h265.2016.csv
+@@ -0,0 +1,195 @@
++1,HEVC_v1/AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5,8
++1,HEVC_v1/AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5,8
++1,HEVC_v1/AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5,8
++1,HEVC_v1/AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5,8
++1,HEVC_v1/AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5,8
++1,HEVC_v1/AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5,8
++1,HEVC_v1/AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5,8
++1,HEVC_v1/AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5,8
++1,HEVC_v1/BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5,8
++1,HEVC_v1/CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5,8
++1,HEVC_v1/CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5,8
++1,HEVC_v1/CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5,8
++1,HEVC_v1/CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5,8
++1,HEVC_v1/CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5,8
++1,HEVC_v1/cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5,8
++1,HEVC_v1/CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5,8
++1,HEVC_v1/CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5,8
++1,HEVC_v1/DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5,10
++1,HEVC_v1/DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5,8
++1,HEVC_v1/DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5,8
++1,HEVC_v1/DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5,8
++1,HEVC_v1/DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5,8
++1,HEVC_v1/DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5,8
++1,HEVC_v1/DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5,8
++1,HEVC_v1/DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5,8
++1,HEVC_v1/DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5,8
++1,HEVC_v1/DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5,8
++1,HEVC_v1/ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5,8
++1,HEVC_v1/ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5,8
++1,HEVC_v1/ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5,8
++1,HEVC_v1/EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5,8
++1,HEVC_v1/FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5,8
++1,HEVC_v1/HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5,8
++1,HEVC_v1/INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5,8
++1,HEVC_v1/INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5,10
++1,HEVC_v1/ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5,8
++1,HEVC_v1/ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5,8
++1,HEVC_v1/ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5,8
++1,HEVC_v1/ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5,8
++1,HEVC_v1/ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5,8
++1,HEVC_v1/IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5,8
++1,HEVC_v1/IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5,8
++1,HEVC_v1/IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5,8
++1,HEVC_v1/LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5,8
++1,HEVC_v1/LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5,8
++1,HEVC_v1/LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5,8
++1,HEVC_v1/MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5,8
++1,HEVC_v1/MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5,8
++1,HEVC_v1/MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5,8
++1,HEVC_v1/MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5,8
++1,HEVC_v1/MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5,8
++1,HEVC_v1/MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5,8
++1,HEVC_v1/MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5,8
++1,HEVC_v1/MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5,8
++1,HEVC_v1/MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5,8
++1,HEVC_v1/MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5,8
++1,HEVC_v1/MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5,8
++1,HEVC_v1/MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5,8
++1,HEVC_v1/MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5,8
++1,HEVC_v1/NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5,8
++1,HEVC_v1/NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5,8
++1,HEVC_v1/NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5,8
++1,HEVC_v1/OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5,8
++1,HEVC_v1/OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5,8
++1,HEVC_v1/OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5,8
++1,HEVC_v1/PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5,8
++1,HEVC_v1/PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5,8
++1,HEVC_v1/PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5,8
++1,HEVC_v1/PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5,8
++1,HEVC_v1/PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5,8
++1,HEVC_v1/PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5,8
++1,HEVC_v1/PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5,8
++1,HEVC_v1/PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5,8
++1,HEVC_v1/PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5,8
++1,HEVC_v1/POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5,8
++1,HEVC_v1/PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5,8
++1,HEVC_v1/PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5,8
++1,HEVC_v1/RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5,8
++1,HEVC_v1/RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5,8
++1,HEVC_v1/RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5,8
++1,HEVC_v1/RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5,8
++1,HEVC_v1/RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5,8
++1,HEVC_v1/RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5,8
++1,HEVC_v1/RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5,8
++1,HEVC_v1/RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5,8
++1,HEVC_v1/RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5,8
++1,HEVC_v1/RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5,8
++1,HEVC_v1/RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5,8
++1,HEVC_v1/RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5,8
++1,HEVC_v1/RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5,8
++1,HEVC_v1/RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5,8
++1,HEVC_v1/RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5,8
++1,HEVC_v1/RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5,8
++1,HEVC_v1/RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5,8
++1,HEVC_v1/SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5,8
++1,HEVC_v1/SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5,8
++1,HEVC_v1/SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5,8
++1,HEVC_v1/SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5,8
++1,HEVC_v1/SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5,8
++1,HEVC_v1/SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5,8
++1,HEVC_v1/SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5,8
++1,HEVC_v1/SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5,8
++1,HEVC_v1/SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt,8
++1,HEVC_v1/SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt,8
++1,HEVC_v1/SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5,8
++1,HEVC_v1/SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5,8
++1,HEVC_v1/SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5,8
++1,HEVC_v1/SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5,8
++1,HEVC_v1/SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5,8
++1,HEVC_v1/SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5,8
++1,HEVC_v1/SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5,8
++1,HEVC_v1/STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5,8
++1,HEVC_v1/STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5,8
++1,HEVC_v1/TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5,8
++1,HEVC_v1/TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5,8
++1,HEVC_v1/TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5,8
++1,HEVC_v1/TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5,8
++1,HEVC_v1/TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5,8
++1,HEVC_v1/TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5,8
++3,HEVC_v1/TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth,10
++1,HEVC_v1/TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5,8
++1,HEVC_v1/VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5,8
++3,HEVC_v1/VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???,8
++1,HEVC_v1/WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5,10
++1,HEVC_v1/WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5,8
++1,HEVC_v1/WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5,8
++1,HEVC_v1/WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5,10
++1,HEVC_v1/WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5,8
++1,HEVC_v1/WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5,10
++1,HEVC_v1/WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5,8
++1,RExt/ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_2.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_yuv_2.md5,0
++0,RExt/Bitdepth_A_RExt_Sony_1,Bitdepth_A_RExt_Sony_1.bin,md5sum.txt,8
++0,RExt/Bitdepth_B_RExt_Sony_1,Bitdepth_B_RExt_Sony_1.bin,md5sum.txt,8
++0,RExt/CCP_10bit_RExt_QCOM,CCP_10bit_RExt_QCOM.bin,CCP_10bit_RExt_QCOM_md5sum.txt,10
++0,RExt/CCP_12bit_RExt_QCOM,CCP_12bit_RExt_QCOM.bin,CCP_12bit_RExt_QCOM_md5sum.txt,8
++0,RExt/CCP_8bit_RExt_QCOM,CCP_8bit_RExt_QCOM.bin,CCP_8bit_RExt_QCOM_md5sum.txt,8
++1,RExt/ExplicitRdpcm_A_BBC_1,ExplicitRdpcm_A_BBC_1.bit,md5sum.txt,0
++0,RExt/ExplicitRdpcm_B_BBC_2,ExplicitRdpcm_B_BBC_1.bit,md5sum.txt,8
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_10BIT_RExt_Sony_1.md5,10
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_12BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_16BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_HIGHTHROUGHPUT_444_16_INTRA_8BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_10BIT_RExt_Sony_1.md5,10
++0,RExt/EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_12BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_16BIT_RExt_Sony_1.md5,8
++0,RExt/EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.bit,EXTPREC_MAIN_444_16_INTRA_8BIT_RExt_Sony_1.md5,8
++1,RExt/GENERAL_10b_420_RExt_Sony_1,GENERAL_10b_420_RExt_Sony_1.bit,GENERAL_10b_420_RExt_Sony_1.md5,10
++1,RExt/GENERAL_10b_422_RExt_Sony_1,GENERAL_10b_422_RExt_Sony_1.bit,GENERAL_10b_422_RExt_Sony_1.md5,0
++1,RExt/GENERAL_10b_444_RExt_Sony_2,GENERAL_10b_444_RExt_Sony_2.bit,GENERAL_10b_444_RExt_Sony_2.md5,0
++1,RExt/GENERAL_12b_400_RExt_Sony_1,GENERAL_12b_400_RExt_Sony_1.bit,GENERAL_12b_400_RExt_Sony_1.md5,0
++1,RExt/GENERAL_12b_420_RExt_Sony_1,GENERAL_12b_420_RExt_Sony_1.bit,GENERAL_12b_420_RExt_Sony_1.md5,0
++1,RExt/GENERAL_12b_422_RExt_Sony_1,GENERAL_12b_422_RExt_Sony_1.bit,GENERAL_12b_422_RExt_Sony_1.md5,0
++1,RExt/GENERAL_12b_444_RExt_Sony_2,GENERAL_12b_444_RExt_Sony_2.bit,GENERAL_12b_444_RExt_Sony_2.md5,0
++0,RExt/GENERAL_16b_400_RExt_Sony_1,GENERAL_16b_400_RExt_Sony_1.bit,GENERAL_16b_400_RExt_Sony_1.md5,0
++0,RExt/GENERAL_16b_444_highThroughput_RExt_Sony_2,GENERAL_16b_444_highThroughput_RExt_Sony_2.bit,GENERAL_16b_444_highThroughput_RExt_Sony_2.md5,8
++0,RExt/GENERAL_16b_444_RExt_Sony_2,GENERAL_16b_444_RExt_Sony_2.bit,GENERAL_16b_444_RExt_Sony_2.md5,8
++1,RExt/GENERAL_8b_400_RExt_Sony_1,GENERAL_8b_400_RExt_Sony_1.bit,GENERAL_8b_400_RExt_Sony_1.md5,0
++1,RExt/GENERAL_8b_420_RExt_Sony_1,GENERAL_8b_420_RExt_Sony_1.bit,GENERAL_8b_420_RExt_Sony_1.md5,8
++1,RExt/GENERAL_8b_444_RExt_Sony_2,GENERAL_8b_444_RExt_Sony_2.bit,GENERAL_8b_444_RExt_Sony_2.md5,0
++1,RExt/IPCM_A_RExt_NEC_2,IPCM_A_RExt_NEC_2.bit,IPCM_A_RExt_NEC_2_yuv.md5,0
++1,RExt/IPCM_B_RExt_NEC,IPCM_B_RExt_NEC.bit,IPCM_B_RExt_NEC_yuv.md5,0
++1,RExt/Main_422_10_A_RExt_Sony_2,Main_422_10_A_RExt_Sony_2.bin,md5sum.txt,0
++1,RExt/Main_422_10_B_RExt_Sony_2,Main_422_10_B_RExt_Sony_2.bin,md5sum.txt,0
++1,RExt/PERSIST_RPARAM_A_RExt_Sony_3,PERSIST_RPARAM_A_RExt_Sony_3.bit,PERSIST_RPARAM_A_RExt_Sony_3.md5,0
++1,RExt/QMATRIX_A_RExt_Sony_1,QMATRIX_A_RExt_Sony_1.bit,QMATRIX_A_RExt_Sony_1.md5,0
++0,RExt/SAO_A_RExt_MediaTek_1,SAO_A_RExt_MediaTek_1.bit,SAO_A_RExt_MediaTek_1.md5, # Runs out of memory - could be fixed,8
++0,RExt/TSCTX_10bit_I_RExt_SHARP_1,TSCTX_10bit_I_RExt_SHARP_1.bin,TSCTX_10bit_I_RExt_SHARP_1.md5,10
++0,RExt/TSCTX_10bit_RExt_SHARP_1,TSCTX_10bit_RExt_SHARP_1.bin,TSCTX_10bit_RExt_SHARP_1.md5,10
++0,RExt/TSCTX_12bit_I_RExt_SHARP_1,TSCTX_12bit_I_RExt_SHARP_1.bin,TSCTX_12bit_I_RExt_SHARP_1.md5,8
++0,RExt/TSCTX_12bit_RExt_SHARP_1,TSCTX_12bit_RExt_SHARP_1.bin,TSCTX_12bit_RExt_SHARP_1.md5,8
++0,RExt/TSCTX_8bit_I_RExt_SHARP_1,TSCTX_8bit_I_RExt_SHARP_1.bin,TSCTX_8bit_I_RExt_SHARP_1.md5,8
++0,RExt/TSCTX_8bit_RExt_SHARP_1,TSCTX_8bit_RExt_SHARP_1.bin,TSCTX_8bit_RExt_SHARP_1.md5,8
++0,RExt/WAVETILES_RExt_Sony_2,WAVETILES_RExt_Sony_2.bit,WAVETILES_RExt_Sony_2.md5,8
++1,local/sao_cu16_mobile_344x280,sao_cu16_mobile_344x280.265,sao_cu16_mobile_344x280.md5,8
++1,local/dblk_cu16_mobile_344x280,dblk_cu16_mobile_344x280.265,dblk_cu16_mobile_344x280.md5,8
++1,local/dblksao_cu16_mobile_344x280,dblksao_cu16_mobile_344x280.265,dblksao_cu16_mobile_344x280.md5,8
++1,local/dblk_pu32_horses_832x448,dblk_pu32_horses_832x448.265,dblk_pu32_horses_832x448.md5,8
++1,local/intra_pred_21_laps,intra_pred_21_laps.265,intra_pred_21_laps.md5,8
+--- /dev/null
++++ b/pi-util/conf_h265.2016_HEVC_v1.csv
+@@ -0,0 +1,147 @@
++1,AMP_A_Samsung_7,AMP_A_Samsung_7.bin,AMP_A_Samsung_7.md5
++1,AMP_B_Samsung_7,AMP_B_Samsung_7.bin,AMP_B_Samsung_7.md5
++1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5
++1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5
++1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5
++1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5
++1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5
++1,AMVP_C_Samsung_7,AMVP_C_Samsung_7.bin,AMVP_C_Samsung_7.md5
++1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5
++1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5
++1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5
++1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5
++1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5
++1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5
++1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5
++1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5
++1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5
++1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5
++1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5
++1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5
++1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5
++1,DBLK_A_MAIN10_VIXS_4,DBLK_A_MAIN10_VIXS_4.bit,DBLK_A_MAIN10_VIXS_4.md5
++1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5
++1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5
++1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5
++1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5
++1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5
++1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5
++1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5
++1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5
++1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5
++1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5
++1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5
++1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5
++1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5
++1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5
++1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5
++1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5
++1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5
++1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5
++1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5
++1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5
++1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5
++1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5
++1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5
++1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5
++1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5
++1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5
++1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5
++1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5
++1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5
++1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5
++1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5
++1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5
++1,MAXBINS_A_TI_5,MAXBINS_A_TI_5.bit,MAXBINS_A_TI_5_yuv.md5
++1,MAXBINS_B_TI_5,MAXBINS_B_TI_5.bit,MAXBINS_B_TI_5_yuv.md5
++1,MAXBINS_C_TI_5,MAXBINS_C_TI_5.bit,MAXBINS_C_TI_5_yuv.md5
++1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5
++1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5
++1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5
++1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5
++1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5
++1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5
++1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5
++1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5
++1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5
++1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5
++1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5
++1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5
++1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5
++1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5
++1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5
++1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5
++1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5
++1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5
++1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5
++1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5
++1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5
++1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5
++1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5
++1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5
++1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5
++1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5
++1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5
++1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5
++1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5
++1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5
++1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5
++1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5
++1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5
++1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5
++1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5
++1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5
++1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5
++1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5
++1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5
++1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5
++1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5
++1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5
++1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5
++1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5
++1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5
++1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5
++1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5
++1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5
++1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5
++1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5
++1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5
++1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5
++1,SAO_H_Parabola_1,SAO_H_Parabola_1.bit,SAO_H_Parabola_1.md5
++2,SAODBLK_A_MainConcept_4,SAODBLK_A_MainConcept_4.bin,SAODBLK_A_MainConcept_4_md5.txt
++2,SAODBLK_B_MainConcept_4,SAODBLK_B_MainConcept_4.bin,SAODBLK_B_MainConcept_4_md5.txt
++1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5
++1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5
++1,SLIST_A_Sony_5,SLIST_A_Sony_5.bin,SLIST_A_Sony_5_yuv.md5
++1,SLIST_B_Sony_9,SLIST_B_Sony_9.bin,SLIST_B_Sony_9_yuv.md5
++1,SLIST_C_Sony_4,SLIST_C_Sony_4.bin,SLIST_C_Sony_4_yuv.md5
++1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5
++1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5
++1,STRUCT_A_Samsung_7,STRUCT_A_Samsung_7.bin,STRUCT_A_Samsung_7.md5
++1,STRUCT_B_Samsung_7,STRUCT_B_Samsung_7.bin,STRUCT_B_Samsung_7.md5
++1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5
++1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5
++1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5
++1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5
++1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5
++1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5
++3,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # unequal bit depth
++1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5
++1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5
++3,VPSSPSPPS_A_MainConcept_1,VPSSPSPPS_A_MainConcept_1.bin,VPSSPSPPS_A_MainConcept_1_md5.txt, # ???
++1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5
++1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5
++1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5
++1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5
++1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5
++1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5
++1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5
++1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5
++1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5
++1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5
++1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5
++1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5
++1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5
++1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5
++1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5
++1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5
+--- /dev/null
++++ b/pi-util/conf_h265.csv
+@@ -0,0 +1,144 @@
++1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.bit,ADJUST_IPRED_ANGLE_A_RExt_Mitsubishi_1.md5
++1,AMP_A_Samsung_6,AMP_A_Samsung_6.bin,AMP_A_Samsung_6.md5
++1,AMP_B_Samsung_6,AMP_B_Samsung_6.bin,AMP_B_Samsung_6.md5
++1,AMP_D_Hisilicon_3,AMP_D_Hisilicon.bit,AMP_D_Hisilicon_3.yuv.md5
++1,AMP_E_Hisilicon_3,AMP_E_Hisilicon.bit,AMP_E_Hisilicon_3.yuv.md5
++1,AMP_F_Hisilicon_3,AMP_F_Hisilicon_3.bit,AMP_F_Hisilicon_3.yuv.md5
++1,AMVP_A_MTK_4,AMVP_A_MTK_4.bit,AMVP_A_MTK_4.md5
++1,AMVP_B_MTK_4,AMVP_B_MTK_4.bit,AMVP_B_MTK_4.md5
++1,AMVP_C_Samsung_6,AMVP_C_Samsung_6.bin,AMVP_C_Samsung_6.md5
++1,BUMPING_A_ericsson_1,BUMPING_A_ericsson_1.bit,BUMPING_A_ericsson_1.md5
++1,CAINIT_A_SHARP_4,CAINIT_A_SHARP_4.bit,CAINIT_A_SHARP_4.md5
++1,CAINIT_B_SHARP_4,CAINIT_B_SHARP_4.bit,CAINIT_B_SHARP_4.md5
++1,CAINIT_C_SHARP_3,CAINIT_C_SHARP_3.bit,CAINIT_C_SHARP_3.md5
++1,CAINIT_D_SHARP_3,CAINIT_D_SHARP_3.bit,CAINIT_D_SHARP_3.md5
++1,CAINIT_E_SHARP_3,CAINIT_E_SHARP_3.bit,CAINIT_E_SHARP_3.md5
++1,CAINIT_F_SHARP_3,CAINIT_F_SHARP_3.bit,CAINIT_F_SHARP_3.md5
++1,CAINIT_G_SHARP_3,CAINIT_G_SHARP_3.bit,CAINIT_G_SHARP_3.md5
++1,CAINIT_H_SHARP_3,CAINIT_H_SHARP_3.bit,CAINIT_H_SHARP_3.md5
++1,CIP_A_Panasonic_3,CIP_A_Panasonic_3.bit,CIP_A_Panasonic_3_yuv.md5
++1,cip_B_NEC_3,cip_B_NEC_3.bit,cip_B_NEC_3.md5
++1,CIP_C_Panasonic_2,CIP_C_Panasonic_2.bit,CIP_C_Panasonic_2_yuv.md5
++1,CONFWIN_A_Sony_1,CONFWIN_A_Sony_1.bit,CONFWIN_A_Sony_1.md5
++1,DBLK_A_MAIN10_VIXS_3,DBLK_A_MAIN10_VIXS_3.bit,DBLK_A_MAIN10_VIXS_3.md5
++1,DBLK_A_SONY_3,DBLK_A_SONY_3.bit,DBLK_A_SONY_3.bit.yuv.md5
++1,DBLK_B_SONY_3,DBLK_B_SONY_3.bit,DBLK_B_SONY_3.bit.yuv.md5
++1,DBLK_C_SONY_3,DBLK_C_SONY_3.bit,DBLK_C_SONY_3.bit.yuv.md5
++1,DBLK_D_VIXS_2,DBLK_D_VIXS_2.bit,DBLK_D_VIXS_2_yuv.md5
++1,DBLK_E_VIXS_2,DBLK_E_VIXS_2.bit,DBLK_E_VIXS_2_yuv.md5
++1,DBLK_F_VIXS_2,DBLK_F_VIXS_2.bit,DBLK_F_VIXS_2_yuv.md5
++1,DBLK_G_VIXS_2,DBLK_G_VIXS_2.bit,DBLK_G_VIXS_2_yuv.md5
++1,DELTAQP_A_BRCM_4,DELTAQP_A_BRCM_4.bit,DELTAQP_A_BRCM_4_yuv.md5
++1,DELTAQP_B_SONY_3,DELTAQP_B_SONY_3.bit,DELTAQP_B_SONY_3.bit.yuv.md5
++1,DELTAQP_C_SONY_3,DELTAQP_C_SONY_3.bit,DELTAQP_C_SONY_3.bit.yuv.md5
++1,DSLICE_A_HHI_5,DSLICE_A_HHI_5.bin,DSLICE_A_HHI_5.md5
++1,DSLICE_B_HHI_5,DSLICE_B_HHI_5.bin,DSLICE_B_HHI_5.md5
++1,DSLICE_C_HHI_5,DSLICE_C_HHI_5.bin,DSLICE_C_HHI_5.md5
++1,ENTP_A_QUALCOMM_1,ENTP_A_Qualcomm_1.bit,ENTP_A_Qualcomm_1.md5
++1,ENTP_B_Qualcomm_1,ENTP_B_Qualcomm_1.bit,ENTP_B_Qualcomm_1.md5
++1,ENTP_C_Qualcomm_1,ENTP_C_Qualcomm_1.bit,ENTP_C_Qualcomm_1.md5
++1,EXT_A_ericsson_4,EXT_A_ericsson_4.bit,EXT_A_ericsson_4.md5
++1,FILLER_A_Sony_1,FILLER_A_Sony_1.bit,FILLER_A_Sony_1.md5
++1,HRD_A_Fujitsu_3,HRD_A_Fujitsu_3.bin,HRD_A_Fujitsu_3.md5
++1,INITQP_A_Sony_1,INITQP_A_Sony_1.bit,INITQP_A_Sony_1.md5
++1,INITQP_B_Main10_Sony_1,INITQP_B_Main10_Sony_1.bit,INITQP_B_Main10_Sony_1.md5
++1,ipcm_A_NEC_3,ipcm_A_NEC_3.bit,ipcm_A_NEC_3.md5
++1,ipcm_B_NEC_3,ipcm_B_NEC_3.bit,ipcm_B_NEC_3.md5
++1,ipcm_C_NEC_3,ipcm_C_NEC_3.bit,ipcm_C_NEC_3.md5
++1,ipcm_D_NEC_3,ipcm_D_NEC_3.bit,ipcm_D_NEC_3.md5
++1,ipcm_E_NEC_2,ipcm_E_NEC_2.bit,ipcm_E_NEC_2.md5
++1,IPRED_A_docomo_2,IPRED_A_docomo_2.bit,IPRED_A_docomo_2.md5
++1,IPRED_B_Nokia_3,IPRED_B_Nokia_3.bit,IPRED_B_Nokia_3_yuv.md5
++1,IPRED_C_Mitsubishi_3,IPRED_C_Mitsubishi_3.bit,IPRED_C_Mitsubishi_3_yuv.md5
++1,LS_A_Orange_2,LS_A_Orange_2.bit,LS_A_Orange_2_yuv.md5
++1,LS_B_Orange_4,LS_B_Orange_4.bit,LS_B_Orange_4_yuv.md5
++1,LTRPSPS_A_Qualcomm_1,LTRPSPS_A_Qualcomm_1.bit,LTRPSPS_A_Qualcomm_1.md5
++1,MAXBINS_A_TI_4,MAXBINS_A_TI_4.bit,MAXBINS_A_TI_4.md5
++1,MAXBINS_B_TI_4,MAXBINS_B_TI_4.bit,MAXBINS_B_TI_4.md5
++1,MAXBINS_C_TI_4,MAXBINS_C_TI_4.bit,MAXBINS_C_TI_4.md5
++1,MERGE_A_TI_3,MERGE_A_TI_3.bit,MERGE_A_TI_3.md5
++1,MERGE_B_TI_3,MERGE_B_TI_3.bit,MERGE_B_TI_3.md5
++1,MERGE_C_TI_3,MERGE_C_TI_3.bit,MERGE_C_TI_3.md5
++1,MERGE_D_TI_3,MERGE_D_TI_3.bit,MERGE_D_TI_3.md5
++1,MERGE_E_TI_3,MERGE_E_TI_3.bit,MERGE_E_TI_3.md5
++1,MERGE_F_MTK_4,MERGE_F_MTK_4.bit,MERGE_F_MTK_4.md5
++1,MERGE_G_HHI_4,MERGE_G_HHI_4.bit,MERGE_G_HHI_4.md5
++1,MVCLIP_A_qualcomm_3,MVCLIP_A_qualcomm_3.bit,MVCLIP_A_qualcomm_3.yuv.md5
++1,MVDL1ZERO_A_docomo_4,MVDL1ZERO_A_docomo_4.bit,MVDL1ZERO_A_docomo_4.md5
++1,MVEDGE_A_qualcomm_3,MVEDGE_A_qualcomm_3.bit,MVEDGE_A_qualcomm_3.yuv.md5
++1,NoOutPrior_A_Qualcomm_1,NoOutPrior_A_Qualcomm_1.bit,NoOutPrior_A_Qualcomm_1.md5
++1,NoOutPrior_B_Qualcomm_1,NoOutPrior_B_Qualcomm_1.bit,NoOutPrior_B_Qualcomm_1.md5
++1,NUT_A_ericsson_5,NUT_A_ericsson_5.bit,NUT_A_ericsson_5.md5
++1,OPFLAG_A_Qualcomm_1,OPFLAG_A_Qualcomm_1.bit,OPFLAG_A_Qualcomm_1.md5
++1,OPFLAG_B_Qualcomm_1,OPFLAG_B_Qualcomm_1.bit,OPFLAG_B_Qualcomm_1.md5
++1,OPFLAG_C_Qualcomm_1,OPFLAG_C_Qualcomm_1.bit,OPFLAG_C_Qualcomm_1.md5
++1,PICSIZE_A_Bossen_1,PICSIZE_A_Bossen_1.bin,PICSIZE_A_Bossen_1.md5
++1,PICSIZE_B_Bossen_1,PICSIZE_B_Bossen_1.bin,PICSIZE_B_Bossen_1.md5
++1,PICSIZE_C_Bossen_1,PICSIZE_C_Bossen_1.bin,PICSIZE_C_Bossen_1.md5
++1,PICSIZE_D_Bossen_1,PICSIZE_D_Bossen_1.bin,PICSIZE_D_Bossen_1.md5
++1,PMERGE_A_TI_3,PMERGE_A_TI_3.bit,PMERGE_A_TI_3.md5
++1,PMERGE_B_TI_3,PMERGE_B_TI_3.bit,PMERGE_B_TI_3.md5
++1,PMERGE_C_TI_3,PMERGE_C_TI_3.bit,PMERGE_C_TI_3.md5
++1,PMERGE_D_TI_3,PMERGE_D_TI_3.bit,PMERGE_D_TI_3.md5
++1,PMERGE_E_TI_3,PMERGE_E_TI_3.bit,PMERGE_E_TI_3.md5
++1,POC_A_Bossen_3,POC_A_Bossen_3.bin,POC_A_Bossen_3.md5
++1,PPS_A_qualcomm_7,PPS_A_qualcomm_7.bit,PPS_A_qualcomm_7.yuv.md5
++1,PS_B_VIDYO_3,PS_B_VIDYO_3.bit,PS_B_VIDYO_3_yuv.md5
++1,RAP_A_docomo_6,RAP_A_docomo_6.bit,RAP_A_docomo_6.md5
++1,RAP_B_Bossen_2,RAP_B_Bossen_2.bit,RAP_B_Bossen_2.md5
++1,RPLM_A_qualcomm_4,RPLM_A_qualcomm_4.bit,RPLM_A_qualcomm_4.yuv.md5
++1,RPLM_B_qualcomm_4,RPLM_B_qualcomm_4.bit,RPLM_B_qualcomm_4.yuv.md5
++1,RPS_A_docomo_5,RPS_A_docomo_5.bit,RPS_A_docomo_5.md5
++1,RPS_B_qualcomm_5,RPS_B_qualcomm_5.bit,RPS_B_qualcomm_5.yuv.md5
++1,RPS_C_ericsson_5,RPS_C_ericsson_5.bit,RPS_C_ericsson_5.md5
++1,RPS_D_ericsson_6,RPS_D_ericsson_6.bit,RPS_D_ericsson_6.md5
++1,RPS_E_qualcomm_5,RPS_E_qualcomm_5.bit,RPS_E_qualcomm_5.yuv.md5
++1,RPS_F_docomo_2,RPS_F_docomo_2.bit,RPS_F_docomo_2.md5
++1,RQT_A_HHI_4,RQT_A_HHI_4.bit,RQT_A_HHI_4.md5
++1,RQT_B_HHI_4,RQT_B_HHI_4.bit,RQT_B_HHI_4.md5
++1,RQT_C_HHI_4,RQT_C_HHI_4.bit,RQT_C_HHI_4.md5
++1,RQT_D_HHI_4,RQT_D_HHI_4.bit,RQT_D_HHI_4.md5
++1,RQT_E_HHI_4,RQT_E_HHI_4.bit,RQT_E_HHI_4.md5
++1,RQT_F_HHI_4,RQT_F_HHI_4.bit,RQT_F_HHI_4.md5
++1,RQT_G_HHI_4,RQT_G_HHI_4.bit,RQT_G_HHI_4.md5
++1,SAO_A_MediaTek_4,SAO_A_MediaTek_4.bit,SAO_A_MediaTek_4.md5
++1,SAO_B_MediaTek_5,SAO_B_MediaTek_5.bit,SAO_B_MediaTek_5.md5
++1,SAO_C_Samsung_5,SAO_C_Samsung_5.bin,SAO_C_Samsung_5.md5
++1,SAO_D_Samsung_5,SAO_D_Samsung_5.bin,SAO_D_Samsung_5.md5
++1,SAO_E_Canon_4,SAO_E_Canon_4.bit,SAO_E_Canon_4.md5
++1,SAO_F_Canon_3,SAO_F_Canon_3.bit,SAO_F_Canon_3.md5
++1,SAO_G_Canon_3,SAO_G_Canon_3.bit,SAO_G_Canon_3.md5
++1,SDH_A_Orange_4,SDH_A_Orange_4.bit,SDH_A_Orange_4_yuv.md5
++1,SLICES_A_Rovi_3,SLICES_A_Rovi_3.bin,SLICES_A_Rovi_3.md5
++1,SLIST_A_Sony_4,str.bin,SLIST_A_Sony_4_yuv.md5
++1,SLIST_B_Sony_8,str.bin,SLIST_B_Sony_8_yuv.md5
++1,SLIST_C_Sony_3,str.bin,SLIST_C_Sony_3_yuv.md5
++1,SLIST_D_Sony_9,str.bin,SLIST_D_Sony_9_yuv.md5
++1,SLPPLP_A_VIDYO_2,SLPPLP_A_VIDYO_2.bit,SLPPLP_A_VIDYO_2_yuv.md5
++1,STRUCT_A_Samsung_6,STRUCT_A_Samsung_6.bin,STRUCT_A_Samsung_6.md5
++1,STRUCT_B_Samsung_6,STRUCT_B_Samsung_6.bin,STRUCT_B_Samsung_6.md5
++1,TILES_A_Cisco_2,TILES_A_Cisco_2.bin,TILES_A_Cisco_2_yuv.md5
++1,TILES_B_Cisco_1,TILES_B_Cisco_1.bin,TILES_B_Cisco_1_yuv.md5
++1,TMVP_A_MS_3,TMVP_A_MS_3.bit,TMVP_A_MS_3.yuv.md5
++1,TSCL_A_VIDYO_5,TSCL_A_VIDYO_5.bit,TSCL_A_VIDYO_5_yuv.md5
++1,TSCL_B_VIDYO_4,TSCL_B_VIDYO_4.bit,TSCL_B_VIDYO_4_yuv.md5
++1,TSKIP_A_MS_3,TSKIP_A_MS_3.bit,TSKIP_A_MS_3.yuv.md5
++0,TSUNEQBD_A_MAIN10_Technicolor_2,TSUNEQBD_A_MAIN10_Technicolor_2.bit,TSUNEQBD_A_MAIN10_Technicolor_2_yuv.md5, # Y/C bit depth unmatched
++1,TUSIZE_A_Samsung_1,TUSIZE_A_Samsung_1.bin,TUSIZE_A_Samsung_1.md5
++1,VPSID_A_VIDYO_2,VPSID_A_VIDYO_2.bit,VPSID_A_VIDYO_2_yuv.md5
++1,WP_A_MAIN10_Toshiba_3,WP_A_MAIN10_Toshiba_3.bit,WP_A_MAIN10_Toshiba_3_yuv.md5
++1,WP_A_Toshiba_3,WP_A_Toshiba_3.bit,WP_A_Toshiba_3_yuv.md5
++1,WP_B_Toshiba_3,WP_B_Toshiba_3.bit,WP_B_Toshiba_3_yuv.md5
++1,WP_MAIN10_B_Toshiba_3,WP_MAIN10_B_Toshiba_3.bit,WP_MAIN10_B_Toshiba_3_yuv.md5
++1,WPP_A_ericsson_MAIN10_2,WPP_A_ericsson_MAIN10_2.bit,WPP_A_ericsson_MAIN10_yuv.md5
++1,WPP_A_ericsson_MAIN_2,WPP_A_ericsson_MAIN_2.bit,WPP_A_ericsson_MAIN_2_yuv.md5
++1,WPP_B_ericsson_MAIN10_2,WPP_B_ericsson_MAIN10_2.bit,WPP_B_ericsson_MAIN10_yuv.md5
++1,WPP_B_ericsson_MAIN_2,WPP_B_ericsson_MAIN_2.bit,WPP_B_ericsson_MAIN_2_yuv.md5
++1,WPP_C_ericsson_MAIN10_2,WPP_C_ericsson_MAIN10_2.bit,WPP_C_ericsson_MAIN10_yuv.md5
++1,WPP_C_ericsson_MAIN_2,WPP_C_ericsson_MAIN_2.bit,WPP_C_ericsson_MAIN_2_yuv.md5
++1,WPP_D_ericsson_MAIN10_2,WPP_D_ericsson_MAIN10_2.bit,WPP_D_ericsson_MAIN10_yuv.md5
++1,WPP_D_ericsson_MAIN_2,WPP_D_ericsson_MAIN_2.bit,WPP_D_ericsson_MAIN_2_yuv.md5
++1,WPP_E_ericsson_MAIN10_2,WPP_E_ericsson_MAIN10_2.bit,WPP_E_ericsson_MAIN10_yuv.md5
++1,WPP_E_ericsson_MAIN_2,WPP_E_ericsson_MAIN_2.bit,WPP_E_ericsson_MAIN_2_yuv.md5
++1,WPP_F_ericsson_MAIN10_2,WPP_F_ericsson_MAIN10_2.bit,WPP_F_ericsson_MAIN10_yuv.md5
++1,WPP_F_ericsson_MAIN_2,WPP_F_ericsson_MAIN_2.bit,WPP_F_ericsson_MAIN_2_yuv.md5
+--- /dev/null
++++ b/pi-util/conf_native.sh
+@@ -0,0 +1,78 @@
++echo "Configure for native build"
++
++FFSRC=`pwd`
++MC=`uname -m`
++
++#RPI_KEEPS="-save-temps=obj"
++RPI_KEEPS=""
++
++if [ "$MC" == "aarch64" ]; then
++  echo "M/C aarch64"
++  A=aarch64-linux-gnu
++  B=arm64
++  MCOPTS=
++  RPI_INCLUDES=
++  RPI_LIBDIRS=
++  RPI_DEFINES=
++  RPI_EXTRALIBS=
++  RPIOPTS="--disable-mmal --enable-sand"
++else
++  echo "M/C armv7"
++  A=arm-linux-gnueabihf
++  B=armv7
++  MCOPTS="--arch=armv6t2 --cpu=cortex-a7"
++  RPI_OPT_VC=/opt/vc
++  RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux"
++  RPI_LIBDIRS="-L$RPI_OPT_VC/lib"
++  RPI_DEFINES="-D__VCCOREVER__=0x4000000 -mfpu=neon-vfpv4"
++  RPI_EXTRALIBS="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm -Wl,--end-group"
++  RPIOPTS="--enable-mmal --enable-rpi"
++fi
++C=`lsb_release -sc`
++V=`cat RELEASE`
++
++SHARED_LIBS="--enable-shared"
++if [ "$1" == "--noshared" ]; then
++  SHARED_LIBS="--disable-shared"
++  OUT=out/$B-$C-$V-static-rel
++  echo Static libs
++else
++  echo Shared libs
++  OUT=out/$B-$C-$V-shared-rel
++fi
++
++USR_PREFIX=$FFSRC/$OUT/install
++LIB_PREFIX=$USR_PREFIX/lib/$A
++INC_PREFIX=$USR_PREFIX/include/$A
++
++echo Destination directory: $OUT
++mkdir -p $FFSRC/$OUT
++cd $FFSRC/$OUT
++
++$FFSRC/configure \
++ --prefix=$USR_PREFIX\
++ --libdir=$LIB_PREFIX\
++ --incdir=$INC_PREFIX\
++ $MCOPTS\
++ --disable-stripping\
++ --disable-thumb\
++ --enable-v4l2-request\
++ --enable-libdrm\
++ --enable-epoxy\
++ --enable-libudev\
++ --enable-vout-drm\
++ --enable-vout-egl\
++ $SHARED_LIBS\
++ $RPIOPTS\
++ --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES"\
++ --extra-cxxflags="$RPI_DEFINES $RPI_INCLUDES"\
++ --extra-ldflags="$RPI_LIBDIRS"\
++ --extra-libs="$RPI_EXTRALIBS"\
++ --extra-version="rpi"
++
++# --enable-decoder=hevc_rpi\
++# --enable-extra-warnings\
++# --arch=armv71\
++
++# gcc option for getting asm listing
++# -Wa,-ahls
+--- /dev/null
++++ b/pi-util/conf_pi1.sh
+@@ -0,0 +1,39 @@
++echo "Configure for Pi1"
++
++RPI_TOOLROOT=`pwd`/../tools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf
++RPI_OPT_VC=`pwd`/../firmware/hardfp/opt/vc
++
++RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux"
++RPI_LIBDIRS="-L$RPI_TOOLROOT/lib -L$RPI_OPT_VC/lib"
++#RPI_KEEPS="-save-temps=obj"
++RPI_KEEPS=""
++
++SHARED_LIBS="--enable-shared"
++if [ "$1" == "--noshared" ]; then
++  SHARED_LIBS="--disable-shared"
++  echo Static libs
++else
++  echo Shared libs
++fi
++
++./configure --enable-cross-compile\
++ --cpu=arm1176jzf-s\
++ --arch=arm\
++ --disable-neon\
++ --target-os=linux\
++ --disable-stripping\
++ --enable-mmal\
++ $SHARED_LIBS\
++ --extra-cflags="-g $RPI_KEEPS $RPI_INCLUDES"\
++ --extra-cxxflags="$RPI_INCLUDES"\
++ --extra-ldflags="$RPI_LIBDIRS -Wl,-rpath=/opt/vc/lib,-rpath-link=$RPI_OPT_VC/lib,-rpath=/lib,-rpath=/usr/lib,-rpath-link=$RPI_TOOLROOT/lib,-rpath-link=$RPI_TOOLROOT/lib"\
++ --extra-libs="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm"\
++ --cross-prefix=$RPI_TOOLROOT/bin/arm-linux-gnueabihf-
++
++
++# --enable-extra-warnings\
++# --arch=armv71\
++# --enable-shared\
++
++# gcc option for getting asm listing
++# -Wa,-ahls
+--- /dev/null
++++ b/pi-util/conf_pi2.sh
+@@ -0,0 +1,57 @@
++echo "Configure for Pi2/3"
++
++FFSRC=`pwd`
++
++RPI_TOOLROOT=$FFSRC/../tools/arm-bcm2708/arm-rpi-4.9.3-linux-gnueabihf
++RPI_OPT_VC=$FFSRC/../firmware/hardfp/opt/vc
++
++RPI_INCLUDES="-I$RPI_OPT_VC/include -I$RPI_OPT_VC/include/interface/vcos/pthreads -I$RPI_OPT_VC/include/interface/vmcs_host/linux"
++RPI_LIBDIRS="-L$RPI_TOOLROOT/lib -L$RPI_OPT_VC/lib"
++RPI_DEFINES="-D__VCCOREVER__=0x4000000 -mfpu=neon-vfpv4"
++#RPI_KEEPS="-save-temps=obj"
++RPI_KEEPS=""
++
++SHARED_LIBS="--enable-shared"
++if [ "$1" == "--noshared" ]; then
++  SHARED_LIBS="--disable-shared"
++  OUT=out/x-armv7-static-rel
++  echo Static libs
++else
++  echo Shared libs
++  OUT=out/x-armv7-shared-rel
++fi
++
++USR_PREFIX=$FFSRC/$OUT/install
++LIB_PREFIX=$USR_PREFIX/lib/arm-linux-gnueabihf
++INC_PREFIX=$USR_PREFIX/include/arm-linux-gnueabihf
++
++mkdir -p $FFSRC/$OUT
++cd $FFSRC/$OUT
++
++$FFSRC/configure --enable-cross-compile\
++ --prefix=$USR_PREFIX\
++ --libdir=$LIB_PREFIX\
++ --incdir=$INC_PREFIX\
++ --arch=armv6t2\
++ --cpu=cortex-a7\
++ --target-os=linux\
++ --disable-stripping\
++ --disable-thumb\
++ --enable-mmal\
++ --enable-rpi\
++ $SHARED_LIBS\
++ --extra-cflags="-ggdb $RPI_KEEPS $RPI_DEFINES $RPI_INCLUDES"\
++ --extra-cxxflags="$RPI_DEFINES $RPI_INCLUDES"\
++ --extra-ldflags="$RPI_LIBDIRS -Wl,-rpath=/opt/vc/lib,-rpath-link=$RPI_OPT_VC/lib,-rpath=/lib,-rpath=/usr/lib,-rpath-link=$RPI_TOOLROOT/lib,-rpath-link=$RPI_TOOLROOT/lib"\
++ --extra-libs="-Wl,--start-group -lbcm_host -lmmal -lmmal_util -lmmal_core -lvcos -lvcsm -lvchostif -lvchiq_arm"\
++ --cross-prefix=$RPI_TOOLROOT/bin/arm-linux-gnueabihf-
++
++# --enable-shared\
++
++# --enable-decoder=hevc_rpi\
++# --enable-extra-warnings\
++# --arch=armv71\
++# --enable-shared\
++
++# gcc option for getting asm listing
++# -Wa,-ahls
+--- /dev/null
++++ b/pi-util/ffconf.py
+@@ -0,0 +1,215 @@
++#!/usr/bin/env python
++
++import string
++import os
++import subprocess
++import re
++import argparse
++import sys
++import csv
++from stat import *
++
++CODEC_HEVC_RPI  = 1
++HWACCEL_RPI     = 2
++HWACCEL_DRM     = 3
++HWACCEL_VAAPI   = 4
++
++def testone(fileroot, srcname, es_file, md5_file, pix, dectype, vcodec, ffmpeg_exec):
++    hwaccel = ""
++    if dectype == HWACCEL_RPI:
++        hwaccel = "rpi"
++    elif dectype == HWACCEL_DRM:
++        hwaccel = "drm"
++    elif dectype == HWACCEL_VAAPI:
++        hwaccel = "vaapi"
++
++    pix_fmt = []
++    if pix == "8":
++        pix_fmt = ["-pix_fmt", "yuv420p"]
++    elif pix == "10":
++        pix_fmt = ["-pix_fmt", "yuv420p10le"]
++    elif pix == "12":
++        pix_fmt = ["-pix_fmt", "yuv420p12le"]
++
++    tmp_root = "/tmp"
++
++    names = srcname.split('/')
++    while len(names) > 1:
++        tmp_root = os.path.join(tmp_root, names[0])
++        del names[0]
++    name = names[0]
++
++    if not os.path.exists(tmp_root):
++        os.makedirs(tmp_root)
++
++    dec_file = os.path.join(tmp_root, name + ".dec.md5")
++    try:
++        os.remove(dec_file)
++    except:
++        pass
++
++    flog = open(os.path.join(tmp_root, name + ".log"), "wt")
++
++    ffargs = [ffmpeg_exec, "-flags", "unaligned", "-hwaccel", hwaccel, "-vcodec", "hevc", "-i", os.path.join(fileroot, es_file)] + pix_fmt + ["-f", "md5", dec_file]
++
++    # Unaligned needed for cropping conformance
++    if hwaccel:
++        rstr = subprocess.call(ffargs, stdout=flog, stderr=subprocess.STDOUT)
++    else:
++        rstr = subprocess.call(
++            [ffmpeg_exec, "-flags", "unaligned", "-vcodec", vcodec, "-i", os.path.join(fileroot, es_file), "-f", "md5", dec_file],
++            stdout=flog, stderr=subprocess.STDOUT)
++
++    try:
++        m1 = None
++        m2 = None
++        with open(os.path.join(fileroot, md5_file)) as f:
++            for line in f:
++                m1 = re.search("[0-9a-f]{32}", line.lower())
++                if m1:
++                    break
++
++        with open(dec_file) as f:
++            m2 = re.search("[0-9a-f]{32}", f.readline())
++    except:
++        pass
++
++    if  m1 and m2 and m1.group() == m2.group():
++        print >> flog, "Match: " + m1.group()
++        rv = 0
++    elif not m1:
++        print >> flog, "****** Cannot find m1"
++        rv = 3
++    elif not m2:
++        print >> flog, "****** Cannot find m2"
++        rv = 2
++    else:
++        print >> flog, "****** Mismatch: " + m1.group() + " != " + m2.group()
++        rv = 1
++    flog.close()
++    return rv
++
++def scandir(root):
++    aconf = []
++    ents = os.listdir(root)
++    ents.sort(key=str.lower)
++    for name in ents:
++        test_path = os.path.join(root, name)
++        if S_ISDIR(os.stat(test_path).st_mode):
++            files = os.listdir(test_path)
++            es_file = "?"
++            md5_file = "?"
++            for f in files:
++                (base, ext) = os.path.splitext(f)
++                if base[0] == '.':
++                    pass
++                elif ext == ".bit" or ext == ".bin":
++                    es_file = f
++                elif ext == ".md5" or (ext == ".txt" and (base[-4:] == "_md5" or base[-6:] == "md5sum")):
++                    if md5_file == "?":
++                        md5_file = f
++                    elif base[-3:] == "yuv":
++                        md5_file = f
++            aconf.append((1, name, es_file, md5_file))
++    return aconf
++
++def runtest(name, tests):
++    if not tests:
++        return True
++    for t in tests:
++        if name[0:len(t)] == t or name.find("/" + t) != -1:
++            return True
++    return False
++
++def doconf(csva, tests, test_root, vcodec, dectype, ffmpeg_exec):
++    unx_failures = []
++    unx_success = []
++    failures = 0
++    successes = 0
++    for a in csva:
++        exp_test = int(a[0])
++        if (exp_test and runtest(a[1], tests)):
++            name = a[1]
++            print "==== ", name,
++            sys.stdout.flush()
++
++            rv = testone(os.path.join(test_root, name), name, a[2], a[3], a[4], dectype=dectype, vcodec=vcodec, ffmpeg_exec=ffmpeg_exec)
++            if (rv == 0):
++                successes += 1
++            else:
++                failures += 1
++
++            if (rv == 0):
++                if exp_test == 2:
++                    print ": * OK *"
++                    unx_success.append(name)
++                else:
++                    print ": ok"
++            elif exp_test == 2 and rv == 1:
++                print ": fail"
++            elif exp_test == 3 and rv == 2:
++                # Call an expected "crash" an abort
++                print ": abort"
++            else:
++                unx_failures.append(name)
++                if rv == 1:
++                    print ": * FAIL *"
++                elif (rv == 2) :
++                    print ": * CRASH *"
++                elif (rv == 3) :
++                    print ": * MD5 MISSING *"
++                else :
++                    print ": * BANG *"
++
++    if unx_failures or unx_success:
++        print "Unexpected Failures:", unx_failures
++        print "Unexpected Success: ", unx_success
++    else:
++        print "All tests normal:", successes, "ok,", failures, "failed"
++
++
++class ConfCSVDialect(csv.Dialect):
++    delimiter = ','
++    doublequote = True
++    lineterminator = '\n'
++    quotechar='"'
++    quoting = csv.QUOTE_MINIMAL
++    skipinitialspace = True
++    strict = True
++
++if __name__ == '__main__':
++
++    argp = argparse.ArgumentParser(description="FFmpeg h265 conformance tester")
++    argp.add_argument("tests", nargs='*')
++    argp.add_argument("--pi4", action='store_true', help="Force pi4 cmd line")
++    argp.add_argument("--drm", action='store_true', help="Force v4l2 drm cmd line")
++    argp.add_argument("--vaapi", action='store_true', help="Force vaapi cmd line")
++    argp.add_argument("--test_root", default="/opt/conform/h265.2016", help="Root dir for test")
++    argp.add_argument("--csvgen", action='store_true', help="Generate CSV file for dir")
++    argp.add_argument("--csv", default="pi-util/conf_h265.2016.csv", help="CSV filename")
++    argp.add_argument("--vcodec", default="hevc_rpi", help="vcodec name to use")
++    argp.add_argument("--ffmpeg", default="./ffmpeg", help="ffmpeg exec name")
++    args = argp.parse_args()
++
++    if args.csvgen:
++        csv.writer(sys.stdout).writerows(scandir(args.test_root))
++        exit(0)
++
++    with open(args.csv, 'rt') as csvfile:
++        csva = [a for a in csv.reader(csvfile, ConfCSVDialect())]
++
++    dectype = CODEC_HEVC_RPI
++    if os.path.exists("/dev/rpivid-hevcmem"):
++        dectype = HWACCEL_RPI
++    if args.drm or os.path.exists("/sys/module/rpivid_hevc"):
++        dectype = HWACCEL_DRM
++
++    if args.pi4:
++        dectype = HWACCEL_RPI
++    elif args.drm:
++        dectype = HWACCEL_DRM
++    elif args.vaapi:
++        dectype = HWACCEL_VAAPI
++
++    doconf(csva, args.tests, args.test_root, args.vcodec, dectype, args.ffmpeg)
++
+--- /dev/null
++++ b/pi-util/ffperf.py
+@@ -0,0 +1,128 @@
++#!/usr/bin/env python3
++
++import time
++import string
++import os
++import tempfile
++import subprocess
++import re
++import argparse
++import sys
++import csv
++from stat import *
++
++class tstats:
++    close_threshold = 0.01
++
++    def __init__(self, stats_dict=None):
++        if stats_dict != None:
++            self.name = stats_dict["name"]
++            self.elapsed = float(stats_dict["elapsed"])
++            self.user = float(stats_dict["user"])
++            self.sys = float(stats_dict["sys"])
++
++    def times_str(self):
++        ctime = self.sys + self.user
++        return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed)
++
++    def dict(self):
++        return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys}
++
++    def is_close(self, other):
++        return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold
++
++    def __lt__(self, other):
++        return self.elapsed < other.elapsed
++    def __gt__(self, other):
++        return self.elapsed > other.elapsed
++
++    def time_file(name, prefix, ffmpeg="./ffmpeg"):
++        stats = tstats()
++        stats.name = name
++        start_time = time.clock_gettime(time.CLOCK_MONOTONIC);
++        cproc = subprocess.Popen([ffmpeg, "-no_cvt_hw",
++                                  "-vcodec", "hevc_rpi",
++                                  "-t", "30", "-i", prefix + name,
++                                  "-f", "vout_rpi", os.devnull], bufsize=-1, stdout=flog, stderr=flog);
++        pinfo = os.wait4(cproc.pid, 0)
++        end_time = time.clock_gettime(time.CLOCK_MONOTONIC);
++        stats.elapsed = end_time - start_time
++        stats.user = pinfo[2].ru_utime
++        stats.sys = pinfo[2].ru_stime
++        return stats
++
++
++def common_prefix(s1, s2):
++    for i in range(min(len(s1),len(s2))):
++        if s1[i] != s2[i]:
++            return s1[:i]
++    return s1[:i+1]
++
++def main():
++    global flog
++
++    argp = argparse.ArgumentParser(description="FFmpeg performance tester", epilog="""
++To blank the screen before starting use "xdg-screensaver activate"
++(For some reason this doesn't seem to work from within python).
++""")
++
++    argp.add_argument("streams", nargs='*')
++    argp.add_argument("--csv_out", default="ffperf_out.csv", help="CSV output filename")
++    argp.add_argument("--csv_in", help="CSV input filename")
++    argp.add_argument("--prefix", help="Filename prefix (include terminal '/' if a directory).")
++    argp.add_argument("--repeat", default=3, type=int, help="Run repeat count")
++    argp.add_argument("--ffmpeg", default="./ffmpeg", help="FFmpeg executable")
++
++    args = argp.parse_args()
++
++    csv_out = csv.DictWriter(open(args.csv_out, 'w', newline=''), ["name", "elapsed", "user", "sys"])
++    csv_out.writeheader()
++
++    stats_in = {}
++    if args.csv_in != None:
++        with open(args.csv_in, 'r', newline='') as f_in:
++            stats_in = {x["name"]:tstats(x) for x in csv.DictReader(f_in)}
++
++    flog = open(os.path.join(tempfile.gettempdir(), "ffperf.log"), "wt")
++
++    streams = args.streams
++    if not streams:
++        if not stats_in:
++            print ("No source streams specified")
++            return 1
++        prefix = "" if args.prefix == None else args.prefix
++        streams = [k for k in stats_in]
++    elif args.prefix != None:
++        prefix = args.prefix
++    else:
++        prefix = streams[0]
++        for f in streams[1:]:
++            prefix = common_prefix(prefix, f)
++        pp = prefix.rpartition(os.sep)
++        prefix = pp[0] + pp[1]
++        streams = [s[len(prefix):] for s in streams]
++
++    for f in sorted(streams, key=lambda x : "~" * x.count(os.sep) + x.lower()):
++        print ("====", f)
++
++        t0 = tstats({"name":f, "elapsed":999, "user":999, "sys":999})
++        for i in range(args.repeat):
++            t = tstats.time_file(f, prefix, args.ffmpeg)
++            print ("...", t.times_str())
++            if t0 > t:
++                t0 = t
++
++        if t0.name in stats_in:
++            pstat = stats_in[t0.name]
++            print("---" if pstat.is_close(t0) else "<<<" if t0 < pstat else ">>>", pstat.times_str())
++
++        csv_out.writerow(t0.dict())
++
++        print ()
++
++    return 0
++
++
++if __name__ == '__main__':
++    exit(main())
++
+--- /dev/null
++++ b/pi-util/genpatch.sh
+@@ -0,0 +1,35 @@
++set -e
++
++NOPATCH=
++if [ "$1" == "--notag" ]; then
++  shift
++  NOPATCH=1
++fi
++
++if [ "$1" == "" ]; then
++  echo Usage: $0 [--notag] \<patch_tag\>
++  echo e.g.: $0 mmal_4
++  exit 1
++fi
++
++VERSION=`cat RELEASE`
++if [ "$VERSION" == "" ]; then
++  echo Can\'t find version RELEASE
++  exit 1
++fi
++
++PATCHFILE=../ffmpeg-$VERSION-$1.patch
++
++if [ $NOPATCH ]; then
++  echo Not tagged
++else
++  # Only continue if we are all committed
++  git diff --name-status --exit-code
++
++  PATCHTAG=pi/$VERSION/$1
++  echo Tagging: $PATCHTAG
++
++  git tag $PATCHTAG
++fi
++echo Generating patch: $PATCHFILE
++git diff n$VERSION -- > $PATCHFILE
+--- /dev/null
++++ b/pi-util/make_array.py
+@@ -0,0 +1,23 @@
++#!/usr/bin/env python
++
++# Usage
++#   make_array file.bin
++#   Produces file.h with array of bytes.
++#
++import sys
++for file in sys.argv[1:]:
++  prefix,suffix = file.split('.')
++  assert suffix=='bin'
++  name=prefix.split('/')[-1]
++  print 'Converting',file
++  with open(prefix+'.h','wb') as out:
++    print >>out, 'static const unsigned char',name,'[] = {'
++    with open(file,'rb') as fd:
++      i = 0
++      for byte in fd.read():
++        print >>out, '0x%02x, ' % ord(byte),
++        i = i + 1
++        if i % 8 == 0:
++          print >>out, ' // %04x' % (i - 8)
++    print >>out,'};'
++
+--- /dev/null
++++ b/pi-util/mkinst.sh
+@@ -0,0 +1,5 @@
++set -e
++
++make install
++
++cp -r install/* ../vlc/sysroot/raspian_stretch_pi1-sysroot/usr
+--- /dev/null
++++ b/pi-util/patkodi.sh
+@@ -0,0 +1,9 @@
++set -e
++KODIBASE=/home/jc/rpi/kodi/xbmc
++JOBS=-j20
++make $JOBS
++git diff xbmc/release/4.3-kodi > $KODIBASE/tools/depends/target/ffmpeg/pfcd_hevc_optimisations.patch
++make -C $KODIBASE/tools/depends/target/ffmpeg $JOBS
++make -C $KODIBASE/build install
++
++
+--- /dev/null
++++ b/pi-util/perfcmp.py
+@@ -0,0 +1,101 @@
++#!/usr/bin/env python3
++
++import time
++import string
++import os
++import tempfile
++import subprocess
++import re
++import argparse
++import sys
++import csv
++from stat import *
++
++class tstats:
++    close_threshold = 0.01
++
++    def __init__(self, stats_dict=None):
++        if stats_dict != None:
++            self.name = stats_dict["name"]
++            self.elapsed = float(stats_dict["elapsed"])
++            self.user = float(stats_dict["user"])
++            self.sys = float(stats_dict["sys"])
++
++    def times_str(self):
++        ctime = self.sys + self.user
++        return "time=%6.2f, cpu=%6.2f (%4.2f%%)" % (self.elapsed, ctime, (ctime * 100.0) / self.elapsed)
++
++    def dict(self):
++        return {"name":self.name, "elapsed":self.elapsed, "user":self.user, "sys":self.sys}
++
++    def is_close(self, other):
++        return abs(self.elapsed - other.elapsed) / self.elapsed < self.close_threshold
++
++    def __lt__(self, other):
++        return self.elapsed < other.elapsed
++    def __gt__(self, other):
++        return self.elapsed > other.elapsed
++
++    def time_file(name, prefix):
++        stats = tstats()
++        stats.name = name
++        start_time = time.clock_gettime(time.CLOCK_MONOTONIC);
++        cproc = subprocess.Popen(["./ffmpeg", "-t", "30", "-i", prefix + name,
++                                  "-f", "null", os.devnull], bufsize=-1, stdout=flog, stderr=flog);
++        pinfo = os.wait4(cproc.pid, 0)
++        end_time = time.clock_gettime(time.CLOCK_MONOTONIC);
++        stats.elapsed = end_time - start_time
++        stats.user = pinfo[2].ru_utime
++        stats.sys = pinfo[2].ru_stime
++        return stats
++
++
++def common_prefix(s1, s2):
++    for i in range(min(len(s1),len(s2))):
++        if s1[i] != s2[i]:
++            return s1[:i]
++    return s1[:i+1]
++
++def main():
++    argp = argparse.ArgumentParser(description="FFmpeg performance compare")
++
++    argp.add_argument("stream0", help="CSV to compare")
++    argp.add_argument("stream1", nargs='?', default="ffperf_out.csv", help="CSV to compare")
++
++    args = argp.parse_args()
++
++    with open(args.stream0, 'r', newline='') as f_in:
++        stats0 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)}
++    with open(args.stream1, 'r', newline='') as f_in:
++        stats1 = {x["name"]:tstats(x) for x in csv.DictReader(f_in)}
++
++    print (args.stream0, "<<-->>", args.stream1)
++    print ()
++
++    for f in sorted(stats0.keys() | stats1.keys(), key=lambda x : "~" * x.count(os.sep) + x.lower()):
++       if not (f in stats0) :
++           print ("           XX               :", f)
++           continue
++       if not (f in stats1) :
++           print ("       XX                   :", f)
++           continue
++
++       s0 = stats0[f]
++       s1 = stats1[f]
++
++       pcent = ((s0.elapsed - s1.elapsed) / s0.elapsed) * 100.0
++       thresh = 0.3
++       tc = 6
++
++       nchar = min(tc - 1, int(abs(pcent) / thresh))
++       cc = "  --  " if nchar == 0 else "<" * nchar + " " * (tc - nchar) if pcent < 0 else " " * (tc - nchar) + ">" * nchar
++
++       print ("%6.2f %s%6.2f (%+5.2f) : %s" %
++           (s0.elapsed, cc, s1.elapsed, pcent, f))
++
++    return 0
++
++
++if __name__ == '__main__':
++    exit(main())
++
+--- /dev/null
++++ b/pi-util/qem.sh
+@@ -0,0 +1,9 @@
++TARGET_DIR=../src/eupton_vc4dev_2012a/software/vc4/DEV/applications/tutorials/user_shader_example_tex
++QASM=python\ ../local/bin/qasm.py
++SRC_FILE=libavcodec/rpi_hevc_shader.qasm
++DST_BASE=shader
++
++cp libavcodec/rpi_hevc_shader_cmd.h $TARGET_DIR
++$QASM -mc_c:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.c
++$QASM -mc_h:$DST_BASE,$DST_BASE,$DST_BASE $SRC_FILE > $TARGET_DIR/$DST_BASE.h
++
+--- /dev/null
++++ b/pi-util/v3dusage.py
+@@ -0,0 +1,128 @@
++#!/usr/bin/env python
++
++import sys
++import argparse
++import re
++
++def do_logparse(logname):
++
++    rmatch = re.compile(r'^([0-9]+\.[0-9]{3}): (done )?((vpu0)|(vpu1)|(qpu1)) ([A-Z_]+) cb:([0-9a-f]+) ')
++    rqcycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs doing vertex/coordinate shading +([0-9]+)$')
++    rqtscycle = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: QPU Total clock cycles for all QPUs stalled waiting for TMUs +([0-9]+)$')
++    rl2hits = re.compile(r'^([0-9]+\.[0-9]{3}): v3d: L2C Total Level 2 cache ([a-z]+) +([0-9]+)$')
++
++    ttotal = {'idle':0.0}
++    tstart = {}
++    qctotal = {}
++    qtstotal = {}
++    l2hits = {}
++    l2total = {}
++    time0 = None
++    idle_start = None
++    qpu_op_no = 0
++    op_count = 0
++
++    with open(logname, "rt") as infile:
++        for line in infile:
++            match = rmatch.match(line)
++            if match:
++#                print match.group(1), ":", match.group(2), ":", match.group(3), ":", match.group(7), ":"
++                time = float(match.group(1))
++                unit = match.group(3)
++                opstart = not match.group(2)
++                optype = match.group(7)
++                hascb = match.group(8) != "0"
++
++                if unit == 'qpu1':
++                    unit = unit + "." + str(qpu_op_no)
++                    if not opstart:
++                        if hascb or optype == 'EXECUTE_SYNC':
++                            qpu_op_no = 0
++                        else:
++                            qpu_op_no += 1
++
++                # Ignore sync type
++                if optype == 'EXECUTE_SYNC':
++                    continue
++
++                if not time0:
++                    time0 = time
++
++                if opstart:
++                    tstart[unit] = time;
++                elif unit in tstart:
++                    op_count += 1
++                    if not unit in ttotal:
++                        ttotal[unit] = 0.0
++                    ttotal[unit] += time - tstart[unit]
++                    del tstart[unit]
++
++                if not idle_start and not tstart:
++                    idle_start = time
++                elif idle_start and tstart:
++                    ttotal['idle'] += time - idle_start
++                    idle_start = None
++
++            match = rqcycle.match(line)
++            if match:
++                unit = "qpu1." + str(qpu_op_no)
++                if not unit in qctotal:
++                    qctotal[unit] = 0
++                qctotal[unit] += int(match.group(2))
++
++            match = rqtscycle.match(line)
++            if match:
++                unit = "qpu1." + str(qpu_op_no)
++                if not unit in qtstotal:
++                    qtstotal[unit] = 0
++                qtstotal[unit] += int(match.group(2))
++
++            match = rl2hits.match(line)
++            if match:
++                unit = "qpu1." + str(qpu_op_no)
++                if not unit in l2total:
++                    l2total[unit] = 0
++                    l2hits[unit] = 0
++                l2total[unit] += int(match.group(3))
++                if match.group(2) == "hits":
++                    l2hits[unit] += int(match.group(3))
++
++
++    if not time0:
++        print "No v3d profile records found"
++    else:
++        tlogged = time - time0
++
++        print "Logged time:", tlogged, "  Op count:", op_count
++        for unit in sorted(ttotal):
++            print b'%6s: %10.3f    %7.3f%%' % (unit, ttotal[unit], ttotal[unit] * 100.0 / tlogged)
++        print
++        for unit in sorted(qctotal):
++            if not unit in qtstotal:
++                qtstotal[unit] = 0;
++            print b'%6s: Qcycles: %10d, TMU stall: %10d (%7.3f%%)' % (unit, qctotal[unit], qtstotal[unit], (qtstotal[unit] * 100.0)/qctotal[unit])
++            if unit in l2total:
++                print b'        L2Total: %10d, hits:      %10d (%7.3f%%)' % (l2total[unit], l2hits[unit], (l2hits[unit] * 100.0)/l2total[unit])
++
++
++
++if __name__ == '__main__':
++    argp = argparse.ArgumentParser(
++        formatter_class=argparse.RawDescriptionHelpFormatter,
++        description="QPU/VPU perf summary from VC logging",
++        epilog = """
++Will also summarise TMU stalls if logging requests set in qpu noflush param
++in the profiled code.
++
++Example use:
++  vcgencmd set_logging level=0xc0
++  <command to profile>
++  sudo vcdbg log msg >& t.log
++  v3dusage.py t.log
++""")
++
++    argp.add_argument("logfile")
++    args = argp.parse_args()
++
++    do_logparse(args.logfile)
++
diff --git a/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0005-fix_flags.diff b/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0005-fix_flags.diff
new file mode 100644
index 0000000..1eb1074
--- /dev/null
+++ b/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/files/0005-fix_flags.diff
@@ -0,0 +1,19 @@
+Upstream-Status: Pending
+
+--- a/configure
++++ b/configure
+@@ -6467,11 +6467,9 @@ enabled mbedtls           && { check_pkg
+                                die "ERROR: mbedTLS not found"; }
+ enabled mediacodec        && { enabled jni || die "ERROR: mediacodec requires --enable-jni"; }
+ ( enabled rpi ||
+-  enabled mmal )          && { check_lib mmal interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host ||
+-                               { ! enabled cross_compile &&
+-                                 add_cflags -isystem/opt/vc/include/ -isystem/opt/vc/include/interface/vmcs_host/linux -isystem/opt/vc/include/interface/vcos/pthreads -fgnu89-inline &&
+-                                 add_ldflags -L/opt/vc/lib/ &&
+-                                 check_lib mmal interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host -lvcos -lvcsm -lvchostif -lvchiq_arm; } ||
++  enabled mmal )          && { { add_cflags -isystem/opt/vc/include/ -isystem/opt/vc/include/interface/vmcs_host/linux -isystem/opt/vc/include/interface/vcos/pthreads -fgnu89-inline &&
++                               add_ldflags -L/opt/vc/lib/ &&
++                               check_lib mmal interface/mmal/mmal.h mmal_port_connect -lmmal_core -lmmal_util -lmmal_vc_client -lbcm_host -lvcsm -lvchostif -lvchiq_arm -lvcos; } ||
+                                die "ERROR: mmal not found" &&
+                                check_func_headers interface/mmal/mmal.h "MMAL_PARAMETER_VIDEO_MAX_NUM_CALLBACKS"; }
+ enabled openal            && { { for al_extralibs in "${OPENAL_LIBS}" "-lopenal" "-lOpenAL32"; do
diff --git a/poky/meta/recipes-multimedia/ffmpeg/ffmpeg_5.1.bb b/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/rpidistro-ffmpeg_4.3.2.bb
similarity index 74%
copy from poky/meta/recipes-multimedia/ffmpeg/ffmpeg_5.1.bb
copy to meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/rpidistro-ffmpeg_4.3.2.bb
index bb507b4..bf8d4b8 100644
--- a/poky/meta/recipes-multimedia/ffmpeg/ffmpeg_5.1.bb
+++ b/meta-raspberrypi/recipes-multimedia/rpidistro-ffmpeg/rpidistro-ffmpeg_4.3.2.bb
@@ -22,27 +22,37 @@
                     file://COPYING.LGPLv2.1;md5=bd7a443320af8c812e4c18d1b79df004 \
                     file://COPYING.LGPLv3;md5=e6a600fd5e1d9cbde2d983680233ad02"
 
-SRC_URI = "https://www.ffmpeg.org/releases/${BP}.tar.xz"
-SRC_URI[sha256sum] = "55eb6aab5ee235550fa54a33eaf8bf1b4ec66c01453182b12f6a993d75698b03"
-
 # Build fails when thumb is enabled: https://bugzilla.yoctoproject.org/show_bug.cgi?id=7717
-ARM_INSTRUCTION_SET:armv4 = "arm"
-ARM_INSTRUCTION_SET:armv5 = "arm"
-ARM_INSTRUCTION_SET:armv6 = "arm"
-
+ARM_INSTRUCTION_SET:armv4 = "arm"
+ARM_INSTRUCTION_SET:armv5 = "arm"
+ARM_INSTRUCTION_SET:armv6 = "arm"
 # Should be API compatible with libav (which was a fork of ffmpeg)
 # libpostproc was previously packaged from a separate recipe
-PROVIDES = "libav libpostproc"
-
+PROVIDES = "ffmpeg libav libpostproc"
+RPROVIDES:${PN} = "${PROVIDES}"
 DEPENDS = "nasm-native"
 
 inherit autotools pkgconfig
-
-PACKAGECONFIG ??= "avdevice avfilter avcodec avformat swresample swscale postproc \
-                   alsa bzlib lzma pic pthreads shared theora zlib \
+PACKAGECONFIG ??= "avdevice avfilter avcodec avformat swresample swscale postproc avresample \
+                   opengl udev sdl2 ffplay alsa bzlib lzma pic pthreads shared theora zlib \
+                   libvorbis x264 gpl sand rpi vout-drm vout-egl \
+                   ${@bb.utils.contains('MACHINE_FEATURES', 'vc4graphics', '', 'mmal', d)} \
                    ${@bb.utils.contains('AVAILTUNES', 'mips32r2', 'mips32r2', '', d)} \
                    ${@bb.utils.contains('DISTRO_FEATURES', 'x11', 'xv xcb', '', d)}"
 
+SRC_URI = "\
+    git://git@github.com/RPi-Distro/ffmpeg;protocol=https;branch=pios/bullseye \
+    file://0001-avcodec-arm-sbcenc-avoid-callee-preserved-vfp-regist.patch \
+    file://0002-Fix-build-on-powerpc-and-ppc64.patch \
+    file://0003-avcodec-pngenc-remove-monowhite-from-apng-formats.patch \
+    file://0004-ffmpeg-4.3.2-rpi_10.patch \
+    file://0005-fix_flags.diff \
+"
+
+SRCREV = "ea72093f350f38edcd39c480b331c3219c377642"
+
+S = "${WORKDIR}/git"
+
 # libraries to build in addition to avutil
 PACKAGECONFIG[avdevice] = "--enable-avdevice,--disable-avdevice"
 PACKAGECONFIG[avfilter] = "--enable-avfilter,--disable-avfilter"
@@ -51,17 +61,20 @@
 PACKAGECONFIG[swresample] = "--enable-swresample,--disable-swresample"
 PACKAGECONFIG[swscale] = "--enable-swscale,--disable-swscale"
 PACKAGECONFIG[postproc] = "--enable-postproc,--disable-postproc"
+PACKAGECONFIG[avresample] = "--enable-avresample,--disable-avresample"
 
 # features to support
+PACKAGECONFIG[ffplay] = "--enable-ffplay,--disable-ffplay"
 PACKAGECONFIG[alsa] = "--enable-alsa,--disable-alsa,alsa-lib"
 PACKAGECONFIG[altivec] = "--enable-altivec,--disable-altivec,"
 PACKAGECONFIG[bzlib] = "--enable-bzlib,--disable-bzlib,bzip2"
 PACKAGECONFIG[fdk-aac] = "--enable-libfdk-aac --enable-nonfree,--disable-libfdk-aac,fdk-aac"
 PACKAGECONFIG[gpl] = "--enable-gpl,--disable-gpl"
+PACKAGECONFIG[opengl] = "--enable-opengl,--disable-opengl,virtual/libgl"
 PACKAGECONFIG[gsm] = "--enable-libgsm,--disable-libgsm,libgsm"
 PACKAGECONFIG[jack] = "--enable-indev=jack,--disable-indev=jack,jack"
-PACKAGECONFIG[libopus] = "--enable-libopus,--disable-libopus,libopus"
 PACKAGECONFIG[libvorbis] = "--enable-libvorbis,--disable-libvorbis,libvorbis"
+PACKAGECONFIG[libopus] = "--enable-libopus,--disable-libopus,libopus"
 PACKAGECONFIG[lzma] = "--enable-lzma,--disable-lzma,xz"
 PACKAGECONFIG[mfx] = "--enable-libmfx,--disable-libmfx,intel-mediasdk"
 PACKAGECONFIG[mp3lame] = "--enable-libmp3lame,--disable-libmp3lame,lame"
@@ -74,10 +87,17 @@
 PACKAGECONFIG[vdpau] = "--enable-vdpau,--disable-vdpau,libvdpau"
 PACKAGECONFIG[vpx] = "--enable-libvpx,--disable-libvpx,libvpx"
 PACKAGECONFIG[x264] = "--enable-libx264,--disable-libx264,x264"
-PACKAGECONFIG[x265] = "--enable-libx265,--disable-libx265,x265"
 PACKAGECONFIG[xcb] = "--enable-libxcb,--disable-libxcb,libxcb"
 PACKAGECONFIG[xv] = "--enable-outdev=xv,--disable-outdev=xv,libxv"
 PACKAGECONFIG[zlib] = "--enable-zlib,--disable-zlib,zlib"
+#PACKAGECONFIG[snappy] = "--enable-libsnappy,--enable-libsnappy,snappy"
+PACKAGECONFIG[udev] = "--enable-libudev,--disable-libudev,udev"
+PACKAGECONFIG[v4l2] = "--enable-libv4l2 --enable-v4l2-request --enable-libdrm,,v4l-utils"
+PACKAGECONFIG[mmal] = "--enable-omx --enable-omx-rpi --enable-mmal,,userland"
+PACKAGECONFIG[sand] = "--enable-sand,,"
+PACKAGECONFIG[rpi] = "--enable-rpi,,"
+PACKAGECONFIG[vout-drm] = "--enable-vout-drm,,libdrm"
+PACKAGECONFIG[vout-egl] = "--enable-vout-egl,,virtual/egl"
 
 # other configuration options
 PACKAGECONFIG[mips32r2] = ",--disable-mipsdsp --disable-mipsdspr2"
@@ -100,15 +120,15 @@
     \
     --cross-prefix=${TARGET_PREFIX} \
     \
-    --ld='${CCLD}' \
-    --cc='${CC}' \
-    --cxx='${CXX}' \
+    --ld="${CCLD}" \
+    --cc="${CC}" \
+    --cxx="${CXX}" \
     --arch=${TARGET_ARCH} \
-    --target-os='linux' \
+    --target-os="linux" \
     --enable-cross-compile \
-    --extra-cflags='${CFLAGS} ${HOST_CC_ARCH}${TOOLCHAIN_OPTIONS}' \
-    --extra-ldflags='${LDFLAGS}' \
-    --sysroot='${STAGING_DIR_TARGET}' \
+    --extra-cflags="${CFLAGS} ${HOST_CC_ARCH}${TOOLCHAIN_OPTIONS}" \
+    --extra-ldflags="${LDFLAGS}" \
+    --sysroot="${STAGING_DIR_TARGET}" \
     ${EXTRA_FFCONF} \
     --libdir=${libdir} \
     --shlibdir=${libdir} \
@@ -116,24 +136,16 @@
     --cpu=${@cpu(d)} \
     --pkg-config=pkg-config \
 "
-
 EXTRA_OECONF:append:linux-gnux32 = " --disable-asm"
 
-EXTRA_OECONF += "${@bb.utils.contains('TUNE_FEATURES', 'mipsisa64r6', '--disable-mips64r2 --disable-mips32r2', '', d)}"
-EXTRA_OECONF += "${@bb.utils.contains('TUNE_FEATURES', 'mipsisa64r2', '--disable-mips64r6 --disable-mips32r6', '', d)}"
-EXTRA_OECONF += "${@bb.utils.contains('TUNE_FEATURES', 'mips32r2', '--disable-mips64r6 --disable-mips32r6', '', d)}"
-EXTRA_OECONF += "${@bb.utils.contains('TUNE_FEATURES', 'mips32r6', '--disable-mips64r2 --disable-mips32r2', '', d)}"
-EXTRA_OECONF:append:mips = " --extra-libs=-latomic --disable-mips32r5 --disable-mipsdsp --disable-mipsdspr2 \
-                             --disable-loongson2 --disable-loongson3 --disable-mmi --disable-msa"
-EXTRA_OECONF:append:riscv32 = " --extra-libs=-latomic"
-EXTRA_OECONF:append:armv5 = " --extra-libs=-latomic"
-EXTRA_OECONF:append:powerpc = " --extra-libs=-latomic"
+# Directly specify the include directories that contain headers for
+#   libdrm
+#   openmaxil
+TARGET_CFLAGS:append = " -I${STAGING_INCDIR}/IL -I${STAGING_INCDIR}/drm"
 
 # gold crashes on x86, another solution is to --disable-asm but thats more hacky
 # ld.gold: internal error in relocate_section, at ../../gold/i386.cc:3684
-
 LDFLAGS:append:x86 = "${@bb.utils.contains('DISTRO_FEATURES', 'ld-is-gold', ' -fuse-ld=bfd ', '', d)}"
-
 EXTRA_OEMAKE = "V=1"
 
 do_configure() {
@@ -142,33 +154,41 @@
 
 # patch out build host paths for reproducibility
 do_compile:prepend:class-target() {
-        sed -i -e "s,${WORKDIR},,g" ${B}/config.h
+    sed -i -e "s,${WORKDIR},,g" ${B}/config.h
 }
 
 PACKAGES =+ "libavcodec \
              libavdevice \
              libavfilter \
              libavformat \
+             libavresample \
              libavutil \
              libpostproc \
              libswresample \
              libswscale"
 
+FILES:${PN}:append = " /usr/share/ffmpeg"
 FILES:libavcodec = "${libdir}/libavcodec${SOLIBS}"
 FILES:libavdevice = "${libdir}/libavdevice${SOLIBS}"
 FILES:libavfilter = "${libdir}/libavfilter${SOLIBS}"
 FILES:libavformat = "${libdir}/libavformat${SOLIBS}"
+FILES:libavresample = "${libdir}/libavresample${SOLIBS}"
 FILES:libavutil = "${libdir}/libavutil${SOLIBS}"
 FILES:libpostproc = "${libdir}/libpostproc${SOLIBS}"
 FILES:libswresample = "${libdir}/libswresample${SOLIBS}"
 FILES:libswscale = "${libdir}/libswscale${SOLIBS}"
-
 # ffmpeg disables PIC on some platforms (e.g. x86-32)
 INSANE_SKIP:${MLPREFIX}libavcodec = "textrel"
 INSANE_SKIP:${MLPREFIX}libavdevice = "textrel"
 INSANE_SKIP:${MLPREFIX}libavfilter = "textrel"
 INSANE_SKIP:${MLPREFIX}libavformat = "textrel"
 INSANE_SKIP:${MLPREFIX}libavutil = "textrel"
+INSANE_SKIP:${MLPREFIX}libavresample = "textrel"
 INSANE_SKIP:${MLPREFIX}libswscale = "textrel"
 INSANE_SKIP:${MLPREFIX}libswresample = "textrel"
 INSANE_SKIP:${MLPREFIX}libpostproc = "textrel"
+
+# Only enable it for rpi class of machines
+COMPATIBLE_HOST = "null"
+COMPATIBLE_HOST:rpi = "'(.*)'"
+
diff --git a/meta-security/dynamic-layers/networking-layer/recipes-security/sssd/sssd_2.7.3.bb b/meta-security/dynamic-layers/networking-layer/recipes-security/sssd/sssd_2.7.4.bb
similarity index 98%
rename from meta-security/dynamic-layers/networking-layer/recipes-security/sssd/sssd_2.7.3.bb
rename to meta-security/dynamic-layers/networking-layer/recipes-security/sssd/sssd_2.7.4.bb
index 95065b3..78d29c3 100644
--- a/meta-security/dynamic-layers/networking-layer/recipes-security/sssd/sssd_2.7.3.bb
+++ b/meta-security/dynamic-layers/networking-layer/recipes-security/sssd/sssd_2.7.4.bb
@@ -26,7 +26,7 @@
            file://musl_fixup.patch \
            "
 
-SRC_URI[sha256sum] = "ab3c3fe2a69cc7b2557715a11000aaf358c0afd65f2828ca47a2d3b2651d871b"
+SRC_URI[sha256sum] = "10ef90c63fdbfda905145077679035bd5ad16b24daad13160de8d0ff82ea9950"
 
 UPSTREAM_CHECK_URI = "https://github.com/SSSD/${BPN}/releases"
 
diff --git a/meta-security/recipes-ids/suricata/suricata_6.0.6.bb b/meta-security/recipes-ids/suricata/suricata_6.0.6.bb
index 35054ef..ce9aca8 100644
--- a/meta-security/recipes-ids/suricata/suricata_6.0.6.bb
+++ b/meta-security/recipes-ids/suricata/suricata_6.0.6.bb
@@ -145,6 +145,8 @@
 CACHED_CONFIGUREVARS = "ac_cv_func_malloc_0_nonnull=yes ac_cv_func_realloc_0_nonnull=yes"
 
 do_configure:prepend () {
+    # use host for RUST_SURICATA_LIB_XC_DIR
+    sed -i -e 's,\${host_alias},${RUST_HOST_SYS},' ${S}/configure.ac
     oe_runconf
 }
 
diff --git a/meta-security/recipes-security/chipsec/chipsec_1.8.7.bb b/meta-security/recipes-security/chipsec/chipsec_1.8.8.bb
similarity index 94%
rename from meta-security/recipes-security/chipsec/chipsec_1.8.7.bb
rename to meta-security/recipes-security/chipsec/chipsec_1.8.8.bb
index 60272be..858ece5 100644
--- a/meta-security/recipes-security/chipsec/chipsec_1.8.7.bb
+++ b/meta-security/recipes-security/chipsec/chipsec_1.8.8.bb
@@ -10,7 +10,7 @@
 DEPENDS = "virtual/kernel nasm-native"
 
 SRC_URI = "git://github.com/chipsec/chipsec.git;branch=main;protocol=https"
-SRCREV = "7b9cc5cd449f2e1e4b5dad46c0eb14348e54e3f0"
+SRCREV = "fd1a98688978fd4b8ca77b512a72eae49c3beffa"
 
 S = "${WORKDIR}/git"
 
diff --git a/meta-security/recipes-security/cryptmount/cryptmount_5.3.3.bb b/meta-security/recipes-security/cryptmount/cryptmount_5.3.3.bb
index fb522cb..6741a5f 100644
--- a/meta-security/recipes-security/cryptmount/cryptmount_5.3.3.bb
+++ b/meta-security/recipes-security/cryptmount/cryptmount_5.3.3.bb
@@ -22,6 +22,16 @@
 PACKAGECONFIG[luks] = "--enable-luks, --disable-luks, cryptsetup"
 PACKAGECONFIG[nls] = "--enable-nls, --disable-nls, "
 
+SYSTEMD_PACKAGES = "${PN}"
 SYSTEMD_SERVICE:${PN} = "cryptmount.service"
 
+do_install:append () {
+    if ${@bb.utils.contains('DISTRO_FEATURES','systemd','true','false',d)}; then
+        install -D -m 0644 ${S}/sysinit/cryptmount.service ${D}${systemd_system_unitdir}/cryptmount.service
+        rm -fr ${D}/usr/lib
+    fi
+}
+
+FILES:${PN} += "${systemd_system_unitdir}"
+
 RDEPENDS:${PN} = "libdevmapper"
diff --git a/poky/bitbake/bin/bitbake b/poky/bitbake/bin/bitbake
index b56f620..7cbf88f 100755
--- a/poky/bitbake/bin/bitbake
+++ b/poky/bitbake/bin/bitbake
@@ -28,7 +28,7 @@
 if sys.getfilesystemencoding() != "utf-8":
     sys.exit("Please use a locale setting which supports UTF-8 (such as LANG=en_US.UTF-8).\nPython can't change the filesystem locale after loading so we need a UTF-8 when Python starts or things won't work.")
 
-__version__ = "2.0.1"
+__version__ = "2.2.0"
 
 if __name__ == "__main__":
     if __version__ != bb.__version__:
diff --git a/poky/bitbake/doc/bitbake-user-manual/bitbake-user-manual-metadata.rst b/poky/bitbake/doc/bitbake-user-manual/bitbake-user-manual-metadata.rst
index 3378216..af99471 100644
--- a/poky/bitbake/doc/bitbake-user-manual/bitbake-user-manual-metadata.rst
+++ b/poky/bitbake/doc/bitbake-user-manual/bitbake-user-manual-metadata.rst
@@ -1922,6 +1922,15 @@
 the commonly used functions ``bb.utils.contains()`` and
 ``bb.utils.mkdirhier()``, which come with docstrings.
 
+Testing and Debugging BitBake Python code
+-----------------------------------------
+
+The OpenEmbedded build system implements a convenient ``pydevshell`` target which
+you can use to access the BitBake datastore and experiment with your own Python
+code. See :yocto_docs:`Using a Python Development Shell
+</dev-manual/common-tasks.html#using-a-python-development-shell>` in the Yocto
+Project manual for details.
+
 Task Checksums and Setscene
 ===========================
 
diff --git a/poky/bitbake/doc/bitbake-user-manual/bitbake-user-manual-ref-variables.rst b/poky/bitbake/doc/bitbake-user-manual/bitbake-user-manual-ref-variables.rst
index 12aef3c..725e6c2 100644
--- a/poky/bitbake/doc/bitbake-user-manual/bitbake-user-manual-ref-variables.rst
+++ b/poky/bitbake/doc/bitbake-user-manual/bitbake-user-manual-ref-variables.rst
@@ -483,6 +483,31 @@
          You must set this variable in the external environment in order
          for it to work.
 
+   :term:`BB_PRESSURE_MAX_CPU`
+      The threshold for maximum CPU pressure before BitBake prevents the
+      scheduling of new tasks. Once the :term:`BB_PRESSURE_MAX_CPU` threshold
+      is exceeded, new tasks are not started until the pressure subsides to
+      below the threshold. If :term:`BB_PRESSURE_MAX_CPU` is not set, CPU
+      pressure is not monitored. A threshold can be set in ``conf/local.conf``
+      as::
+
+         BB_PRESSURE_MAX_CPU = "500"
+
+   :term:`BB_PRESSURE_MAX_IO`
+      The threshold for maximum IO pressure experienced before BitBake
+      prevents the scheduling of new tasks. The IO pressure is regulated in the
+      same way as :term:`BB_PRESSURE_MAX_CPU`. At this point in time,
+      experiments show that IO pressure tends to be short-lived and regulating
+      just the CPU can help to reduce it.
+
+   :term:`BB_PRESSURE_MAX_MEMORY`
+      The threshold for maximum memory pressure experienced before BitBake
+      prevents the scheduling of new tasks. The memory pressure is regulated in
+      the same way as :term:`BB_PRESSURE_MAX_CPU`. Note that any memory
+      pressure indicates that a system is being pushed beyond its capacity. At
+      this point in time, experiments show that memory pressure tends to be
+      short-lived and regulating just the CPU can help to reduce it.
+
    :term:`BB_RUNFMT`
       Specifies the name of the executable script files (i.e. run files)
       saved into ``${``\ :term:`T`\ ``}``. By default, the
diff --git a/poky/bitbake/lib/bb/__init__.py b/poky/bitbake/lib/bb/__init__.py
index 6b470aa..99cb5a0 100644
--- a/poky/bitbake/lib/bb/__init__.py
+++ b/poky/bitbake/lib/bb/__init__.py
@@ -9,7 +9,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 #
 
-__version__ = "2.0.1"
+__version__ = "2.2.0"
 
 import sys
 if sys.version_info < (3, 6, 0):
@@ -60,6 +60,10 @@
                 return
             if loglevel < bb.msg.loggerDefaultLogLevel:
                 return
+
+        if not isinstance(level, int) or not isinstance(msg, str):
+            mainlogger.warning("Invalid arguments in bbdebug: %s" % repr((level, msg,) + args))
+
         return self.log(loglevel, msg, *args, **kwargs)
 
     def plain(self, msg, *args, **kwargs):
diff --git a/poky/bitbake/lib/bb/asyncrpc/client.py b/poky/bitbake/lib/bb/asyncrpc/client.py
index 881434d..fa042bb 100644
--- a/poky/bitbake/lib/bb/asyncrpc/client.py
+++ b/poky/bitbake/lib/bb/asyncrpc/client.py
@@ -31,7 +31,17 @@
 
     async def connect_unix(self, path):
         async def connect_sock():
-            return await asyncio.open_unix_connection(path)
+            # AF_UNIX has path length issues so chdir here to workaround
+            cwd = os.getcwd()
+            try:
+                os.chdir(os.path.dirname(path))
+                # The socket must be opened synchronously so that CWD doesn't get
+                # changed out from underneath us so we pass as a sock into asyncio
+                sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM, 0)
+                sock.connect(os.path.basename(path))
+            finally:
+               os.chdir(cwd)
+            return await asyncio.open_unix_connection(sock=sock)
 
         self._connect_sock = connect_sock
 
@@ -150,14 +160,8 @@
             setattr(self, m, self._get_downcall_wrapper(downcall))
 
     def connect_unix(self, path):
-        # AF_UNIX has path length issues so chdir here to workaround
-        cwd = os.getcwd()
-        try:
-            os.chdir(os.path.dirname(path))
-            self.loop.run_until_complete(self.client.connect_unix(os.path.basename(path)))
-            self.loop.run_until_complete(self.client.connect())
-        finally:
-            os.chdir(cwd)
+        self.loop.run_until_complete(self.client.connect_unix(path))
+        self.loop.run_until_complete(self.client.connect())
 
     @property
     def max_chunk(self):
diff --git a/poky/bitbake/lib/bb/cooker.py b/poky/bitbake/lib/bb/cooker.py
index 1b6ee30..32a529f 100644
--- a/poky/bitbake/lib/bb/cooker.py
+++ b/poky/bitbake/lib/bb/cooker.py
@@ -425,7 +425,7 @@
                         sock = socket.create_connection(upstream.split(":"), 5)
                         sock.close()
                     except socket.error as e:
-                        bb.warn("BB_HASHSERVE_UPSTREAM is not valid, unable to connect hash equivalence server at '%s': %s" 
+                        bb.warn("BB_HASHSERVE_UPSTREAM is not valid, unable to connect hash equivalence server at '%s': %s"
                                  % (upstream, repr(e)))
 
                 self.hashservaddr = "unix://%s/hashserve.sock" % self.data.getVar("TOPDIR")
@@ -1277,15 +1277,15 @@
                                     except bb.utils.VersionStringException as vse:
                                         bb.fatal('Error parsing LAYERRECOMMENDS_%s: %s' % (c, str(vse)))
                                     if not res:
-                                        parselog.debug(3,"Layer '%s' recommends version %s of layer '%s', but version %s is currently enabled in your configuration. Check that you are using the correct matching versions/branches of these two layers.", c, opstr, rec, layerver)
+                                        parselog.debug3("Layer '%s' recommends version %s of layer '%s', but version %s is currently enabled in your configuration. Check that you are using the correct matching versions/branches of these two layers.", c, opstr, rec, layerver)
                                         continue
                                 else:
-                                    parselog.debug(3,"Layer '%s' recommends version %s of layer '%s', which exists in your configuration but does not specify a version. Check that you are using the correct matching versions/branches of these two layers.", c, opstr, rec)
+                                    parselog.debug3("Layer '%s' recommends version %s of layer '%s', which exists in your configuration but does not specify a version. Check that you are using the correct matching versions/branches of these two layers.", c, opstr, rec)
                                     continue
-                            parselog.debug(3,"Layer '%s' recommends layer '%s', so we are adding it", c, rec)
+                            parselog.debug3("Layer '%s' recommends layer '%s', so we are adding it", c, rec)
                             collection_depends[c].append(rec)
                         else:
-                            parselog.debug(3,"Layer '%s' recommends layer '%s', but this layer is not enabled in your configuration", c, rec)
+                            parselog.debug3("Layer '%s' recommends layer '%s', but this layer is not enabled in your configuration", c, rec)
 
             # Recursively work out collection priorities based on dependencies
             def calc_layer_priority(collection):
@@ -1297,7 +1297,7 @@
                         if depprio > max_depprio:
                             max_depprio = depprio
                     max_depprio += 1
-                    parselog.debug(1, "Calculated priority of layer %s as %d", collection, max_depprio)
+                    parselog.debug("Calculated priority of layer %s as %d", collection, max_depprio)
                     collection_priorities[collection] = max_depprio
 
             # Calculate all layer priorities using calc_layer_priority and store in bbfile_config_priorities
@@ -1309,7 +1309,7 @@
                     errors = True
                     continue
                 elif regex == "":
-                    parselog.debug(1, "BBFILE_PATTERN_%s is empty" % c)
+                    parselog.debug("BBFILE_PATTERN_%s is empty" % c)
                     cre = re.compile('^NULL$')
                     errors = False
                 else:
@@ -1678,7 +1678,7 @@
         self.state = state.parsing
 
         if not self.parser.parse_next():
-            collectlog.debug(1, "parsing complete")
+            collectlog.debug("parsing complete")
             if self.parser.error:
                 raise bb.BBHandledException()
             self.show_appends_with_no_recipes()
@@ -1723,7 +1723,7 @@
 
         if 'universe' in pkgs_to_build:
             parselog.verbnote("The \"universe\" target is only intended for testing and may produce errors.")
-            parselog.debug(1, "collating packages for \"universe\"")
+            parselog.debug("collating packages for \"universe\"")
             pkgs_to_build.remove('universe')
             for mc in self.multiconfigs:
                 for t in self.recipecaches[mc].universe_target:
@@ -1836,7 +1836,7 @@
         """Collect all available .bb build files"""
         masked = 0
 
-        collectlog.debug(1, "collecting .bb files")
+        collectlog.debug("collecting .bb files")
 
         files = (config.getVar( "BBFILES") or "").split()
 
@@ -1923,7 +1923,7 @@
         bbappend = []
         for f in newfiles:
             if bbmask and bbmask_compiled.search(f):
-                collectlog.debug(1, "skipping masked file %s", f)
+                collectlog.debug("skipping masked file %s", f)
                 masked += 1
                 continue
             if f.endswith('.bb'):
@@ -1931,7 +1931,7 @@
             elif f.endswith('.bbappend'):
                 bbappend.append(f)
             else:
-                collectlog.debug(1, "skipping %s: unknown file extension", f)
+                collectlog.debug("skipping %s: unknown file extension", f)
 
         # Build a list of .bbappend files for each .bb file
         for f in bbappend:
diff --git a/poky/bitbake/lib/bb/cookerdata.py b/poky/bitbake/lib/bb/cookerdata.py
index 9706948..8a354fe 100644
--- a/poky/bitbake/lib/bb/cookerdata.py
+++ b/poky/bitbake/lib/bb/cookerdata.py
@@ -356,7 +356,7 @@
 
         layerconf = self._findLayerConf(data)
         if layerconf:
-            parselog.debug(2, "Found bblayers.conf (%s)", layerconf)
+            parselog.debug2("Found bblayers.conf (%s)", layerconf)
             # By definition bblayers.conf is in conf/ of TOPDIR.
             # We may have been called with cwd somewhere else so reset TOPDIR
             data.setVar("TOPDIR", os.path.dirname(os.path.dirname(layerconf)))
@@ -384,7 +384,7 @@
                 raise bb.BBHandledException()
 
             for layer in layers:
-                parselog.debug(2, "Adding layer %s", layer)
+                parselog.debug2("Adding layer %s", layer)
                 if 'HOME' in approved and '~' in layer:
                     layer = os.path.expanduser(layer)
                 if layer.endswith('/'):
diff --git a/poky/bitbake/lib/bb/fetch2/__init__.py b/poky/bitbake/lib/bb/fetch2/__init__.py
index 0fb718b..893ec6f 100644
--- a/poky/bitbake/lib/bb/fetch2/__init__.py
+++ b/poky/bitbake/lib/bb/fetch2/__init__.py
@@ -545,7 +545,7 @@
         bb.warn('Invalid mirror data %s, should have paired members.' % data)
     return list(zip(*[iter(mirrors)]*2))
 
-def verify_checksum(ud, d, precomputed={}):
+def verify_checksum(ud, d, precomputed={}, localpath=None, fatal_nochecksum=True):
     """
     verify the MD5 and SHA256 checksum for downloaded src
 
@@ -563,13 +563,16 @@
     if ud.ignore_checksums or not ud.method.supports_checksum(ud):
         return {}
 
+    if localpath is None:
+        localpath = ud.localpath
+
     def compute_checksum_info(checksum_id):
         checksum_name = getattr(ud, "%s_name" % checksum_id)
 
         if checksum_id in precomputed:
             checksum_data = precomputed[checksum_id]
         else:
-            checksum_data = getattr(bb.utils, "%s_file" % checksum_id)(ud.localpath)
+            checksum_data = getattr(bb.utils, "%s_file" % checksum_id)(localpath)
 
         checksum_expected = getattr(ud, "%s_expected" % checksum_id)
 
@@ -595,7 +598,7 @@
             checksum_lines = ["SRC_URI[%s] = \"%s\"" % (ci["name"], ci["data"])]
 
     # If no checksum has been provided
-    if ud.method.recommends_checksum(ud) and all(ci["expected"] is None for ci in checksum_infos):
+    if fatal_nochecksum and ud.method.recommends_checksum(ud) and all(ci["expected"] is None for ci in checksum_infos):
         messages = []
         strict = d.getVar("BB_STRICT_CHECKSUM") or "0"
 
@@ -627,7 +630,7 @@
     for ci in checksum_infos:
         if ci["expected"] and ci["expected"] != ci["data"]:
             messages.append("File: '%s' has %s checksum '%s' when '%s' was " \
-                            "expected" % (ud.localpath, ci["id"], ci["data"], ci["expected"]))
+                            "expected" % (localpath, ci["id"], ci["data"], ci["expected"]))
             bad_checksum = ci["data"]
 
     if bad_checksum:
@@ -977,6 +980,7 @@
 
                 try:
                     newud = FetchData(newuri, ld)
+                    newud.ignore_checksums = True
                     newud.setup_localpath(ld)
                 except bb.fetch2.BBFetchException as e:
                     logger.debug("Mirror fetch failure for url %s (original url: %s)" % (newuri, origud.url))
diff --git a/poky/bitbake/lib/bb/fetch2/crate.py b/poky/bitbake/lib/bb/fetch2/crate.py
index f4ddc78..f091200 100644
--- a/poky/bitbake/lib/bb/fetch2/crate.py
+++ b/poky/bitbake/lib/bb/fetch2/crate.py
@@ -71,7 +71,7 @@
         ud.parm['downloadfilename'] = "%s-%s.crate" % (name, version)
         ud.parm['name'] = name
 
-        logger.debug("Fetching %s to %s" % (ud.url, ud.parm['downloadfilename']))
+        logger.debug2("Fetching %s to %s" % (ud.url, ud.parm['downloadfilename']))
 
     def unpack(self, ud, rootdir, d):
         """
diff --git a/poky/bitbake/lib/bb/fetch2/git.py b/poky/bitbake/lib/bb/fetch2/git.py
index 4534bd7..17d4904 100644
--- a/poky/bitbake/lib/bb/fetch2/git.py
+++ b/poky/bitbake/lib/bb/fetch2/git.py
@@ -567,13 +567,12 @@
         source_found = False
         source_error = []
 
-        if not source_found:
-            clonedir_is_up_to_date = not self.clonedir_need_update(ud, d)
-            if clonedir_is_up_to_date:
-                runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d)
-                source_found = True
-            else:
-                source_error.append("clone directory not available or not up to date: " + ud.clonedir)
+        clonedir_is_up_to_date = not self.clonedir_need_update(ud, d)
+        if clonedir_is_up_to_date:
+            runfetchcmd("%s clone %s %s/ %s" % (ud.basecmd, ud.cloneflags, ud.clonedir, destdir), d)
+            source_found = True
+        else:
+            source_error.append("clone directory not available or not up to date: " + ud.clonedir)
 
         if not source_found:
             if ud.shallow:
diff --git a/poky/bitbake/lib/bb/fetch2/ssh.py b/poky/bitbake/lib/bb/fetch2/ssh.py
index 8d082b3..0cbb2a6 100644
--- a/poky/bitbake/lib/bb/fetch2/ssh.py
+++ b/poky/bitbake/lib/bb/fetch2/ssh.py
@@ -150,8 +150,6 @@
         )
 
         check_network_access(d, cmd, urldata.url)
+        runfetchcmd(cmd, d)
 
-        if runfetchcmd(cmd, d):
-            return True
-
-        return False
+        return True
diff --git a/poky/bitbake/lib/bb/fetch2/wget.py b/poky/bitbake/lib/bb/fetch2/wget.py
index b2b542e..821afa5 100644
--- a/poky/bitbake/lib/bb/fetch2/wget.py
+++ b/poky/bitbake/lib/bb/fetch2/wget.py
@@ -132,6 +132,11 @@
 
         self._runwget(ud, d, fetchcmd, False)
 
+        # Try and verify any checksum now, meaning if it isn't correct, we don't remove the
+        # original file, which might be a race (imagine two recipes referencing the same
+        # source, one with an incorrect checksum)
+        bb.fetch2.verify_checksum(ud, d, localpath=localpath, fatal_nochecksum=False)
+
         # Remove the ".tmp" and move the file into position atomically
         # Our lock prevents multiple writers but mirroring code may grab incomplete files
         os.rename(localpath, localpath[:-4])
diff --git a/poky/bitbake/lib/bb/utils.py b/poky/bitbake/lib/bb/utils.py
index 92d44c5..e6e21e2 100644
--- a/poky/bitbake/lib/bb/utils.py
+++ b/poky/bitbake/lib/bb/utils.py
@@ -29,6 +29,8 @@
 import copy
 import ctypes
 import random
+import socket
+import struct
 import tempfile
 from subprocess import getstatusoutput
 from contextlib import contextmanager
@@ -1603,6 +1605,44 @@
     except:
         pass
 
+def enable_loopback_networking():
+    # From bits/ioctls.h
+    SIOCGIFFLAGS = 0x8913
+    SIOCSIFFLAGS = 0x8914
+    SIOCSIFADDR = 0x8916
+    SIOCSIFNETMASK = 0x891C
+
+    # if.h
+    IFF_UP = 0x1
+    IFF_RUNNING = 0x40
+
+    # bits/socket.h
+    AF_INET = 2
+
+    # char ifr_name[IFNAMSIZ=16]
+    ifr_name = struct.pack("@16s", b"lo")
+    def netdev_req(fd, req, data = b""):
+        # Pad and add interface name
+        data = ifr_name + data + (b'\x00' * (16 - len(data)))
+        # Return all data after interface name
+        return fcntl.ioctl(fd, req, data)[16:]
+
+    with socket.socket(socket.AF_INET, socket.SOCK_DGRAM, socket.IPPROTO_IP) as sock:
+        fd = sock.fileno()
+
+        # struct sockaddr_in ifr_addr { unsigned short family; uint16_t sin_port ; uint32_t in_addr; }
+        req = struct.pack("@H", AF_INET) + struct.pack("=H4B", 0, 127, 0, 0, 1)
+        netdev_req(fd, SIOCSIFADDR, req)
+
+        # short ifr_flags
+        flags = struct.unpack_from('@h', netdev_req(fd, SIOCGIFFLAGS))[0]
+        flags |= IFF_UP | IFF_RUNNING
+        netdev_req(fd, SIOCSIFFLAGS, struct.pack('@h', flags))
+
+        # struct sockaddr_in ifr_netmask
+        req = struct.pack("@H", AF_INET) + struct.pack("=H4B", 0, 255, 0, 0, 0)
+        netdev_req(fd, SIOCSIFNETMASK, req)
+
 def disable_network(uid=None, gid=None):
     """
     Disable networking in the current process if the kernel supports it, else
@@ -1624,7 +1664,7 @@
 
     ret = libc.unshare(CLONE_NEWNET | CLONE_NEWUSER)
     if ret != 0:
-        logger.debug("System doesn't suport disabling network without admin privs")
+        logger.debug("System doesn't support disabling network without admin privs")
         return
     with open("/proc/self/uid_map", "w") as f:
         f.write("%s %s 1" % (uid, uid))
diff --git a/poky/bitbake/lib/toaster/orm/fixtures/poky.xml b/poky/bitbake/lib/toaster/orm/fixtures/poky.xml
index ed86114..20fcc01 100644
--- a/poky/bitbake/lib/toaster/orm/fixtures/poky.xml
+++ b/poky/bitbake/lib/toaster/orm/fixtures/poky.xml
@@ -42,7 +42,7 @@
   <!-- Releases available -->
   <object model="orm.release" pk="1">
     <field type="CharField" name="name">kirkstone</field>
-    <field type="CharField" name="description">Yocto Project 3.5 "Kirkstone"</field>
+    <field type="CharField" name="description">Yocto Project 4.0 "Kirkstone"</field>
     <field rel="ManyToOneRel" to="orm.bitbakeversion" name="bitbake_version">1</field>
     <field type="CharField" name="branch_name">kirkstone</field>
     <field type="TextField" name="helptext">Toaster will run your builds using the tip of the &lt;a href="https://git.yoctoproject.org/cgit/cgit.cgi/poky/log/?h=kirkstone"&gt;Yocto Project Kirkstone branch&lt;/a&gt;.</field>
diff --git a/poky/documentation/dev-manual/common-tasks.rst b/poky/documentation/dev-manual/common-tasks.rst
index b08a553..559709d 100644
--- a/poky/documentation/dev-manual/common-tasks.rst
+++ b/poky/documentation/dev-manual/common-tasks.rst
@@ -653,39 +653,6 @@
 command::
 
    $ bitbake-layers --help
-   NOTE: Starting bitbake server...
-   usage: bitbake-layers [-d] [-q] [-F] [--color COLOR] [-h] <subcommand> ...
-
-   BitBake layers utility
-
-   optional arguments:
-     -d, --debug           Enable debug output
-     -q, --quiet           Print only errors
-     -F, --force           Force add without recipe parse verification
-     --color COLOR         Colorize output (where COLOR is auto, always, never)
-     -h, --help            show this help message and exit
-
-   subcommands:
-     <subcommand>
-       layerindex-fetch    Fetches a layer from a layer index along with its
-                           dependent layers, and adds them to conf/bblayers.conf.
-       layerindex-show-depends
-                           Find layer dependencies from layer index.
-       add-layer           Add one or more layers to bblayers.conf.
-       remove-layer        Remove one or more layers from bblayers.conf.
-       flatten             flatten layer configuration into a separate output
-                           directory.
-       show-layers         show current configured layers.
-       show-overlayed      list overlayed recipes (where the same recipe exists
-                           in another layer)
-       show-recipes        list available recipes, showing the layer they are
-                           provided by
-       show-appends        list bbappend files and recipe files they apply to
-       show-cross-depends  Show dependencies between recipes that cross layer
-                           boundaries.
-       create-layer        Create a basic layer
-
-   Use bitbake-layers <subcommand> --help to get help on a specific command
 
 The following list describes the available commands:
 
@@ -759,8 +726,18 @@
 -  ``layerindex-show-depends``: Finds layer dependencies from the
    layer index.
 
+-  ``save-build-conf``: Saves the currently active build configuration
+   (``conf/local.conf``, ``conf/bblayers.conf``) as a template into a layer.
+   This template can later be used for setting up builds via :term:`TEMPLATECONF`.
+   For information about saving and using configuration templates, see
+   ":ref:`dev-manual/common-tasks:creating a custom template configuration directory`".
+
 -  ``create-layer``: Creates a basic layer.
 
+-  ``create-layers-setup``: Writes out a configuration file and/or a script that
+   can replicate the directory structure and revisions of the layers in a current build.
+   For more information, see ":ref:`dev-manual/common-tasks:saving and restoring the layers setup`".
+
 Creating a General Layer Using the ``bitbake-layers`` Script
 ------------------------------------------------------------
 
@@ -880,6 +857,62 @@
    During a build, the OpenEmbedded build system looks in the layers
    from the top of the list down to the bottom in that order.
 
+Saving and restoring the layers setup
+-------------------------------------
+
+Once you have a working build with the correct set of layers, it is beneficial
+to capture the layer setup --- what they are, which repositories they come from
+and which SCM revisions they're at --- into a configuration file, so that this
+setup can be easily replicated later, perhaps on a different machine. Here's
+how to do this::
+
+   $ bitbake-layers create-layers-setup /srv/work/alex/meta-alex/
+   NOTE: Starting bitbake server...
+   NOTE: Created /srv/work/alex/meta-alex/setup-layers.json
+   NOTE: Created /srv/work/alex/meta-alex/setup-layers
+
+The tool needs a single argument which tells where to place the output, consisting
+of a json formatted layer configuration, and a ``setup-layers`` script that can use that configuration
+to restore the layers in a different location, or on a different host machine. The argument
+can point to a custom layer (which is then deemed a "bootstrap" layer that needs to be
+checked out first), or into a completely independent location.
+
+The replication of the layers is performed by running the ``setup-layers`` script provided
+above:
+
+1. Clone the bootstrap layer or some other repository to obtain
+   the json config and the setup script that can use it.
+
+2. Run the script directly with no options::
+
+      alex@Zen2:/srv/work/alex/my-build$ meta-alex/setup-layers
+      Note: not checking out source meta-alex, use --force-bootstraplayer-checkout to override.
+
+      Setting up source meta-intel, revision 15.0-hardknott-3.3-310-g0a96edae, branch master
+      Running 'git init -q /srv/work/alex/my-build/meta-intel'
+      Running 'git remote remove origin > /dev/null 2>&1; git remote add origin git://git.yoctoproject.org/meta-intel' in /srv/work/alex/my-build/meta-intel
+      Running 'git fetch -q origin || true' in /srv/work/alex/my-build/meta-intel
+      Running 'git checkout -q 0a96edae609a3f48befac36af82cf1eed6786b4a' in /srv/work/alex/my-build/meta-intel
+
+      Setting up source poky, revision 4.1_M1-372-g55483d28f2, branch akanavin/setup-layers
+      Running 'git init -q /srv/work/alex/my-build/poky'
+      Running 'git remote remove origin > /dev/null 2>&1; git remote add origin git://git.yoctoproject.org/poky' in /srv/work/alex/my-build/poky
+      Running 'git fetch -q origin || true' in /srv/work/alex/my-build/poky
+      Running 'git remote remove poky-contrib > /dev/null 2>&1; git remote add poky-contrib ssh://git@push.yoctoproject.org/poky-contrib' in /srv/work/alex/my-build/poky
+      Running 'git fetch -q poky-contrib || true' in /srv/work/alex/my-build/poky
+      Running 'git checkout -q 11db0390b02acac1324e0f827beb0e2e3d0d1d63' in /srv/work/alex/my-build/poky
+
+.. note::
+   This will work to update an existing checkout as well.
+
+.. note::
+   The script is self-sufficient and requires only python3
+   and git on the build machine.
+
+.. note::
+   Both the ``create-layers-setup`` and the ``setup-layers`` provide several additional options
+   that customize their behavior - you are welcome to study them via the ``--help`` command line parameter.
+
 Customizing Images
 ==================
 
@@ -2577,7 +2610,7 @@
 
       S = "${WORKDIR}/postfix-${PV}"
       CFLAGS += "-DNO_ASM"
-      SRC_URI:append = " file://fixup.patch"
+      CFLAGS:append = " --enable-important-feature"
 
 -  *Functions:* Functions provide a series of actions to be performed.
    You usually use functions to override the default implementation of a
@@ -2708,19 +2741,21 @@
    to existing variables. This operator does not add any additional
    space. Also, the operator is applied after all the ``+=``, and ``=+``
    operators have been applied and after all ``=`` assignments have
-   occurred.
+   occurred. This means that if ``:append`` is used in a recipe, it can
+   only be overridden by another layer using the special ``:remove``
+   operator, which in turn will prevent further layers from adding it back.
 
    The following example shows the space being explicitly added to the
    start to ensure the appended value is not merged with the existing
    value::
 
-      SRC_URI:append = " file://fix-makefile.patch"
+      CFLAGS:append = " --enable-important-feature"
 
    You can also use
    the ``:append`` operator with overrides, which results in the actions
    only being performed for the specified target or machine::
 
-      SRC_URI:append:sh4 = " file://fix-makefile.patch"
+      CFLAGS:append:sh4 = " --enable-important-sh4-specific-feature"
 
 -  *Prepending (:prepend):* Use the ``:prepend`` operator to prepend
    values to existing variables. This operator does not add any
@@ -3560,6 +3595,9 @@
    pydevshell> bb.build.exec_func("do_unpack", d)
    pydevshell>
 
+See the ":ref:`bitbake:bitbake-user-manual/bitbake-user-manual-metadata:functions you can call from within python`"
+section in the BitBake User Manual for details about available functions.
+
 The commands execute just as if the OpenEmbedded build
 system were executing them. Consequently, working this way can be
 helpful when debugging a build or preparing software to be used with the
@@ -6431,71 +6469,51 @@
 ==================================================
 
 If you are producing your own customized version of the build system for
-use by other users, you might want to customize the message shown by the
-setup script or you might want to change the template configuration
-files (i.e. ``local.conf`` and ``bblayers.conf``) that are created in a
-new build directory.
+use by other users, you might want to provide a custom build configuration
+that includes all the necessary settings and layers (i.e. ``local.conf`` and
+``bblayers.conf`` that are created in a new build directory) and a custom
+message that is shown when setting up the build. This can be done by
+creating one or more template configuration directories in your
+custom distribution layer.
+
+This can be done by using ``bitbake-layers save-build-conf``::
+
+   $ bitbake-layers save-build-conf ../../meta-alex/ test-1
+   NOTE: Starting bitbake server...
+   NOTE: Configuration template placed into /srv/work/alex/meta-alex/conf/templates/test-1
+   Please review the files in there, and particularly provide a configuration description in /srv/work/alex/meta-alex/conf/templates/test-1/conf-notes.txt
+   You can try out the configuration with
+   TEMPLATECONF=/srv/work/alex/meta-alex/conf/templates/test-1 . /srv/work/alex/poky/oe-init-build-env build-try-test-1
+
+The above command takes the config files from the currently active build directory under ``conf``,
+replaces site-specific paths in ``bblayers.conf`` with ``##OECORE##``-relative paths, and copies
+the config files into a specified layer under a specified template name.
+
+To use those saved templates as a starting point for a build, users should point
+to one of them with the :term:`TEMPLATECONF` environment variable::
+
+   TEMPLATECONF=/srv/work/alex/meta-alex/conf/templates/test-1 . /srv/work/alex/poky/oe-init-build-env build-try-test-1
 
 The OpenEmbedded build system uses the environment variable
 :term:`TEMPLATECONF` to locate the directory from which it gathers
 configuration information that ultimately ends up in the
 :term:`Build Directory` ``conf`` directory.
-By default, :term:`TEMPLATECONF` is set as follows in the ``poky``
-repository::
 
-   TEMPLATECONF=${TEMPLATECONF:-meta-poky/conf}
+If :term:`TEMPLATECONF` is not set, the default value is obtained
+from the ``.templateconf`` file that is read from the same directory as
+the ``oe-init-build-env`` script. For the Poky reference distribution this
+would be::
 
-This is the
-directory used by the build system to find templates from which to build
-some key configuration files. If you look at this directory, you will
+   TEMPLATECONF=${TEMPLATECONF:-meta-poky/conf/templates/default}
+
+If you look at a configuration template directory, you will
 see the ``bblayers.conf.sample``, ``local.conf.sample``, and
 ``conf-notes.txt`` files. The build system uses these files to form the
-respective ``bblayers.conf`` file, ``local.conf`` file, and display the
-list of BitBake targets when running the setup script.
-
-To override these default configuration files with configurations you
-want used within every new Build Directory, simply set the
-:term:`TEMPLATECONF` variable to your directory. The :term:`TEMPLATECONF`
-variable is set in the ``.templateconf`` file, which is in the top-level
-:term:`Source Directory` folder
-(e.g. ``poky``). Edit the ``.templateconf`` so that it can locate your
-directory.
-
-Best practices dictate that you should keep your template configuration
-directory in your custom distribution layer. For example, suppose you
-have a layer named ``meta-mylayer`` located in your home directory and
-you want your template configuration directory named ``myconf``.
-Changing the ``.templateconf`` as follows causes the OpenEmbedded build
-system to look in your directory and base its configuration files on the
-``*.sample`` configuration files it finds. The final configuration files
-(i.e. ``local.conf`` and ``bblayers.conf`` ultimately still end up in
-your Build Directory, but they are based on your ``*.sample`` files.
-::
-
-   TEMPLATECONF=${TEMPLATECONF:-meta-mylayer/myconf}
-
-Aside from the ``*.sample`` configuration files, the ``conf-notes.txt``
-also resides in the default ``meta-poky/conf`` directory. The script
-that sets up the build environment (i.e.
-:ref:`structure-core-script`) uses this file to
-display BitBake targets as part of the script output. Customizing this
-``conf-notes.txt`` file is a good way to make sure your list of custom
-targets appears as part of the script's output.
-
-Here is the default list of targets displayed as a result of running
-either of the setup scripts::
-
-   You can now run 'bitbake <target>'
-
-   Common targets are:
-       core-image-minimal
-       core-image-sato
-       meta-toolchain
-       meta-ide-support
-
-Changing the listed common targets is as easy as editing your version of
-``conf-notes.txt`` in your custom template configuration directory and
-making sure you have :term:`TEMPLATECONF` set to your directory.
+respective ``bblayers.conf`` file, ``local.conf`` file, and show
+users a note about the build they're setting up
+when running the ``oe-init-build-env`` setup script. These can be
+edited further if needed to improve or change the build configurations
+available to the users.
 
 Conserving Disk Space
 =====================
@@ -11417,12 +11435,12 @@
    # clean up the .git repos
    $ find . -name ".git" -type d -exec rm -rf {} \;
 
-One
-thing a development organization might want to consider for end-user
-convenience is to modify ``meta-poky/conf/bblayers.conf.sample`` to
-ensure that when the end user utilizes the released build system to
-build an image, the development organization's layers are included in
-the ``bblayers.conf`` file automatically::
+One thing a development organization might want to consider for end-user
+convenience is to modify
+``meta-poky/conf/templates/default/bblayers.conf.sample`` to ensure that when
+the end user utilizes the released build system to build an image, the
+development organization's layers are included in the ``bblayers.conf`` file
+automatically::
 
    # POKY_BBLAYERS_CONF_VERSION is increased each time build/conf/bblayers.conf
    # changes incompatibly
@@ -11451,9 +11469,9 @@
 meta-spdxscanner provides several kinds of scanner. If you want to enable
 this function, you have to follow the following steps:
 
-1. Add meta-spdxscanner layer into ``bblayers.conf``. 
+1. Add meta-spdxscanner layer into ``bblayers.conf``.
 
-2. Refer to the README in meta-spdxscanner to setup the environment (e.g, 
+2. Refer to the README in meta-spdxscanner to setup the environment (e.g,
    setup a fossology server) needed for the scanner.
 
 3. Meta-spdxscanner provides several methods within the bbclass to create spdx files.
diff --git a/poky/documentation/kernel-dev/common.rst b/poky/documentation/kernel-dev/common.rst
index 16ef645..fb8d7cd 100644
--- a/poky/documentation/kernel-dev/common.rst
+++ b/poky/documentation/kernel-dev/common.rst
@@ -360,9 +360,9 @@
 
       FILESEXTRAPATHS:prepend := "${THISDIR}/${PN}:"
 
-      SRC_URI:append = " file://patch-file-one.patch"
-      SRC_URI:append = " file://patch-file-two.patch"
-      SRC_URI:append = " file://patch-file-three.patch"
+      SRC_URI += "file://patch-file-one.patch"
+      SRC_URI += "file://patch-file-two.patch"
+      SRC_URI += "file://patch-file-three.patch"
 
    The :term:`FILESEXTRAPATHS` and :term:`SRC_URI` statements
    enable the OpenEmbedded build system to find patch files. For more
@@ -1002,7 +1002,7 @@
    contents::
 
       FILESEXTRAPATHS:prepend := "${THISDIR}/${PN}:"
-      SRC_URI:append = " file://0001-calibrate.c-Added-some-printk-statements.patch"
+      SRC_URI += "file://0001-calibrate.c-Added-some-printk-statements.patch"
 
    The :term:`FILESEXTRAPATHS` and :term:`SRC_URI` statements
    enable the OpenEmbedded build system to find the patch file.
@@ -1875,7 +1875,7 @@
 2. *Add the Feature File to SRC_URI:* Add the ``.scc`` file to the
    recipe's :term:`SRC_URI` statement::
 
-      SRC_URI:append = " file://test.scc"
+      SRC_URI += "file://test.scc"
 
    The leading space before the path is important as the path is
    appended to the existing path.
@@ -1884,7 +1884,7 @@
    :term:`KERNEL_FEATURES` statement to specify the feature as a kernel
    feature::
 
-      KERNEL_FEATURES:append = " test.scc"
+      KERNEL_FEATURES += "test.scc"
 
    The OpenEmbedded build
    system processes the kernel feature when it builds the kernel.
diff --git a/poky/documentation/overview-manual/concepts.rst b/poky/documentation/overview-manual/concepts.rst
index 83339da..8e0303f 100644
--- a/poky/documentation/overview-manual/concepts.rst
+++ b/poky/documentation/overview-manual/concepts.rst
@@ -269,7 +269,7 @@
 build environment. Here is a list of a few. To see the default
 configurations in a ``local.conf`` file created by the build environment
 script, see the
-:yocto_git:`local.conf.sample </poky/tree/meta-poky/conf/local.conf.sample>`
+:yocto_git:`local.conf.sample </poky/tree/meta-poky/conf/templates/default/local.conf.sample>`
 in the ``meta-poky`` layer:
 
 -  *Target Machine Selection:* Controlled by the
@@ -1006,7 +1006,7 @@
 stage of package installation, post installation scripts that are part
 of the packages are run. Any scripts that fail to run on the build host
 are run on the target when the target system is first booted. If you are
-using a 
+using a
 :ref:`read-only root filesystem <dev-manual/common-tasks:creating a read-only root filesystem>`,
 all the post installation scripts must succeed on the build host during
 the package installation phase since the root filesystem on the target
diff --git a/poky/documentation/ref-manual/classes.rst b/poky/documentation/ref-manual/classes.rst
index 424c505..11e0d47 100644
--- a/poky/documentation/ref-manual/classes.rst
+++ b/poky/documentation/ref-manual/classes.rst
@@ -580,8 +580,8 @@
 .. note::
 
    From a security perspective, hardcoding a default password is not
-   generally a good idea or even legal in some jurisdictions. It is 
-   recommended that you do not do this if you are building a production 
+   generally a good idea or even legal in some jurisdictions. It is
+   recommended that you do not do this if you are building a production
    image.
 
 
@@ -792,7 +792,7 @@
 ``icecc``, for both native and cross compilers. Depending on each
 configure or compile, the OpenEmbedded build system adds the directories
 at the head of the ``PATH`` list and then sets the ``ICECC_CXX`` and
-``ICEC_CC`` variables, which are the paths to the ``g++`` and ``gcc``
+``ICECC_CC`` variables, which are the paths to the ``g++`` and ``gcc``
 compilers, respectively.
 
 For the cross compiler, the class creates a ``tar.gz`` file that
@@ -800,8 +800,8 @@
 is the version of the cross-compiler used in the cross-development
 toolchain, accordingly.
 
-The class handles all three different compile stages (i.e native
-,cross-kernel and target) and creates the necessary environment
+The class handles all three different compile stages (i.e. native,
+cross-kernel and target) and creates the necessary environment
 ``tar.gz`` file to be used by the remote machines. The class also
 supports SDK generation.
 
@@ -811,12 +811,13 @@
 in your ``local.conf`` file, the variable should point to the
 ``icecc-create-env`` script provided by the user. If you do not point to
 a user-provided script, the build system uses the default script
-provided by the recipe ``icecc-create-env-native.bb``.
+provided by the recipe :oe_git:`icecc-create-env_0.1.bb
+</openembedded-core/tree/meta/recipes-devtools/icecc-create-env/icecc-create-env_0.1.bb>`.
 
 .. note::
 
    This script is a modified version and not the one that comes with
-   icecc.
+   ``icecream``.
 
 If you do not want the Icecream distributed compile support to apply to
 specific recipes or classes, you can ask them to be ignored by Icecream
diff --git a/poky/documentation/ref-manual/faq.rst b/poky/documentation/ref-manual/faq.rst
index 2fcbf7d..6104952 100644
--- a/poky/documentation/ref-manual/faq.rst
+++ b/poky/documentation/ref-manual/faq.rst
@@ -137,9 +137,10 @@
    #use_proxy = on
 
 The Yocto Project also includes a
-``meta-poky/conf/site.conf.sample`` file that shows how to configure CVS
-and Git proxy servers if needed. For more information on setting up
-various proxy types and configuring proxy servers, see the
+``meta-poky/conf/templates/default/site.conf.sample`` file that shows
+how to configure CVS and Git proxy servers if needed. For more
+information on setting up various proxy types and configuring proxy
+servers, see the
 ":yocto_wiki:`Working Behind a Network Proxy </Working_Behind_a_Network_Proxy>`"
 Wiki page.
 
diff --git a/poky/documentation/ref-manual/features.rst b/poky/documentation/ref-manual/features.rst
index 8dfe29d..5e853ca 100644
--- a/poky/documentation/ref-manual/features.rst
+++ b/poky/documentation/ref-manual/features.rst
@@ -113,10 +113,15 @@
 information.
 
 This list only represents features as shipped with the Yocto Project
-metadata:
+metadata, as extra layers can define their own:
 
--  *alsa:* Include ALSA support (OSS compatibility kernel modules
-   installed if available).
+-  *3g:* Include support for cellular data.
+
+-  *acl:* Include
+   `Access Control List <https://en.wikipedia.org/wiki/Access-control_list>`__ support.
+
+-  *alsa:* Include `Advanced Linux Sound Architecture <https://en.wikipedia.org/wiki/Advanced_Linux_Sound_Architecture>`__
+   support (OSS compatibility kernel modules installed if available).
 
 -  *api-documentation:* Enables generation of API documentation during
    recipe builds. The resulting documentation is added to SDK tarballs
@@ -129,6 +134,10 @@
 
 -  *cramfs:* Include CramFS support.
 
+-  *debuginfod:* Include support for getting ELF debugging information through
+   a :ref:`debuginfod <dev-manual/common-tasks:using the debuginfod server method>`
+   server.
+
 -  *directfb:* Include DirectFB support.
 
 -  *ext2:* Include tools for supporting for devices with internal
@@ -136,14 +145,25 @@
 
 -  *ipsec:* Include IPSec support.
 
+-  *ipv4:* Include IPv4 support.
+
 -  *ipv6:* Include IPv6 support.
 
 -  *keyboard:* Include keyboard support (e.g. keymaps will be loaded
    during boot).
 
+-  *largefile:* Enable building applications with
+   `largefile support <https://en.wikipedia.org/wiki/Large-file_support>`__.
+
+-  *multiarch:* Enable building applications with multiple architecture
+   support.
+
 -  *ldconfig:* Include support for ldconfig and ``ld.so.conf`` on the
    target.
 
+-  *nfc:* Include support for
+   `Near Field Communication <https://en.wikipedia.org/wiki/Near-field_communication>`__.
+
 -  *nfs:* Include NFS client support (for mounting NFS exports on
    device).
 
@@ -162,6 +182,11 @@
    ":ref:`dev-manual/common-tasks:testing packages with ptest`" section
    in the Yocto Project Development Tasks Manual.
 
+-  *seccomp:* Enables building applications with
+   `seccomp <https://en.wikipedia.org/wiki/Seccomp>`__ support, to
+   allow them to strictly restrict the system calls that they are allowed
+   to invoke.
+
 -  *smbfs:* Include SMB networks client support (for mounting
    Samba/Microsoft Windows shares on device).
 
@@ -180,6 +205,12 @@
    directories into their respective counterparts in the ``/usr``
    directory to provide better package and application compatibility.
 
+-  *vfat:* Include `FAT filesystem <https://en.wikipedia.org/wiki/File_Allocation_Table>`__
+   support.
+
+-  *vulkan:* Include support for the
+   `Vulkan API <https://en.wikipedia.org/wiki/Vulkan>`__.
+
 -  *wayland:* Include the Wayland display server protocol and the
    library that supports it.
 
@@ -187,6 +218,12 @@
 
 -  *x11:* Include the X server and libraries.
 
+-  *xattr:* Include support for
+   `extended file attributes <https://en.wikipedia.org/wiki/Extended_file_attributes>`__.
+
+-  *zeroconf:* Include support for
+   `zero configuration networking <https://en.wikipedia.org/wiki/Zero-configuration_networking>`__.
+
 .. _ref-features-image:
 
 Image Features
diff --git a/poky/documentation/ref-manual/structure.rst b/poky/documentation/ref-manual/structure.rst
index bdcffc1..429f81e 100644
--- a/poky/documentation/ref-manual/structure.rst
+++ b/poky/documentation/ref-manual/structure.rst
@@ -68,6 +68,9 @@
 (commonly described as an "out of tree" build), see the
 ":ref:`structure-core-script`" section.
 
+See the ":ref:`The Build Directory --- build/ <structure-build>`" section for details
+about the contents of the :term:`Build Directory`.
+
 .. _handbook:
 
 ``documentation/``
@@ -189,7 +192,7 @@
    $ source oe-init-build-env ~/mybuilds
 
 The OpenEmbedded build system uses the template configuration files, which
-are found by default in the ``meta-poky/conf/`` directory in the Source
+are found by default in the ``meta-poky/conf/templates/default`` directory in the Source
 Directory. See the
 ":ref:`dev-manual/common-tasks:creating a custom template configuration directory`"
 section in the Yocto Project Development Tasks Manual for more
@@ -237,6 +240,18 @@
 ":ref:`dev-manual/common-tasks:maintaining build output quality`"
 section in the Yocto Project Development Tasks Manual.
 
+.. _structure-build-cache:
+
+``build/cache/``
+----------------
+
+This directory contains several internal files used by the OpenEmbedded
+build system.
+
+It also contains ``sanity_info``, a text file keeping track of important
+build information such as the values of :term:`TMPDIR`, :term:`SSTATE_DIR`,
+as well as the name and version of the host distribution.
+
 .. _structure-build-conf-local.conf:
 
 ``build/conf/local.conf``
@@ -261,15 +276,15 @@
 :ref:`structure-core-script`.
 
 The source ``local.conf.sample`` file used depends on the
-:term:`TEMPLATECONF` script variable, which defaults to ``meta-poky/conf/``
+:term:`TEMPLATECONF` script variable, which defaults to ``meta-poky/conf/templates/default``
 when you are building from the Yocto Project development environment,
-and to ``meta/conf/`` when you are building from the OpenEmbedded-Core
+and to ``meta/conf/templates/default`` when you are building from the OpenEmbedded-Core
 environment. Because the script variable points to the source of the
 ``local.conf.sample`` file, this implies that you can configure your
 build environment from any layer by setting the variable in the
 top-level build environment setup script as follows::
 
-   TEMPLATECONF=your_layer/conf
+   TEMPLATECONF=your_layer/conf/templates/your_template_name
 
 Once the build process gets the sample
 file, it uses ``sed`` to substitute final
@@ -281,7 +296,7 @@
    You can see how the :term:`TEMPLATECONF` variable is used by looking at the
    ``scripts/oe-setup-builddir`` script in the :term:`Source Directory`.
    You can find the Yocto Project version of the ``local.conf.sample`` file in
-   the ``meta-poky/conf`` directory.
+   the ``meta-poky/conf/templates/default`` directory.
 
 .. _structure-build-conf-bblayers.conf:
 
@@ -301,14 +316,14 @@
 
 As with the ``local.conf`` file, the source ``bblayers.conf.sample``
 file used depends on the :term:`TEMPLATECONF` script variable, which
-defaults to ``meta-poky/conf/`` when you are building from the Yocto
-Project development environment, and to ``meta/conf/`` when you are
+defaults to ``meta-poky/conf/templates/default`` when you are building from the Yocto
+Project development environment, and to ``meta/conf/templates/default`` when you are
 building from the OpenEmbedded-Core environment. Because the script
 variable points to the source of the ``bblayers.conf.sample`` file, this
 implies that you can base your build from any layer by setting the
 variable in the top-level build environment setup script as follows::
 
-   TEMPLATECONF=your_layer/conf
+   TEMPLATECONF=your_layer/conf/templates/your_template_name
 
 Once the build process gets the sample file, it uses ``sed`` to substitute final
 ``${``\ :term:`OEROOT`\ ``}`` values for all ``##OEROOT##`` values.
@@ -317,17 +332,9 @@
 
    You can see how the :term:`TEMPLATECONF` variable is defined by the ``scripts/oe-setup-builddir``
    script in the :term:`Source Directory`. You can find the Yocto Project
-   version of the ``bblayers.conf.sample`` file in the ``meta-poky/conf/``
+   version of the ``bblayers.conf.sample`` file in the ``meta-poky/conf/templates/default``
    directory.
 
-.. _structure-build-conf-sanity_info:
-
-``build/cache/sanity_info``
----------------------------
-
-This file indicates the state of the sanity checks and is created during
-the build.
-
 .. _structure-build-downloads:
 
 ``build/downloads/``
@@ -366,14 +373,14 @@
 .. _structure-build-tmp-buildstats:
 
 ``build/tmp/buildstats/``
--------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~
 
 This directory stores the build statistics.
 
 .. _structure-build-tmp-cache:
 
 ``build/tmp/cache/``
---------------------
+~~~~~~~~~~~~~~~~~~~~
 
 When BitBake parses the metadata (recipes and configuration files), it
 caches the results in ``build/tmp/cache/`` to speed up future builds.
@@ -389,7 +396,7 @@
 .. _structure-build-tmp-deploy:
 
 ``build/tmp/deploy/``
----------------------
+~~~~~~~~~~~~~~~~~~~~~
 
 This directory contains any "end result" output from the OpenEmbedded
 build process. The :term:`DEPLOY_DIR` variable points
@@ -402,7 +409,7 @@
 .. _structure-build-tmp-deploy-deb:
 
 ``build/tmp/deploy/deb/``
--------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^
 
 This directory receives any ``.deb`` packages produced by the build
 process. The packages are sorted into feeds for different architecture
@@ -411,7 +418,7 @@
 .. _structure-build-tmp-deploy-rpm:
 
 ``build/tmp/deploy/rpm/``
--------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^
 
 This directory receives any ``.rpm`` packages produced by the build
 process. The packages are sorted into feeds for different architecture
@@ -420,14 +427,14 @@
 .. _structure-build-tmp-deploy-ipk:
 
 ``build/tmp/deploy/ipk/``
--------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^
 
 This directory receives ``.ipk`` packages produced by the build process.
 
 .. _structure-build-tmp-deploy-licenses:
 
 ``build/tmp/deploy/licenses/``
-------------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 This directory receives package licensing information. For example, the
 directory contains sub-directories for ``bash``, ``busybox``, and
@@ -440,7 +447,7 @@
 .. _structure-build-tmp-deploy-images:
 
 ``build/tmp/deploy/images/``
-----------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 This directory is populated with the basic output objects of the build
 (think of them as the "generated artifacts" of the build process),
@@ -467,7 +474,7 @@
 .. _structure-build-tmp-deploy-sdk:
 
 ``build/tmp/deploy/sdk/``
--------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The OpenEmbedded build system creates this directory to hold toolchain
 installer scripts which, when executed, install the sysroot that matches
@@ -479,7 +486,7 @@
 .. _structure-build-tmp-sstate-control:
 
 ``build/tmp/sstate-control/``
------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 The OpenEmbedded build system uses this directory for the shared state
 manifest files. The shared state code uses these files to record the
@@ -492,7 +499,7 @@
 .. _structure-build-tmp-sysroots-components:
 
 ``build/tmp/sysroots-components/``
-----------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 This directory is the location of the sysroot contents that the task
 :ref:`ref-tasks-prepare_recipe_sysroot`
@@ -507,7 +514,7 @@
 .. _structure-build-tmp-sysroots:
 
 ``build/tmp/sysroots/``
------------------------
+~~~~~~~~~~~~~~~~~~~~~~~
 
 Previous versions of the OpenEmbedded build system used to create a
 global shared sysroot per machine along with a native sysroot. Since
@@ -525,7 +532,7 @@
 .. _structure-build-tmp-stamps:
 
 ``build/tmp/stamps/``
----------------------
+~~~~~~~~~~~~~~~~~~~~~
 
 This directory holds information that BitBake uses for accounting
 purposes to track what tasks have run and when they have run. The
@@ -545,7 +552,7 @@
 .. _structure-build-tmp-log:
 
 ``build/tmp/log/``
-------------------
+~~~~~~~~~~~~~~~~~~
 
 This directory contains general logs that are not otherwise placed using
 the package's :term:`WORKDIR`. Examples of logs are the output from the
@@ -555,7 +562,7 @@
 .. _structure-build-tmp-work:
 
 ``build/tmp/work/``
--------------------
+~~~~~~~~~~~~~~~~~~~
 
 This directory contains architecture-specific work sub-directories for
 packages built by BitBake. All tasks execute from the appropriate work
@@ -587,7 +594,7 @@
 .. _structure-build-tmp-work-tunearch-recipename-version:
 
 ``build/tmp/work/tunearch/recipename/version/``
------------------------------------------------
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The recipe work directory --- ``${WORKDIR}``.
 
@@ -645,7 +652,7 @@
 .. _structure-build-work-shared:
 
 ``build/tmp/work-shared/``
---------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 For efficiency, the OpenEmbedded build system creates and uses this
 directory to hold recipes that share a work directory with other
@@ -695,7 +702,7 @@
 .. _structure-meta-conf-machine:
 
 ``meta/conf/machine/``
-----------------------
+~~~~~~~~~~~~~~~~~~~~~~
 
 This directory contains all the machine configuration files. If you set
 ``MACHINE = "qemux86"``, the OpenEmbedded build system looks for a
@@ -706,7 +713,7 @@
 .. _structure-meta-conf-distro:
 
 ``meta/conf/distro/``
----------------------
+~~~~~~~~~~~~~~~~~~~~~
 
 The contents of this directory controls any distribution-specific
 configurations. For the Yocto Project, the ``defaultsetup.conf`` is the
@@ -718,7 +725,7 @@
 .. _structure-meta-conf-machine-sdk:
 
 ``meta/conf/machine-sdk/``
---------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~
 
 The OpenEmbedded build system searches this directory for configuration
 files that correspond to the value of
diff --git a/poky/documentation/ref-manual/variables.rst b/poky/documentation/ref-manual/variables.rst
index cf817e9..2971654 100644
--- a/poky/documentation/ref-manual/variables.rst
+++ b/poky/documentation/ref-manual/variables.rst
@@ -135,7 +135,6 @@
       appear in :term:`DISTRO_FEATURES` within the current configuration, then
       the recipe will be skipped, and if the build system attempts to build
       the recipe then an error will be triggered.
-      
 
    :term:`APPEND`
       An override list of append strings for each target specified with
@@ -729,22 +728,6 @@
       ":ref:`dev-manual/common-tasks:building images for multiple targets using multiple configurations`"
       section in the Yocto Project Development Tasks Manual.
 
-   :term:`BBPATH`
-      Used by BitBake to locate ``.bbclass`` and configuration files. This
-      variable is analogous to the ``PATH`` variable.
-
-      .. note::
-
-         If you run BitBake from a directory outside of the
-         :term:`Build Directory`, you must be sure to set :term:`BBPATH`
-         to point to the Build Directory. Set the variable as you would any
-         environment variable and then run BitBake::
-
-                 $ BBPATH = "build_directory"
-                 $ export BBPATH
-                 $ bitbake target
-
-
    :term:`BBSERVER`
       If defined in the BitBake environment, :term:`BBSERVER` points to the
       BitBake remote server.
@@ -2386,8 +2369,8 @@
       .. note::
 
          From a security perspective, hardcoding a default password is not
-         generally a good idea or even legal in some jurisdictions. It is 
-         recommended that you do not do this if you are building a production 
+         generally a good idea or even legal in some jurisdictions. It is
+         recommended that you do not do this if you are building a production
          image.
 
       Additionally there is a special ``passwd-expire`` command that will
@@ -2967,8 +2950,10 @@
 
       If you do not point to a script that you provide, the OpenEmbedded
       build system uses the default script provided by the
-      ``icecc-create-env.bb`` recipe, which is a modified version and not
-      the one that comes with ``icecc``.
+      :oe_git:`icecc-create-env_0.1.bb
+      </openembedded-core/tree/meta/recipes-devtools/icecc-create-env/icecc-create-env_0.1.bb>`
+      recipe, which is a modified version and not the one that comes with
+      ``icecream``.
 
    :term:`ICECC_PARALLEL_MAKE`
       Extra options passed to the ``make`` command during the
@@ -3690,10 +3675,10 @@
          to "core-image-minimal-initramfs".
 
       You can also find more information by referencing the
-      ``meta-poky/conf/local.conf.sample.extended`` configuration file in
-      the Source Directory, the :ref:`image <ref-classes-image>` class,
-      and the :ref:`kernel <ref-classes-kernel>` class to see how to use
-      the :term:`INITRAMFS_IMAGE` variable.
+      ``meta-poky/conf/templates/default/local.conf.sample.extended``
+      configuration file in the Source Directory, the :ref:`image
+      <ref-classes-image>` class, and the :ref:`kernel <ref-classes-kernel>`
+      class to see how to use the :term:`INITRAMFS_IMAGE` variable.
 
       If :term:`INITRAMFS_IMAGE` is empty, which is the default, then no
       initramfs image is built.
@@ -3752,7 +3737,7 @@
          configuration file. You cannot set the variable in a recipe file.
 
       See the
-      :yocto_git:`local.conf.sample.extended </poky/tree/meta-poky/conf/local.conf.sample.extended>`
+      :yocto_git:`local.conf.sample.extended </poky/tree/meta-poky/conf/templates/default/local.conf.sample.extended>`
       file for additional information. Also, for information on creating an
       initramfs, see the ":ref:`dev-manual/common-tasks:building an initial ram filesystem (initramfs) image`" section
       in the Yocto Project Development Tasks Manual.
@@ -3984,11 +3969,10 @@
 
          KCONFIG_MODE = "alldefconfig"
 
-
    :term:`KERNEL_ALT_IMAGETYPE`
       Specifies an alternate kernel image type for creation in addition to
-      the kernel image type specified using the
-      :term:`KERNEL_IMAGETYPE` variable.
+      the kernel image type specified using the :term:`KERNEL_IMAGETYPE` and
+      :term:`KERNEL_IMAGETYPES` variables.
 
    :term:`KERNEL_ARTIFACT_NAME`
       Specifies the name of all of the build artifacts. You can change the
@@ -4172,9 +4156,12 @@
       when building the kernel and is passed to ``make`` as the target to
       build.
 
-      If you want to build an alternate kernel image type in addition to that
-      specified by :term:`KERNEL_IMAGETYPE`, use the :term:`KERNEL_ALT_IMAGETYPE`
-      variable.
+      To build additional kernel image types, use :term:`KERNEL_IMAGETYPES`.
+
+   :term:`KERNEL_IMAGETYPES`
+      Lists additional types of kernel images to build for a device in addition
+      to the image type specified in :term:`KERNEL_IMAGETYPE`. Usually set by
+      the machine configuration files.
 
    :term:`KERNEL_MODULE_AUTOLOAD`
       Lists kernel modules that need to be auto-loaded during boot.
@@ -6075,9 +6062,9 @@
    :term:`PRSERV_HOST`
       The network based :term:`PR` service host and port.
 
-      The ``conf/local.conf.sample.extended`` configuration file in the
-      :term:`Source Directory` shows how the
-      :term:`PRSERV_HOST` variable is set::
+      The ``conf/templates/default/local.conf.sample.extended`` configuration
+      file in the :term:`Source Directory` shows how the :term:`PRSERV_HOST`
+      variable is set::
 
          PRSERV_HOST = "localhost:0"
 
@@ -6588,7 +6575,7 @@
 
    :term:`SDK_CUSTOM_TEMPLATECONF`
       When building the extensible SDK, if :term:`SDK_CUSTOM_TEMPLATECONF` is set to
-      "1" and a ``conf/templateconf.conf`` file exists in the build directory
+      "1" and a ``conf/templateconf.cfg`` file exists in the build directory
       (:term:`TOPDIR`) then this will be copied into the SDK.
 
    :term:`SDK_DEPLOY`
@@ -7973,6 +7960,12 @@
       toolchain. You can use ``meta-sourcery`` as a template for adding
       support for other external toolchains.
 
+   :term:`TC_CXX_RUNTIME`
+      Specifies the C/C++ STL and runtime variant to use during
+      the build process. The default value is "gnu".
+
+      You can select "gnu", "llvm", or "android".
+
    :term:`TEMPLATECONF`
       Specifies the directory used by the build system to find templates
       from which to build the ``bblayers.conf`` and ``local.conf`` files.
@@ -8666,7 +8659,8 @@
          USER_CLASSES ?= "buildstats"
 
       For more information, see
-      ``meta-poky/conf/local.conf.sample`` in the :term:`Source Directory`.
+      ``meta-poky/conf/templates/default/local.conf.sample`` in the
+      :term:`Source Directory`.
 
    :term:`USERADD_ERROR_DYNAMIC`
       If set to ``error``, forces the OpenEmbedded build system to produce
@@ -8924,4 +8918,3 @@
 
       On systems where many tasks run in parallel, setting a limit to this
       can be helpful in controlling system resource usage.
-
diff --git a/poky/meta-poky/conf/distro/poky.conf b/poky/meta-poky/conf/distro/poky.conf
index 856c885..4dc5db4 100644
--- a/poky/meta-poky/conf/distro/poky.conf
+++ b/poky/meta-poky/conf/distro/poky.conf
@@ -1,6 +1,7 @@
 DISTRO = "poky"
 DISTRO_NAME = "Poky (Yocto Project Reference Distro)"
-DISTRO_VERSION = "4.1+snapshot-${METADATA_REVISION}"
+#DISTRO_VERSION = "4.1+snapshot-${METADATA_REVISION}"
+DISTRO_VERSION = "4.1"
 DISTRO_CODENAME = "langdale"
 SDK_VENDOR = "-pokysdk"
 SDK_VERSION = "${@d.getVar('DISTRO_VERSION').replace('snapshot-${METADATA_REVISION}', 'snapshot')}"
@@ -33,8 +34,8 @@
 PACKAGE_CLASSES ?= "package_rpm"
 
 SANITY_TESTED_DISTROS ?= " \
-            poky-3.3 \n \
-            poky-3.4 \n \
+            poky-4.0 \n \
+            poky-4.1 \n \
             ubuntu-18.04 \n \
             ubuntu-20.04 \n \
             ubuntu-21.10 \n \
diff --git a/poky/meta-poky/conf/templates/default/local.conf.sample b/poky/meta-poky/conf/templates/default/local.conf.sample
index b96dc90..7be423f 100644
--- a/poky/meta-poky/conf/templates/default/local.conf.sample
+++ b/poky/meta-poky/conf/templates/default/local.conf.sample
@@ -234,7 +234,7 @@
 # which will depend on your network.
 # Note: For this to work you also need hash-equivalence passthrough to the matching server
 #
-#BB_HASHSERVE_UPSTREAM = "typhoon.yocto.io:8687"
+#BB_HASHSERVE_UPSTREAM = "hashserv.yocto.io:8687"
 #SSTATE_MIRRORS ?= "file://.* http://sstate.yoctoproject.org/all/PATH;downloadfilename=PATH"
 
 #
diff --git a/poky/meta-selftest/lib/oeqa/runtime/cases/virgl.py b/poky/meta-selftest/lib/oeqa/runtime/cases/virgl.py
index 144decd..f19cdee 100644
--- a/poky/meta-selftest/lib/oeqa/runtime/cases/virgl.py
+++ b/poky/meta-selftest/lib/oeqa/runtime/cases/virgl.py
@@ -15,4 +15,4 @@
     def test_kmscube(self):
         status, output = self.target.run('kmscube')
         self.assertEqual(status, 0, "kmscube exited with non-zero status %d and output:\n%s" %(status, output))
-        self.assertIn('renderer: "virgl"', output, "kmscube does not seem to use virgl:\n%s" %(output))
+        self.assertIn('renderer: "virgl', output, "kmscube does not seem to use virgl:\n%s" %(output))
diff --git a/poky/meta-selftest/recipes-test/overlayfs-user/overlayfs-user.bb b/poky/meta-selftest/recipes-test/overlayfs-user/overlayfs-user.bb
index 913a4d1..50cba95 100644
--- a/poky/meta-selftest/recipes-test/overlayfs-user/overlayfs-user.bb
+++ b/poky/meta-selftest/recipes-test/overlayfs-user/overlayfs-user.bb
@@ -12,6 +12,11 @@
 
 do_install() {
     install -d ${D}/usr/share/my-application
+    install -d ${D}${sysconfdir}
+    echo "Original file in /etc" >> ${D}${sysconfdir}/lower-layer-test.txt
 }
 
-FILES:${PN} += "/usr"
+FILES:${PN} += "\
+    ${exec_prefix} \
+    ${sysconfdir} \
+"
diff --git a/poky/meta-selftest/recipes-test/poison/poison.bb b/poky/meta-selftest/recipes-test/poison/poison.bb
index 7ace901..e9eee0c 100644
--- a/poky/meta-selftest/recipes-test/poison/poison.bb
+++ b/poky/meta-selftest/recipes-test/poison/poison.bb
@@ -9,8 +9,12 @@
 # will result in compiler errors.  This recipe should will fail to build and
 # oe-selftest has a test that verifies that.
 do_compile() {
-    touch empty.c
-    ${CPP} ${CFLAGS} -I/usr/include empty.c
+    bbnote Testing preprocessor
+    echo "int main(int argc, char** argv) {}" | ${CPP} -I/usr/include -
+    bbnote Testing C compiler
+    echo "int main(int argc, char** argv) {}" | ${CC} -x c -I/usr/include -
+    bbnote Testing C++ compiler
+    echo "int main(int argc, char** argv) {}" | ${CC} -x c++ -I/usr/include -
 }
 
 EXCLUDE_FROM_WORLD = "1"
diff --git a/poky/meta-skeleton/recipes-skeleton/useradd/useradd-example.bb b/poky/meta-skeleton/recipes-skeleton/useradd/useradd-example.bb
index 3f4c42d..cff624e 100644
--- a/poky/meta-skeleton/recipes-skeleton/useradd/useradd-example.bb
+++ b/poky/meta-skeleton/recipes-skeleton/useradd/useradd-example.bb
@@ -33,8 +33,8 @@
 USERADD_PARAM:${PN} = "-u 1200 -d /home/user1 -r -s /bin/bash user1; -u 1201 -d /home/user2 -r -s /bin/bash user2"
 
 # user3 will be managed in the useradd-example-user3 pacakge:
-# As an example, we use the -P option to set clear text password for user3
-USERADD_PARAM:${PN}-user3 = "-u 1202 -d /home/user3 -r -s /bin/bash -P 'user3' user3"
+# As an example, we use the -p option to set password ('user3') for user3
+USERADD_PARAM:${PN}-user3 = "-u 1202 -d /home/user3 -r -s /bin/bash -p '\$6\$XAWr.8nc\$bUE4pYYaVb8n6BbnBitU0zeJMtfhTpFpiOBLL9zRl4e4YQo88UU4r/1kjRzmTimCy.BvDh4xoFwVqcO.pihLa1' user3"
 
 # GROUPADD_PARAM works the same way, which you set to the options
 # you'd normally pass to the groupadd command. This will create
diff --git a/poky/meta-yocto-bsp/README.hardware.md b/poky/meta-yocto-bsp/README.hardware.md
index 9151d64..44dbb35 100644
--- a/poky/meta-yocto-bsp/README.hardware.md
+++ b/poky/meta-yocto-bsp/README.hardware.md
@@ -203,7 +203,7 @@
 
  5. Download the kernel and boot:
 
- => tftp tftp $loadaddr vmlinux
+ => tftp $loadaddr vmlinux
  => bootoctlinux $loadaddr coremask=0x3 root=/dev/nfs rw nfsroot=<nfsroot ip>:<rootfs path> ip=<board ip>:<server ip>:<gateway ip>:<netmask>:edgerouter:eth0:off mtdparts=phys_mapped_flash:512k(boot0),512k(boot1),64k@3072k(eeprom)
 
 --- Booting from USB disk ---
diff --git a/poky/meta/classes-global/sstate.bbclass b/poky/meta/classes-global/sstate.bbclass
index cd77c58..2c8e7b8 100644
--- a/poky/meta/classes-global/sstate.bbclass
+++ b/poky/meta/classes-global/sstate.bbclass
@@ -784,6 +784,7 @@
         uris += ['file://{0}.sig;downloadfilename={0}.sig'.format(sstatefetch)]
 
     for srcuri in uris:
+        localdata.delVar('SRC_URI')
         localdata.setVar('SRC_URI', srcuri)
         try:
             fetcher = bb.fetch2.Fetch([srcuri], localdata, cache=False)
diff --git a/poky/meta/classes-recipe/core-image.bbclass b/poky/meta/classes-recipe/core-image.bbclass
index 4b5f2c9..90d9eb9 100644
--- a/poky/meta/classes-recipe/core-image.bbclass
+++ b/poky/meta/classes-recipe/core-image.bbclass
@@ -62,6 +62,10 @@
 # IMAGE_FEATURES_REPLACES_foo = 'bar1 bar2'
 # Including image feature foo would replace the image features bar1 and bar2
 IMAGE_FEATURES_REPLACES_ssh-server-openssh = "ssh-server-dropbear"
+# Do not install openssh complementary packages if either packagegroup-core-ssh-dropbear
+# or dropbear is installed, to avoid an openssh-dropbear conflict
+# (see [Yocto #14858] for more information). The leading space matters: :append adds no separator.
+PACKAGE_EXCLUDE_COMPLEMENTARY:append = "${@bb.utils.contains_any('PACKAGE_INSTALL', 'packagegroup-core-ssh-dropbear dropbear', ' openssh', '' , d)}"
 
 # IMAGE_FEATURES_CONFLICTS_foo = 'bar1 bar2'
 # An error exception would be raised if both image features foo and bar1(or bar2) are included
diff --git a/poky/meta/classes-recipe/externalsrc.bbclass b/poky/meta/classes-recipe/externalsrc.bbclass
index 51dbe9e..06a9548 100644
--- a/poky/meta/classes-recipe/externalsrc.bbclass
+++ b/poky/meta/classes-recipe/externalsrc.bbclass
@@ -91,16 +91,18 @@
                 # Since configure will likely touch ${S}, ensure only we lock so one task has access at a time
                 d.appendVarFlag(task, "lockfiles", " ${S}/singletask.lock")
 
-            for funcname in [task, "base_" + task, "kernel_" + task]:
+        for v in d.keys():
+            cleandirs = d.getVarFlag(v, "cleandirs", False)
+            if cleandirs:
                 # We do not want our source to be wiped out, ever (kernel.bbclass does this for do_clean)
-                cleandirs = oe.recipeutils.split_var_value(d.getVarFlag(funcname, 'cleandirs', False) or '')
+                cleandirs = oe.recipeutils.split_var_value(cleandirs)
                 setvalue = False
                 for cleandir in cleandirs[:]:
                     if oe.path.is_path_parent(externalsrc, d.expand(cleandir)):
                         cleandirs.remove(cleandir)
                         setvalue = True
                 if setvalue:
-                    d.setVarFlag(funcname, 'cleandirs', ' '.join(cleandirs))
+                    d.setVarFlag(v, 'cleandirs', ' '.join(cleandirs))
 
         fetch_tasks = ['do_fetch', 'do_unpack']
         # If we deltask do_patch, there's no dependency to ensure do_unpack gets run, so add one
@@ -228,15 +230,16 @@
             env['GIT_INDEX_FILE'] = tmp_index.name
             subprocess.check_output(['git', 'add', '-A', '.'], cwd=s_dir, env=env)
             git_sha1 = subprocess.check_output(['git', 'write-tree'], cwd=s_dir, env=env).decode("utf-8")
-            submodule_helper = subprocess.check_output(['git', 'submodule--helper', 'list'], cwd=s_dir, env=env).decode("utf-8")
-            for line in submodule_helper.splitlines():
-                module_dir = os.path.join(s_dir, line.rsplit(maxsplit=1)[1])
-                if os.path.isdir(module_dir):
-                    proc = subprocess.Popen(['git', 'add', '-A', '.'], cwd=module_dir, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
-                    proc.communicate()
-                    proc = subprocess.Popen(['git', 'write-tree'], cwd=module_dir, env=env, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
-                    stdout, _ = proc.communicate()
-                    git_sha1 += stdout.decode("utf-8")
+            if os.path.exists(".gitmodules"):
+                submodule_helper = subprocess.check_output(["git", "config", "--file", ".gitmodules", "--get-regexp", "path"], cwd=s_dir, env=env).decode("utf-8")
+                for line in submodule_helper.splitlines():
+                    module_dir = os.path.join(s_dir, line.rsplit(maxsplit=1)[1])
+                    if os.path.isdir(module_dir):
+                        proc = subprocess.Popen(['git', 'add', '-A', '.'], cwd=module_dir, env=env, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
+                        proc.communicate()
+                        proc = subprocess.Popen(['git', 'write-tree'], cwd=module_dir, env=env, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL)
+                        stdout, _ = proc.communicate()
+                        git_sha1 += stdout.decode("utf-8")
             sha1 = hashlib.sha1(git_sha1.encode("utf-8")).hexdigest()
         with open(oe_hash_file, 'w') as fobj:
             fobj.write(sha1)
diff --git a/poky/meta/classes-recipe/github-releases.bbclass b/poky/meta/classes-recipe/github-releases.bbclass
new file mode 100644
index 0000000..ed83b83
--- /dev/null
+++ b/poky/meta/classes-recipe/github-releases.bbclass
@@ -0,0 +1,3 @@
+GITHUB_BASE_URI ?= "https://github.com/${BPN}/${BPN}/releases/"
+UPSTREAM_CHECK_URI ?= "${GITHUB_BASE_URI}"
+UPSTREAM_CHECK_REGEX ?= "releases/tag/v?(?P<pver>\d+(\.\d+)+)"
diff --git a/poky/meta/classes-recipe/gtk-icon-cache.bbclass b/poky/meta/classes-recipe/gtk-icon-cache.bbclass
index 17c7eb7..9ecb499 100644
--- a/poky/meta/classes-recipe/gtk-icon-cache.bbclass
+++ b/poky/meta/classes-recipe/gtk-icon-cache.bbclass
@@ -9,7 +9,7 @@
 GTKIC_VERSION ??= '3'
 
 GTKPN = "${@ 'gtk4' if d.getVar('GTKIC_VERSION') == '4' else 'gtk+3' }"
-GTKIC_CMD = "${@ 'gtk-update-icon-cache-3.0.0' if d.getVar('GTKIC_VERSION') == '4' else 'gtk4-update-icon-cache' }"
+GTKIC_CMD = "${@ 'gtk4-update-icon-cache' if d.getVar('GTKIC_VERSION') == '4' else 'gtk-update-icon-cache-3.0' }"
 
 #gtk+3/gtk4 require GTK3DISTROFEATURES, DEPENDS on it make all the
 #recipes inherit this class require GTK3DISTROFEATURES
diff --git a/poky/meta/classes-recipe/kernel-module-split.bbclass b/poky/meta/classes-recipe/kernel-module-split.bbclass
index 1b4c864..08c2e54 100644
--- a/poky/meta/classes-recipe/kernel-module-split.bbclass
+++ b/poky/meta/classes-recipe/kernel-module-split.bbclass
@@ -18,7 +18,7 @@
 if [ -z "$D" ]; then
 	depmod -a ${KERNEL_VERSION}
 else
-	depmodwrapper -a -b $D ${KERNEL_VERSION}
+	depmodwrapper -a -b $D ${KERNEL_VERSION} ${KERNEL_PACKAGE_NAME}
 fi
 }
 
diff --git a/poky/meta/classes-recipe/kernel.bbclass b/poky/meta/classes-recipe/kernel.bbclass
index de1b80d..e4e69e0 100644
--- a/poky/meta/classes-recipe/kernel.bbclass
+++ b/poky/meta/classes-recipe/kernel.bbclass
@@ -677,7 +677,7 @@
 		mkdir -p $D/lib/modules/${KERNEL_VERSION}
 	fi
 	if [ -n "$D" ]; then
-		depmodwrapper -a -b $D ${KERNEL_VERSION}
+		depmodwrapper -a -b $D ${KERNEL_VERSION} ${KERNEL_PACKAGE_NAME}
 	else
 		depmod -a ${KERNEL_VERSION}
 	fi
diff --git a/poky/meta/classes/overlayfs-etc.bbclass b/poky/meta/classes-recipe/overlayfs-etc.bbclass
similarity index 94%
rename from poky/meta/classes/overlayfs-etc.bbclass
rename to poky/meta/classes-recipe/overlayfs-etc.bbclass
index d0bc3ec..f834310 100644
--- a/poky/meta/classes/overlayfs-etc.bbclass
+++ b/poky/meta/classes-recipe/overlayfs-etc.bbclass
@@ -40,6 +40,7 @@
 OVERLAYFS_ETC_USE_ORIG_INIT_NAME ??= "1"
 OVERLAYFS_ETC_MOUNT_OPTIONS ??= "defaults"
 OVERLAYFS_ETC_INIT_TEMPLATE ??= "${COREBASE}/meta/files/overlayfs-etc-preinit.sh.in"
+OVERLAYFS_ETC_EXPOSE_LOWER ??= "0"
 
 python create_overlayfs_etc_preinit() {
     overlayEtcMountPoint = d.getVar("OVERLAYFS_ETC_MOUNT_POINT")
@@ -60,13 +61,15 @@
     preinitPath = oe.path.join(d.getVar("IMAGE_ROOTFS"), d.getVar("base_sbindir"), "preinit")
     initBaseName = oe.path.join(d.getVar("base_sbindir"), "init")
     origInitNameSuffix = ".orig"
+    exposeLower = oe.types.boolean(d.getVar('OVERLAYFS_ETC_EXPOSE_LOWER'))
 
     args = {
         'OVERLAYFS_ETC_MOUNT_POINT': overlayEtcMountPoint,
         'OVERLAYFS_ETC_MOUNT_OPTIONS': d.getVar('OVERLAYFS_ETC_MOUNT_OPTIONS'),
         'OVERLAYFS_ETC_FSTYPE': overlayEtcFsType,
         'OVERLAYFS_ETC_DEVICE': overlayEtcDevice,
-        'SBIN_INIT_NAME': initBaseName + origInitNameSuffix if useOrigInit else initBaseName
+        'SBIN_INIT_NAME': initBaseName + origInitNameSuffix if useOrigInit else initBaseName,
+        'OVERLAYFS_ETC_EXPOSE_LOWER': "true" if exposeLower else "false"
     }
 
     if useOrigInit:
diff --git a/poky/meta/classes/overlayfs.bbclass b/poky/meta/classes-recipe/overlayfs.bbclass
similarity index 100%
rename from poky/meta/classes/overlayfs.bbclass
rename to poky/meta/classes-recipe/overlayfs.bbclass
diff --git a/poky/meta/classes-recipe/rootfs-postcommands.bbclass b/poky/meta/classes-recipe/rootfs-postcommands.bbclass
index 215e38e..690fa97 100644
--- a/poky/meta/classes-recipe/rootfs-postcommands.bbclass
+++ b/poky/meta/classes-recipe/rootfs-postcommands.bbclass
@@ -22,7 +22,7 @@
 # Create /etc/timestamp during image construction to give a reasonably sane default time setting
 ROOTFS_POSTPROCESS_COMMAND += "rootfs_update_timestamp; "
 
-# Tweak the mount options for rootfs in /etc/fstab if read-only-rootfs is enabled
+# Tweak files in /etc if read-only-rootfs is enabled
 ROOTFS_POSTPROCESS_COMMAND += '${@bb.utils.contains("IMAGE_FEATURES", "read-only-rootfs", "read_only_rootfs_hook; ", "",d)}'
 
 # We also need to do the same for the kernel boot parameters,
@@ -111,20 +111,24 @@
 	# If we're using openssh and the /etc/ssh directory has no pre-generated keys,
 	# we should configure openssh to use the configuration file /etc/ssh/sshd_config_readonly
 	# and the keys under /var/run/ssh.
-	if [ -d ${IMAGE_ROOTFS}/etc/ssh ]; then
-		if [ -e ${IMAGE_ROOTFS}/etc/ssh/ssh_host_rsa_key ]; then
-			echo "SYSCONFDIR=\${SYSCONFDIR:-/etc/ssh}" >> ${IMAGE_ROOTFS}/etc/default/ssh
-			echo "SSHD_OPTS=" >> ${IMAGE_ROOTFS}/etc/default/ssh
-		else
-			echo "SYSCONFDIR=\${SYSCONFDIR:-/var/run/ssh}" >> ${IMAGE_ROOTFS}/etc/default/ssh
-			echo "SSHD_OPTS='-f /etc/ssh/sshd_config_readonly'" >> ${IMAGE_ROOTFS}/etc/default/ssh
+	# If overlayfs-etc is used this is not done as /etc is treated as writable
+	# If stateless-rootfs is enabled this is always done as we don't want to save keys then
+	if ${@ 'true' if not bb.utils.contains('IMAGE_FEATURES', 'overlayfs-etc', True, False, d) or bb.utils.contains('IMAGE_FEATURES', 'stateless-rootfs', True, False, d) else 'false'}; then
+		if [ -d ${IMAGE_ROOTFS}/etc/ssh ]; then
+			if [ -e ${IMAGE_ROOTFS}/etc/ssh/ssh_host_rsa_key ]; then
+				echo "SYSCONFDIR=\${SYSCONFDIR:-/etc/ssh}" >> ${IMAGE_ROOTFS}/etc/default/ssh
+				echo "SSHD_OPTS=" >> ${IMAGE_ROOTFS}/etc/default/ssh
+			else
+				echo "SYSCONFDIR=\${SYSCONFDIR:-/var/run/ssh}" >> ${IMAGE_ROOTFS}/etc/default/ssh
+				echo "SSHD_OPTS='-f /etc/ssh/sshd_config_readonly'" >> ${IMAGE_ROOTFS}/etc/default/ssh
+			fi
 		fi
-	fi
 
-	# Also tweak the key location for dropbear in the same way.
-	if [ -d ${IMAGE_ROOTFS}/etc/dropbear ]; then
-		if [ ! -e ${IMAGE_ROOTFS}/etc/dropbear/dropbear_rsa_host_key ]; then
-			echo "DROPBEAR_RSAKEY_DIR=/var/lib/dropbear" >> ${IMAGE_ROOTFS}/etc/default/dropbear
+		# Also tweak the key location for dropbear in the same way.
+		if [ -d ${IMAGE_ROOTFS}/etc/dropbear ]; then
+			if [ ! -e ${IMAGE_ROOTFS}/etc/dropbear/dropbear_rsa_host_key ]; then
+				echo "DROPBEAR_RSAKEY_DIR=/var/lib/dropbear" >> ${IMAGE_ROOTFS}/etc/default/dropbear
+			fi
 		fi
 	fi
 
@@ -200,6 +204,7 @@
 	if [ -e ${IMAGE_ROOTFS}${sbindir}/dropbear ] ; then
 		if grep -q DROPBEAR_EXTRA_ARGS ${IMAGE_ROOTFS}${sysconfdir}/default/dropbear 2>/dev/null ; then
 			sed -i '/^DROPBEAR_EXTRA_ARGS=/ s/-w//' ${IMAGE_ROOTFS}${sysconfdir}/default/dropbear
+			sed -i '/^# Disallow root/d' ${IMAGE_ROOTFS}${sysconfdir}/default/dropbear
 		fi
 	fi
 }
diff --git a/poky/meta/classes-recipe/testimage.bbclass b/poky/meta/classes-recipe/testimage.bbclass
index 8d2fab2..5cc408b 100644
--- a/poky/meta/classes-recipe/testimage.bbclass
+++ b/poky/meta/classes-recipe/testimage.bbclass
@@ -189,11 +189,7 @@
                     search_login_succeeded,search_cmd_finished\n Make sure your TESTIMAGE_BOOT_PATTERNS=%s \
                     contains an accepted flag.' % d.getVar('TESTIMAGE_BOOT_PATTERNS'))
                     return
-                # We know boot prompt is searched through in binary format, others might be expressions
-                if flag == 'search_reached_prompt':
-                    boot_patterns[flag] = flagval.encode()
-                else:
-                    boot_patterns[flag] = flagval.encode().decode('unicode-escape')
+                boot_patterns[flag] = flagval.encode().decode('unicode-escape')
     return boot_patterns
 
 
@@ -472,10 +468,7 @@
         package_list = glob.glob(idx_path + "*/*.rpm")
 
         for pkg in package_list:
-            if os.path.basename(pkg).startswith(("curl-ptest")):
-                bb.utils.remove(pkg)
-
-            if not os.path.basename(pkg).startswith(("rpm", "run-postinsts", "busybox", "bash", "update-alternatives", "libc6", "curl", "musl")):
+            if not os.path.basename(pkg).startswith(("dnf-test-", "busybox", "update-alternatives", "libc6", "musl")):
                 bb.utils.remove(pkg)
 
         bb.utils.unlockfile(lf)
diff --git a/poky/meta/classes/icecc.bbclass b/poky/meta/classes/icecc.bbclass
index a11e781..312e0f1 100644
--- a/poky/meta/classes/icecc.bbclass
+++ b/poky/meta/classes/icecc.bbclass
@@ -4,35 +4,35 @@
 # SPDX-License-Identifier: MIT
 #
 
-# IceCream distributed compiling support
+# Icecream distributed compiling support
 #
 # Stages directories with symlinks from gcc/g++ to icecc, for both
 # native and cross compilers. Depending on each configure or compile,
 # the directories are added at the head of the PATH list and ICECC_CXX
-# and ICEC_CC are set.
+# and ICECC_CC are set.
 #
 # For the cross compiler, creates a tar.gz of our toolchain and sets
 # ICECC_VERSION accordingly.
 #
 # The class now handles all 3 different compile 'stages' (i.e native ,cross-kernel and target) creating the
 # necessary environment tar.gz file to be used by the remote machines.
-# It also supports meta-toolchain generation
+# It also supports meta-toolchain generation.
 #
 # If ICECC_PATH is not set in local.conf then the class will try to locate it using 'bb.utils.which'
-# but nothing is sure ;)
+# but nothing is sure. ;)
 #
 # If ICECC_ENV_EXEC is set in local.conf, then it should point to the icecc-create-env script provided by the user
-# or the default one provided by icecc-create-env.bb will be used
-# (NOTE that this is a modified version of the script need it and *not the one that comes with icecc*
+# or the default one provided by icecc-create-env_0.1.bb will be used.
+# (NOTE that this is a modified version of the needed script and *not the one that comes with icecream*).
 #
-# User can specify if specific recipes or recipes belonging to class should not use icecc to distribute
-# compile jobs to remote machines, but handled locally, by defining ICECC_CLASS_DISABLE and ICECC_RECIPE_DISABLE
+# User can specify if specific recipes or recipes inheriting specific classes should not use icecc to distribute
+# compile jobs to remote machines, but handle them locally by defining ICECC_CLASS_DISABLE and ICECC_RECIPE_DISABLE
 # with the appropriate values in local.conf. In addition the user can force to enable icecc for recipes
 # which set an empty PARALLEL_MAKE variable by defining ICECC_RECIPE_ENABLE.
 #
 #########################################################################################
-#Error checking is kept to minimum so double check any parameters you pass to the class
-###########################################################################################
+# Error checking is kept to minimum so double check any parameters you pass to the class
+#########################################################################################
 
 BB_BASEHASH_IGNORE_VARS += "ICECC_PARALLEL_MAKE ICECC_DISABLED ICECC_RECIPE_DISABLE \
     ICECC_CLASS_DISABLE ICECC_RECIPE_ENABLE ICECC_PATH ICECC_ENV_EXEC \
@@ -50,7 +50,7 @@
 # invalidate the version on the compile nodes. Changing it will cause a new
 # environment to be created.
 #
-# A useful thing to do for testing Icecream changes locally is to add a
+# A useful thing to do for testing icecream changes locally is to add a
 # subversion in local.conf:
 #  ICECC_ENV_VERSION:append = "-my-ver-1"
 ICECC_ENV_VERSION = "2"
@@ -72,16 +72,16 @@
 ICECC_ENV_DEBUG ??= ""
 
 # Disable recipe list contains a list of recipes that can not distribute
-# compile tasks for one reason or the other. When adding new entry, please
+# compile tasks for one reason or the other. When adding a new entry, please
 # document why (how it failed) so that we can re-evaluate it later e.g. when
-# there is new version
+# there is a new version.
 #
 # libgcc-initial - fails with CPP sanity check error if host sysroot contains
-#                  cross gcc built for another target tune/variant
+#                  cross gcc built for another target tune/variant.
 # pixman - prng_state: TLS reference mismatches non-TLS reference, possibly due to
-#          pragma omp threadprivate(prng_state)
+#          pragma omp threadprivate(prng_state).
 # systemtap - _HelperSDT.c undefs macros and uses the identifiers in macros emitting
-#             inline assembly
+#             inline assembly.
 # target-sdk-provides-dummy - ${HOST_PREFIX} is empty which triggers the "NULL
 #                             prefix" error.
 ICECC_RECIPE_DISABLE += "\
@@ -91,10 +91,10 @@
     target-sdk-provides-dummy \
     "
 
-# Classes that should not use icecc. When adding new entry, please
-# document why (how it failed) so that we can re-evaluate it later
+# Classes that should not use icecc. When adding a new entry, please
+# document why (how it failed) so that we can re-evaluate it later.
 #
-# image - Image aren't compiling, but the testing framework for images captures
+# image - images aren't compiling, but the testing framework for images captures
 #         PARALLEL_MAKE as part of the test environment. Many tests won't use
 #         icecream, but leaving the high level of parallelism can cause them to
 #         consume an unnecessary amount of resources.
@@ -103,7 +103,7 @@
     "
 
 def get_icecc_dep(d):
-    # INHIBIT_DEFAULT_DEPS doesn't apply to the patch command.  Whether or  not
+    # INHIBIT_DEFAULT_DEPS doesn't apply to the patch command. Whether or not
     # we need that built is the responsibility of the patch function / class, not
     # the application.
     if not d.getVar('INHIBIT_DEFAULT_DEPS'):
@@ -259,7 +259,7 @@
 def icecc_get_path_tool(tool, d):
     # This is a little ugly, but we want to make sure we add an actual
     # compiler to the toolchain, not ccache. Some distros (e.g. Fedora)
-    # have ccache enabled by default using symlinks PATH, meaning ccache
+    # have ccache enabled by default using symlinks in PATH, meaning ccache
     # would be found first when looking for the compiler.
     paths = os.getenv("PATH").split(':')
     while True:
@@ -380,7 +380,6 @@
     fi
     for compiler in $compilers; do
         ln -sf $ICECC_BIN $ICE_PATH/symlinks/$compiler
-        rm -f $ICE_PATH/$compiler
         cat <<-__EOF__ > $ICE_PATH/$compiler
 		#!/bin/sh -e
 		export ICECC_VERSION=$ICECC_VERSION
@@ -449,11 +448,11 @@
     set_icecc_env
 }
 
-# IceCream is not (currently) supported in the extensible SDK
+# Icecream is not (currently) supported in the extensible SDK
 ICECC_SDK_HOST_TASK = "nativesdk-icecc-toolchain"
 ICECC_SDK_HOST_TASK:task-populate-sdk-ext = ""
 
-# Don't include IceCream in uninative tarball
+# Don't include icecream in uninative tarball
 ICECC_SDK_HOST_TASK:pn-uninative-tarball = ""
 
 # Add the toolchain scripts to the SDK
diff --git a/poky/meta/conf/bitbake.conf b/poky/meta/conf/bitbake.conf
index 52a36d7..62cdd9a 100644
--- a/poky/meta/conf/bitbake.conf
+++ b/poky/meta/conf/bitbake.conf
@@ -389,6 +389,7 @@
 
 TCMODE ??= "default"
 TCLIBC ??= "glibc"
+TC_CXX_RUNTIME ??= "gnu"
 TMPDIR ?= "${TOPDIR}/tmp"
 
 CACHE = "${TMPDIR}/cache/${TCMODE}-${TCLIBC}${@['', '/' + str(d.getVar('MACHINE'))][bool(d.getVar('MACHINE'))]}${@['', '/' + str(d.getVar('SDKMACHINE'))][bool(d.getVar('SDKMACHINE'))]}"
diff --git a/poky/meta/conf/distro/include/maintainers.inc b/poky/meta/conf/distro/include/maintainers.inc
index 3c80a3a..fee4e52 100644
--- a/poky/meta/conf/distro/include/maintainers.inc
+++ b/poky/meta/conf/distro/include/maintainers.inc
@@ -626,6 +626,7 @@
 RECIPE_MAINTAINER:pn-python3-jsonpointer = "Bruce Ashfield <bruce.ashfield@gmail.com>"
 RECIPE_MAINTAINER:pn-python3-jsonschema = "Bruce Ashfield <bruce.ashfield@gmail.com>"
 RECIPE_MAINTAINER:pn-python3-libarchive-c = "Joshua Watt <JPEWhacker@gmail.com>"
+RECIPE_MAINTAINER:pn-python3-lxml = "Khem Raj <raj.khem@gmail.com>"
 RECIPE_MAINTAINER:pn-python3-magic = "Joshua Watt <JPEWhacker@gmail.com>"
 RECIPE_MAINTAINER:pn-python3-mako = "Oleksandr Kravchuk <open.source@oleksandr-kravchuk.com>"
 RECIPE_MAINTAINER:pn-python3-markdown = "Alexander Kanavin <alex.kanavin@gmail.com>"
diff --git a/poky/meta/conf/distro/include/tcmode-default.inc b/poky/meta/conf/distro/include/tcmode-default.inc
index 9abd121..59b226e 100644
--- a/poky/meta/conf/distro/include/tcmode-default.inc
+++ b/poky/meta/conf/distro/include/tcmode-default.inc
@@ -22,7 +22,7 @@
 GDBVERSION ?= "12.%"
 GLIBCVERSION ?= "2.36"
 LINUXLIBCVERSION ?= "5.19%"
-QEMUVERSION ?= "7.0%"
+QEMUVERSION ?= "7.1%"
 GOVERSION ?= "1.19%"
 # This can not use wildcards like 8.0.% since it is also used in mesa to denote
 # llvm version being used, so always bump it with llvm recipe version bump
diff --git a/poky/meta/conf/layer.conf b/poky/meta/conf/layer.conf
index 1387539..0ce9035 100644
--- a/poky/meta/conf/layer.conf
+++ b/poky/meta/conf/layer.conf
@@ -7,7 +7,7 @@
 BBFILE_PATTERN_core = "^${LAYERDIR}/"
 BBFILE_PRIORITY_core = "5"
 
-LAYERSERIES_CORENAMES = "kirkstone langdale"
+LAYERSERIES_CORENAMES = "langdale"
 
 # This should only be incremented on significant changes that will
 # cause compatibility issues with other layers
diff --git a/poky/meta/conf/machine/include/arm/arch-armv9a.inc b/poky/meta/conf/machine/include/arm/arch-armv9a.inc
new file mode 100644
index 0000000..c38d6cf
--- /dev/null
+++ b/poky/meta/conf/machine/include/arm/arch-armv9a.inc
@@ -0,0 +1,28 @@
+DEFAULTTUNE ?= "armv9a-crc"
+
+TUNEVALID[armv9a] = "Enable instructions for ARMv9-a"
+TUNE_CCARGS_MARCH .= "${@bb.utils.contains('TUNE_FEATURES', 'armv9a', ' -march=armv9-a', '', d)}"
+MACHINEOVERRIDES =. "${@bb.utils.contains('TUNE_FEATURES', 'armv9a', 'armv9a:', '', d)}"
+
+require conf/machine/include/arm/arch-arm64.inc
+require conf/machine/include/arm/feature-arm-crc.inc
+require conf/machine/include/arm/feature-arm-crypto.inc
+
+# Little Endian base configs
+AVAILTUNES += "armv9a armv9a-crc armv9a-crc-crypto armv9a-crypto"
+ARMPKGARCH:tune-armv9a                    ?= "armv9a"
+ARMPKGARCH:tune-armv9a-crc                ?= "armv9a"
+ARMPKGARCH:tune-armv9a-crypto             ?= "armv9a"
+ARMPKGARCH:tune-armv9a-crc-crypto         ?= "armv9a"
+TUNE_FEATURES:tune-armv9a                  = "aarch64 armv9a"
+TUNE_FEATURES:tune-armv9a-crc              = "${TUNE_FEATURES:tune-armv9a} crc"
+TUNE_FEATURES:tune-armv9a-crypto           = "${TUNE_FEATURES:tune-armv9a} crypto"
+TUNE_FEATURES:tune-armv9a-crc-crypto       = "${TUNE_FEATURES:tune-armv9a-crc} crypto"
+PACKAGE_EXTRA_ARCHS:tune-armv9a            = "aarch64 armv9a"
+PACKAGE_EXTRA_ARCHS:tune-armv9a-crc        = "${PACKAGE_EXTRA_ARCHS:tune-armv9a} armv9a-crc"
+PACKAGE_EXTRA_ARCHS:tune-armv9a-crypto     = "${PACKAGE_EXTRA_ARCHS:tune-armv9a} armv9a-crypto"
+PACKAGE_EXTRA_ARCHS:tune-armv9a-crc-crypto = "${PACKAGE_EXTRA_ARCHS:tune-armv9a-crc} armv9a-crypto armv9a-crc-crypto"
+BASE_LIB:tune-armv9a                       = "lib64"
+BASE_LIB:tune-armv9a-crc                   = "lib64"
+BASE_LIB:tune-armv9a-crypto                = "lib64"
+BASE_LIB:tune-armv9a-crc-crypto            = "lib64"
diff --git a/poky/meta/conf/machine/include/arm/armv9a/tune-neoversen2.inc b/poky/meta/conf/machine/include/arm/armv9a/tune-neoversen2.inc
index 36355f7..d26ab25 100644
--- a/poky/meta/conf/machine/include/arm/armv9a/tune-neoversen2.inc
+++ b/poky/meta/conf/machine/include/arm/armv9a/tune-neoversen2.inc
@@ -6,17 +6,15 @@
 TUNEVALID[neoversen2] = "Enable Neoverse-N2 specific processor optimizations"
 TUNE_CCARGS .= "${@bb.utils.contains('TUNE_FEATURES', 'neoversen2', ' -mcpu=neoverse-n2', '', d)}"
 
-# Even though the Neoverse N2 core implemnts the Arm v9.0-A architecture,
-# but the support of it in GCC is based on the Arm v8.5-A architecture.
-require conf/machine/include/arm/arch-armv8-5a.inc
+require conf/machine/include/arm/arch-armv9a.inc
 
 # Little Endian base configs
 AVAILTUNES                                         += "neoversen2 neoversen2-crypto"
 ARMPKGARCH:tune-neoversen2                          = "neoversen2"
 ARMPKGARCH:tune-neoversen2-crypto                   = "neoversen2-crypto"
-TUNE_FEATURES:tune-neoversen2                       = "${TUNE_FEATURES:tune-armv8-5a} neoversen2"
+TUNE_FEATURES:tune-neoversen2                       = "${TUNE_FEATURES:tune-armv9a} neoversen2"
 TUNE_FEATURES:tune-neoversen2-crypto                = "${TUNE_FEATURES:tune-neoversen2} crypto"
-PACKAGE_EXTRA_ARCHS:tune-neoversen2                 = "${PACKAGE_EXTRA_ARCHS:tune-armv8-5a} neoversen2"
-PACKAGE_EXTRA_ARCHS:tune-neoversen2-crypto          = "${PACKAGE_EXTRA_ARCHS:tune-armv8-5a-crypto} neoversen2 neoversen2-crypto"
+PACKAGE_EXTRA_ARCHS:tune-neoversen2                 = "${PACKAGE_EXTRA_ARCHS:tune-armv9a} neoversen2"
+PACKAGE_EXTRA_ARCHS:tune-neoversen2-crypto          = "${PACKAGE_EXTRA_ARCHS:tune-armv9a-crypto} neoversen2 neoversen2-crypto"
 BASE_LIB:tune-neoversen2                            = "lib64"
 BASE_LIB:tune-neoversen2-crypto                     = "lib64"
diff --git a/poky/meta/files/overlayfs-etc-preinit.sh.in b/poky/meta/files/overlayfs-etc-preinit.sh.in
index 43c9b04..8db076f 100644
--- a/poky/meta/files/overlayfs-etc-preinit.sh.in
+++ b/poky/meta/files/overlayfs-etc-preinit.sh.in
@@ -15,19 +15,32 @@
 
 [ -z "$CONSOLE" ] && CONSOLE="/dev/console"
 
+BASE_OVERLAY_ETC_DIR={OVERLAYFS_ETC_MOUNT_POINT}/overlay-etc
+UPPER_DIR=$BASE_OVERLAY_ETC_DIR/upper
+WORK_DIR=$BASE_OVERLAY_ETC_DIR/work
+LOWER_DIR=$BASE_OVERLAY_ETC_DIR/lower
+
 mkdir -p {OVERLAYFS_ETC_MOUNT_POINT}
 if mount -n -t {OVERLAYFS_ETC_FSTYPE} \
     -o {OVERLAYFS_ETC_MOUNT_OPTIONS} \
     {OVERLAYFS_ETC_DEVICE} {OVERLAYFS_ETC_MOUNT_POINT}
 then
-    mkdir -p {OVERLAYFS_ETC_MOUNT_POINT}/overlay-etc/upper
-    mkdir -p {OVERLAYFS_ETC_MOUNT_POINT}/overlay-etc/work
+    mkdir -p $UPPER_DIR
+    mkdir -p $WORK_DIR
+
+    if {OVERLAYFS_ETC_EXPOSE_LOWER}; then
+        mkdir -p $LOWER_DIR
+
+        # provide read-only access to original /etc content
+        mount -o bind,ro /etc $LOWER_DIR
+    fi
+
     mount -n -t overlay \
-        -o upperdir={OVERLAYFS_ETC_MOUNT_POINT}/overlay-etc/upper \
+        -o upperdir=$UPPER_DIR \
         -o lowerdir=/etc \
-        -o workdir={OVERLAYFS_ETC_MOUNT_POINT}/overlay-etc/work \
+        -o workdir=$WORK_DIR \
         -o index=off,xino=off,redirect_dir=off,metacopy=off \
-        {OVERLAYFS_ETC_MOUNT_POINT}/overlay-etc/upper /etc || \
+        $UPPER_DIR /etc || \
             echo "PREINIT: Mounting etc-overlay failed!"
 else
     echo "PREINIT: Mounting </data> failed!"
diff --git a/poky/meta/lib/oe/rootfs.py b/poky/meta/lib/oe/rootfs.py
index 0b9911e..890ba5f 100644
--- a/poky/meta/lib/oe/rootfs.py
+++ b/poky/meta/lib/oe/rootfs.py
@@ -173,14 +173,8 @@
         bb.utils.rename(self.image_rootfs + '-orig', self.image_rootfs)
 
     def _exec_shell_cmd(self, cmd):
-        fakerootcmd = self.d.getVar('FAKEROOT')
-        if fakerootcmd is not None:
-            exec_cmd = [fakerootcmd, cmd]
-        else:
-            exec_cmd = cmd
-
         try:
-            subprocess.check_output(exec_cmd, stderr=subprocess.STDOUT)
+            subprocess.check_output(cmd, stderr=subprocess.STDOUT)
         except subprocess.CalledProcessError as e:
             return("Command '%s' returned %d:\n%s" % (e.cmd, e.returncode, e.output))
 
@@ -325,19 +319,29 @@
             bb.note("No Kernel Modules found, not running depmod")
             return
 
-        kernel_abi_ver_file = oe.path.join(self.d.getVar('PKGDATA_DIR'), "kernel-depmod",
-                                           'kernel-abiversion')
-        if not os.path.exists(kernel_abi_ver_file):
-            bb.fatal("No kernel-abiversion file found (%s), cannot run depmod, aborting" % kernel_abi_ver_file)
+        pkgdatadir = self.d.getVar('PKGDATA_DIR')
 
-        with open(kernel_abi_ver_file) as f:
-            kernel_ver = f.read().strip(' \n')
+        # PKGDATA_DIR can include multiple kernels so we run depmod for each
+        # one of them.
+        for direntry in os.listdir(pkgdatadir):
+            match = re.match('(.*)-depmod', direntry)
+            if not match:
+                continue
+            kernel_package_name = match.group(1)
 
-        versioned_modules_dir = os.path.join(self.image_rootfs, modules_dir, kernel_ver)
+            kernel_abi_ver_file = oe.path.join(pkgdatadir, direntry, kernel_package_name + '-abiversion')
+            if not os.path.exists(kernel_abi_ver_file):
+                bb.fatal("No kernel-abiversion file found (%s), cannot run depmod, aborting" % kernel_abi_ver_file)
 
-        bb.utils.mkdirhier(versioned_modules_dir)
+            with open(kernel_abi_ver_file) as f:
+                kernel_ver = f.read().strip(' \n')
 
-        self._exec_shell_cmd(['depmodwrapper', '-a', '-b', self.image_rootfs, kernel_ver])
+            versioned_modules_dir = os.path.join(self.image_rootfs, modules_dir, kernel_ver)
+
+            bb.utils.mkdirhier(versioned_modules_dir)
+
+            bb.note("Running depmodwrapper for %s ..." % versioned_modules_dir)
+            self._exec_shell_cmd(['depmodwrapper', '-a', '-b', self.image_rootfs, kernel_ver, kernel_package_name])
 
     """
     Create devfs:
@@ -386,6 +390,10 @@
 
 
 def image_list_installed_packages(d, rootfs_dir=None):
+    # There's no rootfs for baremetal images
+    if bb.data.inherits_class('baremetal-image', d):
+        return ""
+
     if not rootfs_dir:
         rootfs_dir = d.getVar('IMAGE_ROOTFS')
 
diff --git a/poky/meta/lib/oeqa/oetest.py b/poky/meta/lib/oeqa/oetest.py
index 9c84466..cf417db 100644
--- a/poky/meta/lib/oeqa/oetest.py
+++ b/poky/meta/lib/oeqa/oetest.py
@@ -28,7 +28,7 @@
     import oeqa.sdkext
 except ImportError:
     pass
-from oeqa.utils.decorators import LogResults, gettag, getResults
+from oeqa.utils.decorators import LogResults, gettag
 
 logger = logging.getLogger("BitBake")
 
@@ -57,7 +57,6 @@
 @LogResults
 class oeTest(unittest.TestCase):
 
-    pscmd = "ps"
     longMessage = True
 
     @classmethod
@@ -110,20 +109,6 @@
     def tearDown(self):
         # Uninstall packages in the DUT
         self.tc.install_uninstall_packages(self.id(), False)
-
-        res = getResults()
-        # If a test fails or there is an exception dump
-        # for QemuTarget only
-        if (type(self.target).__name__ == "QemuTarget" and
-                (self.id() in res.getErrorList() or
-                self.id() in  res.getFailList())):
-            self.tc.host_dumper.create_dir(self._testMethodName)
-            self.tc.host_dumper.dump_host()
-            self.target.target_dumper.dump_target(
-                    self.tc.host_dumper.dump_dir)
-            print ("%s dump data stored in %s" % (self._testMethodName,
-                     self.tc.host_dumper.dump_dir))
-
         self.tearDownLocal()
 
     # Method to be run after tearDown and implemented by child classes
@@ -398,11 +383,6 @@
     def _get_test_suites_required(self):
         return [t for t in self.d.getVar("TEST_SUITES").split() if t != "auto"]
 
-    def loadTests(self):
-        super(RuntimeTestContext, self).loadTests()
-        if oeTest.hasPackage("procps"):
-            oeRuntimeTest.pscmd = "ps -ef"
-
     def extract_packages(self):
         """
         Find packages that will be needed during runtime.
diff --git a/poky/meta/lib/oeqa/runtime/cases/dnf.py b/poky/meta/lib/oeqa/runtime/cases/dnf.py
index a8e23e5..410d456 100644
--- a/poky/meta/lib/oeqa/runtime/cases/dnf.py
+++ b/poky/meta/lib/oeqa/runtime/cases/dnf.py
@@ -75,48 +75,43 @@
     def test_dnf_makecache(self):
         self.dnf_with_repo('makecache')
 
-
-# Does not work when repo is specified on the command line
-#    @OETestDepends(['dnf.DnfRepoTest.test_dnf_makecache'])
-#    def test_dnf_repolist(self):
-#        self.dnf_with_repo('repolist')
-
     @OETestDepends(['dnf.DnfRepoTest.test_dnf_makecache'])
     def test_dnf_repoinfo(self):
         self.dnf_with_repo('repoinfo')
 
     @OETestDepends(['dnf.DnfRepoTest.test_dnf_makecache'])
     def test_dnf_install(self):
-        output = self.dnf_with_repo('list run-postinsts-dev')
-        if 'Installed Packages' in output:
-            self.dnf_with_repo('remove -y run-postinsts-dev')
-        self.dnf_with_repo('install -y run-postinsts-dev')
+        self.dnf_with_repo('remove -y dnf-test-*')
+        self.dnf_with_repo('install -y dnf-test-dep')
 
     @OETestDepends(['dnf.DnfRepoTest.test_dnf_install'])
     def test_dnf_install_dependency(self):
-        self.dnf_with_repo('remove -y run-postinsts')
-        self.dnf_with_repo('install -y run-postinsts-dev')
+        self.dnf_with_repo('remove -y dnf-test-*')
+        self.dnf_with_repo('install -y dnf-test-main')
+        output = self.dnf('list --installed dnf-test-*')
+        self.assertIn("dnf-test-main.", output)
+        self.assertIn("dnf-test-dep.", output)
 
     @OETestDepends(['dnf.DnfRepoTest.test_dnf_install_dependency'])
     def test_dnf_install_from_disk(self):
-        self.dnf_with_repo('remove -y run-postinsts-dev')
-        self.dnf_with_repo('install -y --downloadonly run-postinsts-dev')
-        status, output = self.target.run('find /var/cache/dnf -name run-postinsts-dev*rpm', 1500)
+        self.dnf_with_repo('remove -y dnf-test-dep')
+        self.dnf_with_repo('install -y --downloadonly dnf-test-dep')
+        status, output = self.target.run('find /var/cache/dnf -name dnf-test-dep*rpm')
         self.assertEqual(status, 0, output)
         self.dnf_with_repo('install -y %s' % output)
 
     @OETestDepends(['dnf.DnfRepoTest.test_dnf_install_from_disk'])
     def test_dnf_install_from_http(self):
-        output = subprocess.check_output('%s %s -name run-postinsts-dev*' % (bb.utils.which(os.getenv('PATH'), "find"),
+        output = subprocess.check_output('%s %s -name dnf-test-dep*' % (bb.utils.which(os.getenv('PATH'), "find"),
                                                                            os.path.join(self.tc.td['WORKDIR'], 'oe-testimage-repo')), shell=True).decode("utf-8")
         rpm_path = output.split("/")[-2] + "/" + output.split("/")[-1]
         url = 'http://%s:%s/%s' %(self.target.server_ip, self.repo_server.port, rpm_path)
-        self.dnf_with_repo('remove -y run-postinsts-dev')
+        self.dnf_with_repo('remove -y dnf-test-dep')
         self.dnf_with_repo('install -y %s' % url)
 
     @OETestDepends(['dnf.DnfRepoTest.test_dnf_install'])
     def test_dnf_reinstall(self):
-        self.dnf_with_repo('reinstall -y run-postinsts-dev')
+        self.dnf_with_repo('reinstall -y dnf-test-main')
 
     @OETestDepends(['dnf.DnfRepoTest.test_dnf_makecache'])
     @skipIfInDataVar('DISTRO_FEATURES', 'usrmerge', 'Test run when not enable usrmerge')
@@ -139,14 +134,14 @@
         self.target.run('cp -r /etc/dnf %s/etc' % rootpath, 1500)
         self.target.run('cp /bin/sh %s/bin' % rootpath, 1500)
         self.target.run('mount -o bind /dev %s/dev/' % rootpath, 1500)
-        self.dnf_with_repo('install --installroot=%s -v -y --rpmverbosity=debug busybox run-postinsts' % rootpath)
+        self.dnf_with_repo('install --installroot=%s -v -y --rpmverbosity=debug busybox' % rootpath)
         status, output = self.target.run('test -e %s/var/cache/dnf' % rootpath, 1500)
         self.assertEqual(0, status, output)
         status, output = self.target.run('test -e %s/bin/busybox' % rootpath, 1500)
         self.assertEqual(0, status, output)
 
     @OETestDepends(['dnf.DnfRepoTest.test_dnf_makecache'])
-    @skipIfNotInDataVar('DISTRO_FEATURES', 'usrmerge', 'Test run when enable usrmege')
+    @skipIfNotInDataVar('DISTRO_FEATURES', 'usrmerge', 'Test run when enable usrmerge')
     @OEHasPackage('busybox')
     def test_dnf_installroot_usrmerge(self):
         rootpath = '/home/root/chroot/test'
@@ -171,7 +166,7 @@
         self.target.run('cp -r /etc/dnf %s/etc' % rootpath, 1500)
         self.target.run('cp /bin/sh %s/bin' % rootpath, 1500)
         self.target.run('mount -o bind /dev %s/dev/' % rootpath, 1500)
-        self.dnf_with_repo('install --installroot=%s -v -y --rpmverbosity=debug busybox run-postinsts' % rootpath)
+        self.dnf_with_repo('install --installroot=%s -v -y --rpmverbosity=debug busybox' % rootpath)
         status, output = self.target.run('test -e %s/var/cache/dnf' % rootpath, 1500)
         self.assertEqual(0, status, output)
         status, output = self.target.run('test -e %s/bin/busybox' % rootpath, 1500)
@@ -179,15 +174,8 @@
 
     @OETestDepends(['dnf.DnfRepoTest.test_dnf_makecache'])
     def test_dnf_exclude(self):
-        excludepkg = 'curl-dev'
-        self.dnf_with_repo('install -y curl*')
-        self.dnf('list %s' % excludepkg, 0)
-        #Avoid remove dependencies to skip some errors on different archs and images
-        self.dnf_with_repo('remove --setopt=clean_requirements_on_remove=0 -y curl*')
-        #check curl-dev is not installed adter removing all curl occurrences
-        status, output = self.target.run('dnf list --installed | grep %s'% excludepkg, 1500)
-        self.assertEqual(1, status, "%s was not removed,  is listed as installed"%excludepkg)
-        self.dnf_with_repo('install -y --exclude=%s --exclude=curl-staticdev curl*' % excludepkg)
-        #check curl-dev is not installed after being excluded
-        status, output = self.target.run('dnf list --installed | grep %s'% excludepkg , 1500)
-        self.assertEqual(1, status, "%s was not excluded, is listed as installed"%excludepkg)
+        self.dnf_with_repo('remove -y dnf-test-*')
+        self.dnf_with_repo('install -y --exclude=dnf-test-dep dnf-test-*')
+        output = self.dnf('list --installed dnf-test-*')
+        self.assertIn("dnf-test-main.", output)
+        self.assertNotIn("dnf-test-dep.", output)  # dnf-test-dep was passed to --exclude, so it must not be listed
diff --git a/poky/meta/lib/oeqa/runtime/cases/parselogs.py b/poky/meta/lib/oeqa/runtime/cases/parselogs.py
index e16c230..e67d375 100644
--- a/poky/meta/lib/oeqa/runtime/cases/parselogs.py
+++ b/poky/meta/lib/oeqa/runtime/cases/parselogs.py
@@ -67,6 +67,8 @@
     "was skipped because of a failed condition check",
     "was skipped because all trigger condition checks failed",
     "xf86OpenConsole: Switching VT failed",
+    "Failed to read LoaderConfigTimeoutOneShot variable, ignoring: Operation not supported",
+    "Failed to read LoaderEntryOneShot variable, ignoring: Operation not supported",
     ]
 
 video_related = [
diff --git a/poky/meta/lib/oeqa/selftest/cases/bblayers.py b/poky/meta/lib/oeqa/selftest/cases/bblayers.py
index c6bd5a1..b048948 100644
--- a/poky/meta/lib/oeqa/selftest/cases/bblayers.py
+++ b/poky/meta/lib/oeqa/selftest/cases/bblayers.py
@@ -14,7 +14,9 @@
 
 class BitbakeLayers(OESelftestTestCase):
 
-    def setUpLocal(self):
+    @classmethod
+    def setUpClass(cls):
+        super(BitbakeLayers, cls).setUpClass()
         bitbake("python3-jsonschema-native")
         bitbake("-c addto_recipe_sysroot python3-jsonschema-native")
 
diff --git a/poky/meta/lib/oeqa/selftest/cases/debuginfod.py b/poky/meta/lib/oeqa/selftest/cases/debuginfod.py
index 01359ec..3c40119 100644
--- a/poky/meta/lib/oeqa/selftest/cases/debuginfod.py
+++ b/poky/meta/lib/oeqa/selftest/cases/debuginfod.py
@@ -10,16 +10,24 @@
 from oeqa.selftest.case import OESelftestTestCase
 from oeqa.utils.commands import bitbake, get_bb_var, runqemu
 
+
 class Debuginfod(OESelftestTestCase):
     def test_debuginfod(self):
-        self.write_config("""
+        self.write_config(
+            """
 DISTRO_FEATURES:append = " debuginfod"
 CORE_IMAGE_EXTRA_INSTALL += "elfutils"
-        """)
+        """
+        )
         bitbake("core-image-minimal elfutils-native:do_addto_recipe_sysroot")
 
         native_sysroot = get_bb_var("RECIPE_SYSROOT_NATIVE", "elfutils-native")
-        cmd = [os.path.join(native_sysroot, "usr", "bin", "debuginfod"), "--verbose", get_bb_var("DEPLOY_DIR")]
+        cmd = [
+            os.path.join(native_sysroot, "usr", "bin", "debuginfod"),
+            "--verbose",
+            "--database=:memory:",
+            get_bb_var("DEPLOY_DIR"),
+        ]
         for format in get_bb_var("PACKAGE_CLASSES").split():
             if format == "package_deb":
                 cmd.append("--scan-deb-dir")
@@ -36,7 +44,10 @@
             debuginfod = subprocess.Popen(cmd)
 
             with runqemu("core-image-minimal", runqemuparams="nographic") as qemu:
-                cmd = "DEBUGINFOD_URLS=http://%s:%d/ debuginfod-find debuginfo /usr/bin/debuginfod" % (qemu.server_ip, port)
+                cmd = (
+                    "DEBUGINFOD_URLS=http://%s:%d/ debuginfod-find debuginfo /usr/bin/debuginfod"
+                    % (qemu.server_ip, port)
+                )
                 status, output = qemu.run_serial(cmd)
                 # This should be more comprehensive
                 self.assertIn("/.cache/debuginfod_client/", output)
diff --git a/poky/meta/lib/oeqa/selftest/cases/overlayfs.py b/poky/meta/lib/oeqa/selftest/cases/overlayfs.py
index bff22f2..57a8c8b 100644
--- a/poky/meta/lib/oeqa/selftest/cases/overlayfs.py
+++ b/poky/meta/lib/oeqa/selftest/cases/overlayfs.py
@@ -333,24 +333,14 @@
         self.assertTrue("overlayfs-etc" in res.output, msg=res.output)
         self.assertTrue("package-management" in res.output, msg=res.output)
 
-    def test_image_feature_is_missing_class_included(self):
-        configAppend = """
-INHERIT += "overlayfs-etc"
-"""
-        self.run_check_image_feature(configAppend)
-
     def test_image_feature_is_missing(self):
-        self.run_check_image_feature()
-
-    def run_check_image_feature(self, appendToConfig=""):
         """
         Summary:   Overlayfs-etc class is not applied when image feature is not set
-                   even if we inherit it directly,
         Expected:  Image is created successfully but /etc is not an overlay
         Author:    Vyacheslav Yurkov <uvv.mail@gmail.com>
         """
 
-        config = f"""
+        config = """
 DISTRO_FEATURES:append = " systemd"
 
 # Use systemd as init manager
@@ -366,7 +356,6 @@
 # Image configuration for overlayfs-etc
 OVERLAYFS_ETC_MOUNT_POINT = "/data"
 OVERLAYFS_ETC_DEVICE = "/dev/sda3"
-{appendToConfig}
 """
 
         self.write_config(config)
@@ -392,7 +381,84 @@
         Author:    Vyacheslav Yurkov <uvv.mail@gmail.com>
         """
 
-        config = """
+        config = self.get_working_config()
+
+        args = {
+            'OVERLAYFS_INIT_OPTION': "" if origInit else "init=/sbin/preinit",
+            'OVERLAYFS_ETC_USE_ORIG_INIT_NAME': int(origInit == True)
+        }
+
+        self.write_config(config.format(**args))
+
+        bitbake('core-image-minimal')
+        testFile = "/etc/my-test-data"
+
+        with runqemu('core-image-minimal', image_fstype='wic', discard_writes=False) as qemu:
+            status, output = qemu.run_serial("/bin/mount")
+
+            line = getline_qemu(output, "/dev/sda3")
+            self.assertTrue("/data" in output, msg=output)
+
+            line = getline_qemu(output, "upperdir=/data/overlay-etc/upper")
+            self.assertTrue(line and line.startswith("/data/overlay-etc/upper on /etc type overlay"), msg=output)
+
+            # check that lower layer is not available
+            status, output = qemu.run_serial("ls -1 /data/overlay-etc/lower")
+            line = getline_qemu(output, "No such file or directory")
+            self.assertTrue(line, msg=output)
+
+            status, output = qemu.run_serial("touch " + testFile)
+            status, output = qemu.run_serial("sync")
+            status, output = qemu.run_serial("ls -1 " + testFile)
+            line = getline_qemu(output, testFile)
+            self.assertTrue(line and line.startswith(testFile), msg=output)
+
+        # Check that file exists in /etc after reboot
+        with runqemu('core-image-minimal', image_fstype='wic') as qemu:
+            status, output = qemu.run_serial("ls -1 " + testFile)
+            line = getline_qemu(output, testFile)
+            self.assertTrue(line and line.startswith(testFile), msg=output)
+
+    def test_lower_layer_access(self):
+        """
+        Summary:   Test that lower layer of /etc is available read-only when configured
+        Expected:  Can't write to lower layer. The files on lower and upper layers differ after
+                   modification
+        Author:    Vyacheslav Yurkov <uvv.mail@gmail.com>
+        """
+
+        config = self.get_working_config()
+
+        configLower = """
+OVERLAYFS_ETC_EXPOSE_LOWER = "1"
+IMAGE_INSTALL:append = " overlayfs-user"
+"""
+        testFile = "lower-layer-test.txt"
+
+        args = {
+            'OVERLAYFS_INIT_OPTION': "",
+            'OVERLAYFS_ETC_USE_ORIG_INIT_NAME': 1
+        }
+
+        self.write_config(config.format(**args))
+
+        self.append_config(configLower)
+        bitbake('core-image-minimal')
+
+        with runqemu('core-image-minimal', image_fstype='wic') as qemu:
+            status, output = qemu.run_serial("echo \"Modified in upper\" > /etc/" + testFile)
+            status, output = qemu.run_serial("diff /etc/" + testFile + " /data/overlay-etc/lower/" + testFile)
+            line = getline_qemu(output, "Modified in upper")
+            self.assertTrue(line, msg=output)
+            line = getline_qemu(output, "Original file")
+            self.assertTrue(line, msg=output)
+
+            status, output = qemu.run_serial("touch /data/overlay-etc/lower/ro-test.txt")
+            line = getline_qemu(output, "Read-only file system")
+            self.assertTrue(line, msg=output)
+
+    def get_working_config(self):
+        return """
 DISTRO_FEATURES:append = " systemd"
 
 # Use systemd as init manager
@@ -414,34 +480,3 @@
 OVERLAYFS_ETC_DEVICE = "/dev/sda3"
 OVERLAYFS_ETC_USE_ORIG_INIT_NAME = "{OVERLAYFS_ETC_USE_ORIG_INIT_NAME}"
 """
-
-        args = {
-            'OVERLAYFS_INIT_OPTION': "" if origInit else "init=/sbin/preinit",
-            'OVERLAYFS_ETC_USE_ORIG_INIT_NAME': int(origInit == True)
-        }
-
-        self.write_config(config.format(**args))
-
-        bitbake('core-image-minimal')
-        testFile = "/etc/my-test-data"
-
-        with runqemu('core-image-minimal', image_fstype='wic', discard_writes=False) as qemu:
-            status, output = qemu.run_serial("/bin/mount")
-
-            line = getline_qemu(output, "/dev/sda3")
-            self.assertTrue("/data" in output, msg=output)
-
-            line = getline_qemu(output, "upperdir=/data/overlay-etc/upper")
-            self.assertTrue(line and line.startswith("/data/overlay-etc/upper on /etc type overlay"), msg=output)
-
-            status, output = qemu.run_serial("touch " + testFile)
-            status, output = qemu.run_serial("sync")
-            status, output = qemu.run_serial("ls -1 " + testFile)
-            line = getline_qemu(output, testFile)
-            self.assertTrue(line and line.startswith(testFile), msg=output)
-
-        # Check that file exists in /etc after reboot
-        with runqemu('core-image-minimal', image_fstype='wic') as qemu:
-            status, output = qemu.run_serial("ls -1 " + testFile)
-            line = getline_qemu(output, testFile)
-            self.assertTrue(line and line.startswith(testFile), msg=output)
diff --git a/poky/meta/lib/oeqa/utils/decorators.py b/poky/meta/lib/oeqa/utils/decorators.py
index aabf411..ea90164 100644
--- a/poky/meta/lib/oeqa/utils/decorators.py
+++ b/poky/meta/lib/oeqa/utils/decorators.py
@@ -16,91 +16,6 @@
 import signal
 from functools import wraps
 
-#get the "result" object from one of the upper frames provided that one of these upper frames is a unittest.case frame
-class getResults(object):
-    def __init__(self):
-        #dynamically determine the unittest.case frame and use it to get the name of the test method
-        ident = threading.current_thread().ident
-        upperf = sys._current_frames()[ident]
-        while (upperf.f_globals['__name__'] != 'unittest.case'):
-            upperf = upperf.f_back
-
-        def handleList(items):
-            ret = []
-            # items is a list of tuples, (test, failure) or (_ErrorHandler(), Exception())
-            for i in items:
-                s = i[0].id()
-                #Handle the _ErrorHolder objects from skipModule failures
-                if "setUpModule (" in s:
-                    ret.append(s.replace("setUpModule (", "").replace(")",""))
-                else:
-                    ret.append(s)
-                # Append also the test without the full path
-                testname = s.split('.')[-1]
-                if testname:
-                    ret.append(testname)
-            return ret
-        self.faillist = handleList(upperf.f_locals['result'].failures)
-        self.errorlist = handleList(upperf.f_locals['result'].errors)
-        self.skiplist = handleList(upperf.f_locals['result'].skipped)
-
-    def getFailList(self):
-        return self.faillist
-
-    def getErrorList(self):
-        return self.errorlist
-
-    def getSkipList(self):
-        return self.skiplist
-
-class skipIfFailure(object):
-
-    def __init__(self,testcase):
-        self.testcase = testcase
-
-    def __call__(self,f):
-        @wraps(f)
-        def wrapped_f(*args, **kwargs):
-            res = getResults()
-            if self.testcase in (res.getFailList() or res.getErrorList()):
-                raise unittest.SkipTest("Testcase dependency not met: %s" % self.testcase)
-            return f(*args, **kwargs)
-        wrapped_f.__name__ = f.__name__
-        return wrapped_f
-
-class skipIfSkipped(object):
-
-    def __init__(self,testcase):
-        self.testcase = testcase
-
-    def __call__(self,f):
-        @wraps(f)
-        def wrapped_f(*args, **kwargs):
-            res = getResults()
-            if self.testcase in res.getSkipList():
-                raise unittest.SkipTest("Testcase dependency not met: %s" % self.testcase)
-            return f(*args, **kwargs)
-        wrapped_f.__name__ = f.__name__
-        return wrapped_f
-
-class skipUnlessPassed(object):
-
-    def __init__(self,testcase):
-        self.testcase = testcase
-
-    def __call__(self,f):
-        @wraps(f)
-        def wrapped_f(*args, **kwargs):
-            res = getResults()
-            if self.testcase in res.getSkipList() or \
-                    self.testcase in res.getFailList() or \
-                    self.testcase in res.getErrorList():
-                raise unittest.SkipTest("Testcase dependency not met: %s" % self.testcase)
-            return f(*args, **kwargs)
-        wrapped_f.__name__ = f.__name__
-        wrapped_f._depends_on = self.testcase
-        return wrapped_f
-
 class testcase(object):
     def __init__(self, test_case):
         self.test_case = test_case
diff --git a/poky/meta/lib/oeqa/utils/qemurunner.py b/poky/meta/lib/oeqa/utils/qemurunner.py
index 4c3d201..6a85f57 100644
--- a/poky/meta/lib/oeqa/utils/qemurunner.py
+++ b/poky/meta/lib/oeqa/utils/qemurunner.py
@@ -85,7 +85,7 @@
         accepted_patterns = ['search_reached_prompt', 'send_login_user', 'search_login_succeeded', 'search_cmd_finished']
         default_boot_patterns = defaultdict(str)
         # Default to the usual paterns used to communicate with the target
-        default_boot_patterns['search_reached_prompt'] = b' login:'
+        default_boot_patterns['search_reached_prompt'] = ' login:'
         default_boot_patterns['send_login_user'] = 'root\n'
         default_boot_patterns['search_login_succeeded'] = r"root@[a-zA-Z0-9\-]+:~#"
         default_boot_patterns['search_cmd_finished'] = r"[a-zA-Z0-9]+@[a-zA-Z0-9\-]+:~#"
@@ -109,12 +109,15 @@
             sock.close()
             raise
 
+    def decode_qemulog(self, todecode):
+        # Sanitize the data received from qemu as it may contain control characters
+        msg = todecode.decode("utf-8", errors='ignore')
+        msg = re_control_char.sub('', msg)
+        return msg
+
     def log(self, msg):
         if self.logfile:
-            # It is needed to sanitize the data received from qemu
-            # because is possible to have control characters
-            msg = msg.decode("utf-8", errors='ignore')
-            msg = re_control_char.sub('', msg)
+            msg = self.decode_qemulog(msg)
             self.msg += msg
             with codecs.open(self.logfile, "a", encoding="utf-8") as f:
                 f.write("%s" % msg)
@@ -188,8 +191,8 @@
         importlib.invalidate_caches()
         try:
             qmp = importlib.import_module("qmp")
-        except:
-            self.logger.error("qemurunner: qmp.py missing, please ensure it's installed")
+        except Exception as e:
+            self.logger.error("qemurunner: qmp.py missing, please ensure it's installed (%s)" % str(e))
             return False
         # Path relative to tmpdir used as cwd for qemu below to avoid unix socket path length issues
         qmp_file = "." + next(tempfile._get_candidate_names())
@@ -325,7 +328,8 @@
         try:
             os.chdir(os.path.dirname(qmp_port))
             try:
-               self.qmp = qmp.QEMUMonitorProtocol(os.path.basename(qmp_port))
+               from qmp.legacy import QEMUMonitorProtocol
+               self.qmp = QEMUMonitorProtocol(os.path.basename(qmp_port))
             except OSError as msg:
                 self.logger.warning("Failed to initialize qemu monitor socket: %s File: %s" % (msg, msg.filename))
                 return False
@@ -467,13 +471,15 @@
                             self.log(data)
 
                         data = b''
-                        if self.boot_patterns['search_reached_prompt'] in bootlog:
+
+                        decodedlog = self.decode_qemulog(bootlog)
+                        if self.boot_patterns['search_reached_prompt'] in decodedlog:
                             self.server_socket = qemusock
                             stopread = True
                             reachedlogin = True
-                            self.logger.debug("Reached login banner in %s seconds (%s)" %
+                            self.logger.debug("Reached login banner in %s seconds (%s, %s)" %
                                               (time.time() - (endtime - self.boottime),
-                                              time.strftime("%D %H:%M:%S")))
+                                              time.strftime("%D %H:%M:%S"), time.time()))
                     else:
                         # no need to check if reachedlogin unless we support multiple connections
                         self.logger.debug("QEMU socket disconnected before login banner reached. (%s)" %
@@ -487,10 +493,10 @@
                 self.logger.warning("Target didn't reach login banner in %d seconds (%s)" %
                                   (self.boottime, time.strftime("%D %H:%M:%S")))
             tail = lambda l: "\n".join(l.splitlines()[-25:])
-            bootlog = bootlog.decode("utf-8")
+            bootlog = self.decode_qemulog(bootlog)
             # in case bootlog is empty, use tail qemu log store at self.msg
             lines = tail(bootlog if bootlog else self.msg)
-            self.logger.warning("Last 25 lines of text:\n%s" % lines)
+            self.logger.warning("Last 25 lines of text (%d):\n%s" % (len(bootlog), lines))
             self.logger.warning("Check full boot log: %s" % self.logfile)
             self._dump_host()
             self.stop()
diff --git a/poky/meta/recipes-bsp/grub/files/0001-configure-Remove-obsoleted-malign-jumps-loops-functi.patch b/poky/meta/recipes-bsp/grub/files/0001-configure-Remove-obsoleted-malign-jumps-loops-functi.patch
new file mode 100644
index 0000000..98142a7
--- /dev/null
+++ b/poky/meta/recipes-bsp/grub/files/0001-configure-Remove-obsoleted-malign-jumps-loops-functi.patch
@@ -0,0 +1,48 @@
+From eb486898dac8cbc29b2cc39f911b657c3417ae34 Mon Sep 17 00:00:00 2001
+From: Fangrui Song via Grub-devel <grub-devel@gnu.org>
+Date: Thu, 26 Aug 2021 09:02:31 -0700
+Subject: [PATCH 1/2] configure: Remove obsoleted -malign-{jumps, loops,
+ functions}
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+The GCC warns "cc1: warning: ‘-malign-loops’ is obsolete, use ‘-falign-loops’".
+The Clang silently ignores -malign-{jumps,loops,functions}.
+
+The preferred -falign-* forms have been supported since GCC 3.2. So, just
+remove -malign-{jumps,loops,functions}.
+
+Upstream-Status: Backport [https://git.savannah.gnu.org/cgit/grub.git/commit/?id=eb486898dac8cbc29b2cc39f911b657c3417ae34]
+Signed-off-by: Fangrui Song <maskray@google.com>
+Acked-by: Paul Menzel <pmenzel@molgen.mpg.de>
+Reviewed-by: Daniel Kiper <daniel.kiper@oracle.com>
+---
+ configure.ac | 9 ---------
+ 1 file changed, 9 deletions(-)
+
+diff --git a/configure.ac b/configure.ac
+index bee28dbeb..9a12151bd 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -805,17 +805,8 @@ if test "x$target_cpu" = xi386; then
+ 	[grub_cv_cc_falign_loop=no])
+   ])
+ 
+-  AC_CACHE_CHECK([whether -malign-loops works], [grub_cv_cc_malign_loop], [
+-    CFLAGS="$TARGET_CFLAGS -malign-loops=1 -Werror"
+-    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[]])],
+-        [grub_cv_cc_malign_loop=yes],
+-	[grub_cv_cc_malign_loop=no])
+-  ])
+-
+   if test "x$grub_cv_cc_falign_loop" = xyes; then
+     TARGET_CFLAGS="$TARGET_CFLAGS -falign-jumps=1 -falign-loops=1 -falign-functions=1"
+-  elif test "x$grub_cv_cc_malign_loop" = xyes; then
+-    TARGET_CFLAGS="$TARGET_CFLAGS -malign-jumps=1 -malign-loops=1 -malign-functions=1"
+   fi
+ fi
+ 
+-- 
+2.37.3
+
diff --git a/poky/meta/recipes-bsp/grub/files/0002-configure-Check-for-falign-jumps-1-beside-falign-loo.patch b/poky/meta/recipes-bsp/grub/files/0002-configure-Check-for-falign-jumps-1-beside-falign-loo.patch
new file mode 100644
index 0000000..437e5b2
--- /dev/null
+++ b/poky/meta/recipes-bsp/grub/files/0002-configure-Check-for-falign-jumps-1-beside-falign-loo.patch
@@ -0,0 +1,59 @@
+From e372dcb0d4541ee9b9682cde088ec87a7b238ca2 Mon Sep 17 00:00:00 2001
+From: Fangrui Song via Grub-devel <grub-devel@gnu.org>
+Date: Thu, 26 Aug 2021 09:02:32 -0700
+Subject: [PATCH 2/2] configure: Check for -falign-jumps=1 beside
+ -falign-loops=1
+
+The Clang does not support -falign-jumps and only recently gained support
+for -falign-loops. The -falign-jumps=1 should be tested beside
+-falign-loops=1 to avoid passing unrecognized options to the Clang:
+
+  clang-14: error: optimization flag '-falign-jumps=1' is not supported [-Werror,-Wignored-optimization-argument]
+
+The -falign-functions=1 is supported by GCC 5.1.0/Clang 3.8.0. So, just
+add the option unconditionally.
+
+Upstream-Status: Backport [https://git.savannah.gnu.org/cgit/grub.git/commit/?id=e372dcb0d4541ee9b9682cde088ec87a7b238ca2]
+Signed-off-by: Fangrui Song <maskray@google.com>
+Acked-by: Paul Menzel <pmenzel@molgen.mpg.de>
+Reviewed-by: Daniel Kiper <daniel.kiper@oracle.com>
+---
+ configure.ac | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+diff --git a/configure.ac b/configure.ac
+index 9a12151bd..eeb5d2211 100644
+--- a/configure.ac
++++ b/configure.ac
+@@ -798,6 +798,8 @@ fi
+ 
+ # Force no alignment to save space on i386.
+ if test "x$target_cpu" = xi386; then
++  TARGET_CFLAGS="$TARGET_CFLAGS -falign-functions=1"
++
+   AC_CACHE_CHECK([whether -falign-loops works], [grub_cv_cc_falign_loop], [
+     CFLAGS="$TARGET_CFLAGS -falign-loops=1 -Werror"
+     AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[]])],
+@@ -806,7 +808,18 @@ if test "x$target_cpu" = xi386; then
+   ])
+ 
+   if test "x$grub_cv_cc_falign_loop" = xyes; then
+-    TARGET_CFLAGS="$TARGET_CFLAGS -falign-jumps=1 -falign-loops=1 -falign-functions=1"
++    TARGET_CFLAGS="$TARGET_CFLAGS -falign-loops=1"
++  fi
++
++  AC_CACHE_CHECK([whether -falign-jumps works], [grub_cv_cc_falign_jumps], [
++    CFLAGS="$TARGET_CFLAGS -falign-jumps=1 -Werror"
++    AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[]])],
++        [grub_cv_cc_falign_jumps=yes],
++        [grub_cv_cc_falign_jumps=no])
++  ])
++
++  if test "x$grub_cv_cc_falign_jumps" = xyes; then
++    TARGET_CFLAGS="$TARGET_CFLAGS -falign-jumps=1"
+   fi
+ fi
+ 
+-- 
+2.37.3
+
diff --git a/poky/meta/recipes-bsp/grub/grub2.inc b/poky/meta/recipes-bsp/grub/grub2.inc
index 47ea561..2545b99 100644
--- a/poky/meta/recipes-bsp/grub/grub2.inc
+++ b/poky/meta/recipes-bsp/grub/grub2.inc
@@ -32,6 +32,8 @@
            file://CVE-2022-28734-net-http-Fix-OOB-write-for-split-http-headers.patch \
            file://CVE-2022-28734-net-http-Error-out-on-headers-with-LF-without-CR.patch \
            file://CVE-2022-28735-kern-efi-sb-Reject-non-kernel-files-in-the-shim_lock.patch \
+           file://0001-configure-Remove-obsoleted-malign-jumps-loops-functi.patch \
+           file://0002-configure-Check-for-falign-jumps-1-beside-falign-loo.patch \
 "
 
 SRC_URI[sha256sum] = "23b64b4c741569f9426ed2e3d0e6780796fca081bee4c99f62aa3f53ae803f5f"
@@ -43,9 +45,13 @@
 
 DEPENDS = "flex-native bison-native gettext-native"
 
-COMPATIBLE_HOST = '(x86_64.*|i.86.*|arm.*|aarch64.*|riscv.*)-(linux.*|freebsd.*)'
-COMPATIBLE_HOST:armv7a = 'null'
-COMPATIBLE_HOST:armv7ve = 'null'
+GRUB_COMPATIBLE_HOST = '(x86_64.*|i.86.*|arm.*|aarch64.*|riscv.*)-(linux.*|freebsd.*)'
+COMPATIBLE_HOST = "${GRUB_COMPATIBLE_HOST}"
+# Grub doesn't support hard-float toolchains, and its build cannot
+# forcefully disable hard float on some target CPUs. See 'configure.ac'
+# for the CPUs that are supported/unsupported with hardfp.
+COMPATIBLE_HOST:armv7a = "${@'null' if d.getVar('TUNE_CCARGS_MFLOAT') == 'hardfp' else d.getVar('GRUB_COMPATIBLE_HOST')}"
+COMPATIBLE_HOST:armv7ve = "${@'null' if d.getVar('TUNE_CCARGS_MFLOAT') == 'hardfp' else d.getVar('GRUB_COMPATIBLE_HOST')}"
 
 # configure.ac has code to set this automagically from the target tuple
 # but the OE freeform one (core2-foo-bar-linux) don't work with that.
diff --git a/poky/meta/recipes-bsp/u-boot/libubootenv_0.3.2.bb b/poky/meta/recipes-bsp/u-boot/libubootenv_0.3.3.bb
similarity index 77%
rename from poky/meta/recipes-bsp/u-boot/libubootenv_0.3.2.bb
rename to poky/meta/recipes-bsp/u-boot/libubootenv_0.3.3.bb
index e8f5894..55f91b6 100644
--- a/poky/meta/recipes-bsp/u-boot/libubootenv_0.3.2.bb
+++ b/poky/meta/recipes-bsp/u-boot/libubootenv_0.3.3.bb
@@ -6,12 +6,12 @@
 provided by U-Boot"
 
 HOMEPAGE = "https://github.com/sbabic/libubootenv"
-LICENSE = "LGPL-2.1-only"
-LIC_FILES_CHKSUM = "file://Licenses/lgpl-2.1.txt;md5=4fbd65380cdd255951079008b364516c"
+LICENSE = "LGPL-2.1-or-later"
+LIC_FILES_CHKSUM = "file://LICENSES/LGPL-2.1-or-later.txt;md5=4fbd65380cdd255951079008b364516c"
 SECTION = "libs"
 
 SRC_URI = "git://github.com/sbabic/libubootenv;protocol=https;branch=master"
-SRCREV = "ba7564f5006d09bec51058cf4f5ac90d4dc18b3c"
+SRCREV = "108100622160bb0c7ef4b6186230fe1f26402791"
 
 S = "${WORKDIR}/git"
 
diff --git a/poky/meta/recipes-bsp/u-boot/u-boot_2022.07.bb b/poky/meta/recipes-bsp/u-boot/u-boot_2022.07.bb
index 0d2464d..1ae5757 100644
--- a/poky/meta/recipes-bsp/u-boot/u-boot_2022.07.bb
+++ b/poky/meta/recipes-bsp/u-boot/u-boot_2022.07.bb
@@ -1,7 +1,7 @@
 require u-boot-common.inc
 require u-boot.inc
 
-SRC_URI:append = " file://0001-riscv32-Use-double-float-ABI-for-rv32.patch \
+SRC_URI +=       " file://0001-riscv32-Use-double-float-ABI-for-rv32.patch \
                    file://0001-riscv-fix-build-with-binutils-2.38.patch \
                  "
 
diff --git a/poky/meta/recipes-connectivity/avahi/avahi_0.8.bb b/poky/meta/recipes-connectivity/avahi/avahi_0.8.bb
index a2efe7e..a830385 100644
--- a/poky/meta/recipes-connectivity/avahi/avahi_0.8.bb
+++ b/poky/meta/recipes-connectivity/avahi/avahi_0.8.bb
@@ -19,7 +19,7 @@
                     file://avahi-daemon/main.c;endline=21;md5=9ee77368c5407af77caaef1b07285969 \
                     file://avahi-client/client.h;endline=23;md5=f4ac741a25c4f434039ba3e18c8674cf"
 
-SRC_URI = "https://github.com/lathiat/avahi/releases/download/v${PV}/avahi-${PV}.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/avahi-${PV}.tar.gz \
            file://00avahi-autoipd \
            file://99avahi-autoipd \
            file://initscript.patch \
@@ -28,7 +28,7 @@
            file://local-ping.patch \
            "
 
-UPSTREAM_CHECK_URI = "https://github.com/lathiat/avahi/releases/"
+GITHUB_BASE_URI = "https://github.com/lathiat/avahi/releases/"
 SRC_URI[md5sum] = "229c6aa30674fc43c202b22c5f8c2be7"
 SRC_URI[sha256sum] = "060309d7a333d38d951bc27598c677af1796934dbd98e1024e7ad8de798fedda"
 
@@ -48,7 +48,7 @@
 PACKAGECONFIG[libevent] = "--enable-libevent,--disable-libevent,libevent"
 PACKAGECONFIG[qt5] = "--enable-qt5,--disable-qt5,qtbase"
 
-inherit autotools pkgconfig gettext gobject-introspection
+inherit autotools pkgconfig gettext gobject-introspection github-releases
 
 EXTRA_OECONF = "--with-avahi-priv-access-group=adm \
              --disable-stack-protector \
diff --git a/poky/meta/recipes-connectivity/bind/bind-9.18.6/0001-avoid-start-failure-with-bind-user.patch b/poky/meta/recipes-connectivity/bind/bind-9.18.7/0001-avoid-start-failure-with-bind-user.patch
similarity index 100%
rename from poky/meta/recipes-connectivity/bind/bind-9.18.6/0001-avoid-start-failure-with-bind-user.patch
rename to poky/meta/recipes-connectivity/bind/bind-9.18.7/0001-avoid-start-failure-with-bind-user.patch
diff --git a/poky/meta/recipes-connectivity/bind/bind-9.18.6/0001-named-lwresd-V-and-start-log-hide-build-options.patch b/poky/meta/recipes-connectivity/bind/bind-9.18.7/0001-named-lwresd-V-and-start-log-hide-build-options.patch
similarity index 100%
rename from poky/meta/recipes-connectivity/bind/bind-9.18.6/0001-named-lwresd-V-and-start-log-hide-build-options.patch
rename to poky/meta/recipes-connectivity/bind/bind-9.18.7/0001-named-lwresd-V-and-start-log-hide-build-options.patch
diff --git a/poky/meta/recipes-connectivity/bind/bind-9.18.6/bind-ensure-searching-for-json-headers-searches-sysr.patch b/poky/meta/recipes-connectivity/bind/bind-9.18.7/bind-ensure-searching-for-json-headers-searches-sysr.patch
similarity index 100%
rename from poky/meta/recipes-connectivity/bind/bind-9.18.6/bind-ensure-searching-for-json-headers-searches-sysr.patch
rename to poky/meta/recipes-connectivity/bind/bind-9.18.7/bind-ensure-searching-for-json-headers-searches-sysr.patch
diff --git a/poky/meta/recipes-connectivity/bind/bind-9.18.6/bind9 b/poky/meta/recipes-connectivity/bind/bind-9.18.7/bind9
similarity index 100%
rename from poky/meta/recipes-connectivity/bind/bind-9.18.6/bind9
rename to poky/meta/recipes-connectivity/bind/bind-9.18.7/bind9
diff --git a/poky/meta/recipes-connectivity/bind/bind-9.18.6/conf.patch b/poky/meta/recipes-connectivity/bind/bind-9.18.7/conf.patch
similarity index 100%
rename from poky/meta/recipes-connectivity/bind/bind-9.18.6/conf.patch
rename to poky/meta/recipes-connectivity/bind/bind-9.18.7/conf.patch
diff --git a/poky/meta/recipes-connectivity/bind/bind-9.18.6/generate-rndc-key.sh b/poky/meta/recipes-connectivity/bind/bind-9.18.7/generate-rndc-key.sh
similarity index 100%
rename from poky/meta/recipes-connectivity/bind/bind-9.18.6/generate-rndc-key.sh
rename to poky/meta/recipes-connectivity/bind/bind-9.18.7/generate-rndc-key.sh
diff --git a/poky/meta/recipes-connectivity/bind/bind-9.18.6/init.d-add-support-for-read-only-rootfs.patch b/poky/meta/recipes-connectivity/bind/bind-9.18.7/init.d-add-support-for-read-only-rootfs.patch
similarity index 100%
rename from poky/meta/recipes-connectivity/bind/bind-9.18.6/init.d-add-support-for-read-only-rootfs.patch
rename to poky/meta/recipes-connectivity/bind/bind-9.18.7/init.d-add-support-for-read-only-rootfs.patch
diff --git a/poky/meta/recipes-connectivity/bind/bind-9.18.6/make-etc-initd-bind-stop-work.patch b/poky/meta/recipes-connectivity/bind/bind-9.18.7/make-etc-initd-bind-stop-work.patch
similarity index 100%
rename from poky/meta/recipes-connectivity/bind/bind-9.18.6/make-etc-initd-bind-stop-work.patch
rename to poky/meta/recipes-connectivity/bind/bind-9.18.7/make-etc-initd-bind-stop-work.patch
diff --git a/poky/meta/recipes-connectivity/bind/bind-9.18.6/named.service b/poky/meta/recipes-connectivity/bind/bind-9.18.7/named.service
similarity index 100%
rename from poky/meta/recipes-connectivity/bind/bind-9.18.6/named.service
rename to poky/meta/recipes-connectivity/bind/bind-9.18.7/named.service
diff --git a/poky/meta/recipes-connectivity/bind/bind_9.18.6.bb b/poky/meta/recipes-connectivity/bind/bind_9.18.7.bb
similarity index 97%
rename from poky/meta/recipes-connectivity/bind/bind_9.18.6.bb
rename to poky/meta/recipes-connectivity/bind/bind_9.18.7.bb
index 5f54942..4ab1148 100644
--- a/poky/meta/recipes-connectivity/bind/bind_9.18.6.bb
+++ b/poky/meta/recipes-connectivity/bind/bind_9.18.7.bb
@@ -20,7 +20,7 @@
            file://0001-avoid-start-failure-with-bind-user.patch \
            "
 
-SRC_URI[sha256sum] = "d43a0fed03c774d1685d203598218c0b7774a88fcc390a0170710d5feb7fbff1"
+SRC_URI[sha256sum] = "9e2acf1698f49d70ad12ffbad39ec6716a7da524e9ebd98429c7c70ba1262981"
 
 UPSTREAM_CHECK_URI = "https://ftp.isc.org/isc/bind9/"
 # follow the ESV versions divisible by 2
diff --git a/poky/meta/recipes-connectivity/connman/connman/connman b/poky/meta/recipes-connectivity/connman/connman/connman
index 310a696..a021fd4 100644
--- a/poky/meta/recipes-connectivity/connman/connman/connman
+++ b/poky/meta/recipes-connectivity/connman/connman/connman
@@ -10,48 +10,11 @@
 
 set -e
 
-nfsroot=0
-
-exec 9<&0 < /proc/mounts
-while read dev mtpt fstype rest; do
-	if test $mtpt = "/" ; then
-		case $fstype in
-		    nfs | nfs4)
-			nfsroot=1
-			break
-			;;
-		    *)
-			;;
-		esac
-	fi
-done
-
 do_start() {
-	if test $nfsroot -eq 1 ; then
-	    NET_DEVS=`cat /proc/net/dev | sed -ne 's/^\([a-zA-Z0-9 ]*\):.*$/\1/p'`
-	    NET_ADDR=`cat /proc/cmdline | sed -ne 's/^.*ip=\([^ :]*\).*$/\1/p'`
-
-	    if [ ! -z "$NET_ADDR" ]; then
-		if [ "$NET_ADDR" = dhcp ]; then
-		    ethn=`ifconfig | grep "^eth" | sed -e "s/\(eth[0-9]\)\(.*\)/\1/"`
-		    if [ ! -z "$ethn" ]; then
-			EXTRA_PARAM="$EXTRA_PARAM -I $ethn"
-		    fi
-		else
-		    for i in $NET_DEVS; do
-			ADDR=`ifconfig $i | sed 's/addr://g' | sed -ne 's/^.*inet \([0-9.]*\) .*$/\1/p'`
-			if [ "$NET_ADDR" = "$ADDR" ]; then
-			    EXTRA_PARAM="$EXTRA_PARAM -I $i"
-			    break
-			fi
-		    done
-		fi
-	    fi
-	fi
 	if [ -f @DATADIR@/connman/wired-setup ] ; then
 		. @DATADIR@/connman/wired-setup
 	fi
-	$DAEMON $EXTRA_PARAM
+	$DAEMON
 }
 
 do_stop() {
diff --git a/poky/meta/recipes-connectivity/inetutils/inetutils/CVE-2022-39028.patch b/poky/meta/recipes-connectivity/inetutils/inetutils/CVE-2022-39028.patch
new file mode 100644
index 0000000..3b07515
--- /dev/null
+++ b/poky/meta/recipes-connectivity/inetutils/inetutils/CVE-2022-39028.patch
@@ -0,0 +1,54 @@
+From d52349fa1b6baac77ffa2c74769636aa2ece2ec5 Mon Sep 17 00:00:00 2001
+From: Erik Auerswald <auerswal@unix-ag.uni-kl.de>
+Date: Sat, 3 Sep 2022 16:58:16 +0200
+Subject: [PATCH] telnetd: Handle early IAC EC or IAC EL receipt
+
+Fix telnetd crash if the first two bytes of a new connection
+are 0xff 0xf7 (IAC EC) or 0xff 0xf8 (IAC EL).
+
+The problem was reported in:
+<https://pierrekim.github.io/blog/2022-08-24-2-byte-dos-freebsd-netbsd-telnetd-netkit-telnetd-inetutils-telnetd-kerberos-telnetd.html>.
+
+* NEWS: Mention fix.
+* telnetd/state.c (telrcv): Handle zero slctab[SLC_EC].sptr and
+zero slctab[SLC_EL].sptr.
+
+CVE: CVE-2022-39028
+Upstream-Status: Backport [https://git.savannah.gnu.org/cgit/inetutils.git/commit/?id=fae8263e467380483c28513c0e5fac143e46f94f]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ telnetd/state.c | 12 +++++++++---
+ 1 file changed, 9 insertions(+), 3 deletions(-)
+
+diff --git a/telnetd/state.c b/telnetd/state.c
+index ffc6cba..c2d760f 100644
+--- a/telnetd/state.c
++++ b/telnetd/state.c
+@@ -312,15 +312,21 @@ telrcv (void)
+ 	    case EC:
+ 	    case EL:
+ 	      {
+-		cc_t ch;
++		cc_t ch = (cc_t) (_POSIX_VDISABLE);
+ 
+ 		DEBUG (debug_options, 1, printoption ("td: recv IAC", c));
+ 		ptyflush ();	/* half-hearted */
+ 		init_termbuf ();
+ 		if (c == EC)
+-		  ch = *slctab[SLC_EC].sptr;
++		  {
++		    if (slctab[SLC_EC].sptr)
++		      ch = *slctab[SLC_EC].sptr;
++		  }
+ 		else
+-		  ch = *slctab[SLC_EL].sptr;
++		  {
++		    if (slctab[SLC_EL].sptr)
++		      ch = *slctab[SLC_EL].sptr;
++		  }
+ 		if (ch != (cc_t) (_POSIX_VDISABLE))
+ 		  pty_output_byte ((unsigned char) ch);
+ 		break;
+-- 
+2.37.3
+
diff --git a/poky/meta/recipes-connectivity/inetutils/inetutils_2.3.bb b/poky/meta/recipes-connectivity/inetutils/inetutils_2.3.bb
index 1e8f636..2fce843 100644
--- a/poky/meta/recipes-connectivity/inetutils/inetutils_2.3.bb
+++ b/poky/meta/recipes-connectivity/inetutils/inetutils_2.3.bb
@@ -21,6 +21,7 @@
            file://tftpd.xinetd.inetutils \
            file://inetutils-1.9-PATH_PROCNET_DEV.patch \
            file://inetutils-only-check-pam_appl.h-when-pam-enabled.patch \
+           file://CVE-2022-39028.patch \
 "
 
 inherit autotools gettext update-alternatives texinfo
diff --git a/poky/meta/recipes-connectivity/iproute2/iproute2.inc b/poky/meta/recipes-connectivity/iproute2/iproute2.inc
deleted file mode 100644
index b1bcc14..0000000
--- a/poky/meta/recipes-connectivity/iproute2/iproute2.inc
+++ /dev/null
@@ -1,91 +0,0 @@
-SUMMARY = "TCP / IP networking and traffic control utilities"
-DESCRIPTION = "Iproute2 is a collection of utilities for controlling \
-TCP / IP networking and traffic control in Linux.  Of the utilities ip \
-and tc are the most important.  ip controls IPv4 and IPv6 \
-configuration and tc stands for traffic control."
-HOMEPAGE = "http://www.linuxfoundation.org/collaborate/workgroups/networking/iproute2"
-SECTION = "base"
-LICENSE = "GPL-2.0-or-later"
-LIC_FILES_CHKSUM = "file://COPYING;md5=eb723b61539feef013de476e68b5c50a \
-                    file://ip/ip.c;beginline=3;endline=8;md5=689d691d0410a4b64d3899f8d6e31817"
-
-DEPENDS = "flex-native bison-native iptables libcap"
-
-inherit update-alternatives bash-completion pkgconfig
-
-CLEANBROKEN = "1"
-
-PACKAGECONFIG ??= "tipc elf devlink"
-PACKAGECONFIG[tipc] = ",,libmnl,"
-PACKAGECONFIG[elf] = ",,elfutils,"
-PACKAGECONFIG[devlink] = ",,libmnl,"
-PACKAGECONFIG[rdma] = ",,libmnl,"
-
-IPROUTE2_MAKE_SUBDIRS = "lib tc ip bridge misc genl ${@bb.utils.filter('PACKAGECONFIG', 'devlink tipc rdma', d)}"
-
-EXTRA_OEMAKE = "\
-    CC='${CC}' \
-    KERNEL_INCLUDE=${STAGING_INCDIR} \
-    DOCDIR=${docdir}/iproute2 \
-    SUBDIRS='${IPROUTE2_MAKE_SUBDIRS}' \
-    SBINDIR='${base_sbindir}' \
-    LIBDIR='${libdir}' \
-"
-
-do_configure:append () {
-    sh configure ${STAGING_INCDIR}
-    # Explicitly disable ATM support
-    sed -i -e '/TC_CONFIG_ATM/d' config.mk
-}
-
-do_install () {
-    oe_runmake DESTDIR=${D} install
-    mv ${D}${base_sbindir}/ip ${D}${base_sbindir}/ip.iproute2
-    install -d ${D}${datadir}
-    mv ${D}/share/* ${D}${datadir}/ || true
-    rm ${D}/share -rf || true
-}
-
-# The .so files in iproute2-tc are modules, not traditional libraries
-INSANE_SKIP:${PN}-tc = "dev-so"
-
-IPROUTE2_PACKAGES =+ "\
-    ${PN}-devlink \
-    ${PN}-genl \
-    ${PN}-ifstat \
-    ${PN}-ip \
-    ${PN}-lnstat \
-    ${PN}-nstat \
-    ${PN}-rtacct \
-    ${PN}-ss \
-    ${PN}-tc \
-    ${PN}-tipc \
-    ${PN}-rdma \
-"
-
-PACKAGE_BEFORE_PN = "${IPROUTE2_PACKAGES}"
-RDEPENDS:${PN} += "${PN}-ip"
-
-FILES:${PN}-tc = "${base_sbindir}/tc* \
-                  ${libdir}/tc/*.so"
-FILES:${PN}-lnstat = "${base_sbindir}/lnstat \
-                      ${base_sbindir}/ctstat \
-                      ${base_sbindir}/rtstat"
-FILES:${PN}-ifstat = "${base_sbindir}/ifstat"
-FILES:${PN}-ip = "${base_sbindir}/ip.${PN} ${sysconfdir}/iproute2"
-FILES:${PN}-genl = "${base_sbindir}/genl"
-FILES:${PN}-rtacct = "${base_sbindir}/rtacct"
-FILES:${PN}-nstat = "${base_sbindir}/nstat"
-FILES:${PN}-ss = "${base_sbindir}/ss"
-FILES:${PN}-tipc = "${base_sbindir}/tipc"
-FILES:${PN}-devlink = "${base_sbindir}/devlink"
-FILES:${PN}-rdma = "${base_sbindir}/rdma"
-
-ALTERNATIVE:${PN}-ip = "ip"
-ALTERNATIVE_TARGET[ip] = "${base_sbindir}/ip.${BPN}"
-ALTERNATIVE_LINK_NAME[ip] = "${base_sbindir}/ip"
-ALTERNATIVE_PRIORITY = "100"
-
-ALTERNATIVE:${PN}-tc = "tc"
-ALTERNATIVE_LINK_NAME[tc] = "${base_sbindir}/tc"
-ALTERNATIVE_PRIORITY_${PN}-tc = "100"
diff --git a/poky/meta/recipes-connectivity/iproute2/iproute2_5.19.0.bb b/poky/meta/recipes-connectivity/iproute2/iproute2_5.19.0.bb
index 6a00779..3cbf80a 100644
--- a/poky/meta/recipes-connectivity/iproute2/iproute2_5.19.0.bb
+++ b/poky/meta/recipes-connectivity/iproute2/iproute2_5.19.0.bb
@@ -1,4 +1,15 @@
-require iproute2.inc
+SUMMARY = "TCP / IP networking and traffic control utilities"
+DESCRIPTION = "Iproute2 is a collection of utilities for controlling \
+TCP / IP networking and traffic control in Linux.  Of the utilities ip \
+and tc are the most important.  ip controls IPv4 and IPv6 \
+configuration and tc stands for traffic control."
+HOMEPAGE = "http://www.linuxfoundation.org/collaborate/workgroups/networking/iproute2"
+SECTION = "base"
+LICENSE = "GPL-2.0-or-later"
+LIC_FILES_CHKSUM = "file://COPYING;md5=eb723b61539feef013de476e68b5c50a \
+                    file://ip/ip.c;beginline=3;endline=8;md5=689d691d0410a4b64d3899f8d6e31817"
+
+DEPENDS = "flex-native bison-native iptables libcap"
 
 SRC_URI = "${KERNELORG_MIRROR}/linux/utils/net/${BPN}/${BP}.tar.xz \
            file://0001-libc-compat.h-add-musl-workaround.patch \
@@ -8,6 +19,82 @@
 
 SRC_URI[sha256sum] = "26b7a34d6a7fd2f7a42e2b39c5a90cb61bac522d1096067ffeb195e5693d7791"
 
+inherit update-alternatives bash-completion pkgconfig
+
+PACKAGECONFIG ??= "tipc elf devlink"
+PACKAGECONFIG[tipc] = ",,libmnl,"
+PACKAGECONFIG[elf] = ",,elfutils,"
+PACKAGECONFIG[devlink] = ",,libmnl,"
+PACKAGECONFIG[rdma] = ",,libmnl,"
+
+IPROUTE2_MAKE_SUBDIRS = "lib tc ip bridge misc genl ${@bb.utils.filter('PACKAGECONFIG', 'devlink tipc rdma', d)}"
+
 # CFLAGS are computed in Makefile and reference CCOPTS
 #
-EXTRA_OEMAKE:append = " CCOPTS='${CFLAGS}'"
+EXTRA_OEMAKE = "\
+    CC='${CC}' \
+    KERNEL_INCLUDE=${STAGING_INCDIR} \
+    DOCDIR=${docdir}/iproute2 \
+    SUBDIRS='${IPROUTE2_MAKE_SUBDIRS}' \
+    SBINDIR='${base_sbindir}' \
+    LIBDIR='${libdir}' \
+    CCOPTS='${CFLAGS}' \
+"
+
+do_configure:append () {
+    sh configure ${STAGING_INCDIR}
+    # Explicitly disable ATM support
+    sed -i -e '/TC_CONFIG_ATM/d' config.mk
+}
+
+do_install () {
+    oe_runmake DESTDIR=${D} install
+    mv ${D}${base_sbindir}/ip ${D}${base_sbindir}/ip.iproute2
+    install -d ${D}${datadir}
+    mv ${D}/share/* ${D}${datadir}/ || true
+    rm ${D}/share -rf || true
+}
+
+# The .so files in iproute2-tc are modules, not traditional libraries
+INSANE_SKIP:${PN}-tc = "dev-so"
+
+IPROUTE2_PACKAGES =+ "\
+    ${PN}-devlink \
+    ${PN}-genl \
+    ${PN}-ifstat \
+    ${PN}-ip \
+    ${PN}-lnstat \
+    ${PN}-nstat \
+    ${PN}-rtacct \
+    ${PN}-ss \
+    ${PN}-tc \
+    ${PN}-tipc \
+    ${PN}-rdma \
+"
+
+PACKAGE_BEFORE_PN = "${IPROUTE2_PACKAGES}"
+RDEPENDS:${PN} += "${PN}-ip"
+
+FILES:${PN}-tc = "${base_sbindir}/tc* \
+                  ${libdir}/tc/*.so"
+FILES:${PN}-lnstat = "${base_sbindir}/lnstat \
+                      ${base_sbindir}/ctstat \
+                      ${base_sbindir}/rtstat"
+FILES:${PN}-ifstat = "${base_sbindir}/ifstat"
+FILES:${PN}-ip = "${base_sbindir}/ip.${PN} ${sysconfdir}/iproute2"
+FILES:${PN}-genl = "${base_sbindir}/genl"
+FILES:${PN}-rtacct = "${base_sbindir}/rtacct"
+FILES:${PN}-nstat = "${base_sbindir}/nstat"
+FILES:${PN}-ss = "${base_sbindir}/ss"
+FILES:${PN}-tipc = "${base_sbindir}/tipc"
+FILES:${PN}-devlink = "${base_sbindir}/devlink"
+FILES:${PN}-rdma = "${base_sbindir}/rdma"
+
+ALTERNATIVE:${PN}-ip = "ip"
+ALTERNATIVE_TARGET[ip] = "${base_sbindir}/ip.${BPN}"
+ALTERNATIVE_LINK_NAME[ip] = "${base_sbindir}/ip"
+ALTERNATIVE_PRIORITY = "100"
+
+ALTERNATIVE:${PN}-tc = "tc"
+ALTERNATIVE_LINK_NAME[tc] = "${base_sbindir}/tc"
+ALTERNATIVE_PRIORITY_${PN}-tc = "100"
diff --git a/poky/meta/recipes-connectivity/ofono/ofono_1.34.bb b/poky/meta/recipes-connectivity/ofono/ofono_2.0.bb
similarity index 95%
rename from poky/meta/recipes-connectivity/ofono/ofono_1.34.bb
rename to poky/meta/recipes-connectivity/ofono/ofono_2.0.bb
index 2363174..afd43d2 100644
--- a/poky/meta/recipes-connectivity/ofono/ofono_1.34.bb
+++ b/poky/meta/recipes-connectivity/ofono/ofono_2.0.bb
@@ -13,7 +13,7 @@
     file://0001-mbim-add-an-optional-TEMP_FAILURE_RETRY-macro-copy.patch \
     file://0002-mbim-Fix-build-with-ell-0.39-by-restoring-unlikely-m.patch \
 "
-SRC_URI[sha256sum] = "c0b96d3013447ec2bcb74579bef90e4e59c68dbfa4b9c6fbce5d12401a43aac7"
+SRC_URI[sha256sum] = "b0a31bf4d8ff3030c4aef9f8413df999c54df9db2ff0a1d3ec1710e0a9d1a49e"
 
 inherit autotools pkgconfig update-rc.d systemd gobject-introspection-data
 
diff --git a/poky/meta/recipes-core/busybox/busybox/0001-devmem-add-128-bit-width.patch b/poky/meta/recipes-core/busybox/busybox/0001-devmem-add-128-bit-width.patch
new file mode 100644
index 0000000..985e2bf
--- /dev/null
+++ b/poky/meta/recipes-core/busybox/busybox/0001-devmem-add-128-bit-width.patch
@@ -0,0 +1,128 @@
+From d432049f288c9acdc4a7caa729c68ceba3c5dca1 Mon Sep 17 00:00:00 2001
+From: Aaro Koskinen <aaro.koskinen@nokia.com>
+Date: Thu, 25 Aug 2022 18:47:02 +0300
+Subject: [PATCH] devmem: add 128-bit width
+
+Add 128-bit width if the compiler provides the needed type.
+
+function                                             old     new   delta
+devmem_main                                          405     464     +59
+.rodata                                           109025  109043     +18
+------------------------------------------------------------------------------
+(add/remove: 0/0 grow/shrink: 2/0 up/down: 77/0)               Total: 77 bytes
+
+Upstream-Status: Backport [https://git.busybox.net/busybox/commit/?id=d432049f288c9acdc4a7caa729c68ceba3c5dca1]
+
+Signed-off-by: Aaro Koskinen <aaro.koskinen@nokia.com>
+Signed-off-by: Aaro Koskinen <aaro.koskinen@iki.fi>
+Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
+Signed-off-by: Mingli Yu <mingli.yu@windriver.com>
+---
+ miscutils/devmem.c | 68 ++++++++++++++++++++++++++++++----------------
+ 1 file changed, 44 insertions(+), 24 deletions(-)
+
+diff --git a/miscutils/devmem.c b/miscutils/devmem.c
+index f9f0276bc..f21621bd6 100644
+--- a/miscutils/devmem.c
++++ b/miscutils/devmem.c
+@@ -29,7 +29,6 @@ int devmem_main(int argc UNUSED_PARAM, char **argv)
+ {
+ 	void *map_base, *virt_addr;
+ 	uint64_t read_result;
+-	uint64_t writeval = writeval; /* for compiler */
+ 	off_t target;
+ 	unsigned page_size, mapped_size, offset_in_page;
+ 	int fd;
+@@ -64,9 +63,6 @@ int devmem_main(int argc UNUSED_PARAM, char **argv)
+ 			width = strchrnul(bhwl, (argv[2][0] | 0x20)) - bhwl;
+ 			width = sizes[width];
+ 		}
+-		/* VALUE */
+-		if (argv[3])
+-			writeval = bb_strtoull(argv[3], NULL, 0);
+ 	} else { /* argv[2] == NULL */
+ 		/* make argv[3] to be a valid thing to fetch */
+ 		argv--;
+@@ -96,28 +92,46 @@ int devmem_main(int argc UNUSED_PARAM, char **argv)
+ 	virt_addr = (char*)map_base + offset_in_page;
+ 
+ 	if (!argv[3]) {
+-		switch (width) {
+-		case 8:
+-			read_result = *(volatile uint8_t*)virt_addr;
+-			break;
+-		case 16:
+-			read_result = *(volatile uint16_t*)virt_addr;
+-			break;
+-		case 32:
+-			read_result = *(volatile uint32_t*)virt_addr;
+-			break;
+-		case 64:
+-			read_result = *(volatile uint64_t*)virt_addr;
+-			break;
+-		default:
+-			bb_simple_error_msg_and_die("bad width");
++#ifdef __SIZEOF_INT128__
++		if (width == 128) {
++			unsigned __int128 rd =
++				*(volatile unsigned __int128 *)virt_addr;
++			printf("0x%016llX%016llX\n",
++				(unsigned long long)(uint64_t)(rd >> 64),
++				(unsigned long long)(uint64_t)rd
++			);
++		} else
++#endif
++		{
++			switch (width) {
++			case 8:
++				read_result = *(volatile uint8_t*)virt_addr;
++				break;
++			case 16:
++				read_result = *(volatile uint16_t*)virt_addr;
++				break;
++			case 32:
++				read_result = *(volatile uint32_t*)virt_addr;
++				break;
++			case 64:
++				read_result = *(volatile uint64_t*)virt_addr;
++				break;
++			default:
++				bb_simple_error_msg_and_die("bad width");
++			}
++//			printf("Value at address 0x%"OFF_FMT"X (%p): 0x%llX\n",
++//				target, virt_addr,
++//				(unsigned long long)read_result);
++			/* Zero-padded output shows the width of access just done */
++			printf("0x%0*llX\n", (width >> 2), (unsigned long long)read_result);
+ 		}
+-//		printf("Value at address 0x%"OFF_FMT"X (%p): 0x%llX\n",
+-//			target, virt_addr,
+-//			(unsigned long long)read_result);
+-		/* Zero-padded output shows the width of access just done */
+-		printf("0x%0*llX\n", (width >> 2), (unsigned long long)read_result);
+ 	} else {
++		/* parse VALUE */
++#ifdef __SIZEOF_INT128__
++		unsigned __int128 writeval = strtoumax(argv[3], NULL, 0);
++#else
++		uint64_t writeval = bb_strtoull(argv[3], NULL, 0);
++#endif
+ 		switch (width) {
+ 		case 8:
+ 			*(volatile uint8_t*)virt_addr = writeval;
+@@ -135,6 +149,12 @@ int devmem_main(int argc UNUSED_PARAM, char **argv)
+ 			*(volatile uint64_t*)virt_addr = writeval;
+ //			read_result = *(volatile uint64_t*)virt_addr;
+ 			break;
++#ifdef __SIZEOF_INT128__
++		case 128:
++			*(volatile unsigned __int128 *)virt_addr = writeval;
++//			read_result = *(volatile uint64_t*)virt_addr;
++			break;
++#endif
+ 		default:
+ 			bb_simple_error_msg_and_die("bad width");
+ 		}
+-- 
+2.25.1
+
diff --git a/poky/meta/recipes-core/busybox/busybox_1.35.0.bb b/poky/meta/recipes-core/busybox/busybox_1.35.0.bb
index edf8964..e9ca6fd 100644
--- a/poky/meta/recipes-core/busybox/busybox_1.35.0.bb
+++ b/poky/meta/recipes-core/busybox/busybox_1.35.0.bb
@@ -50,6 +50,7 @@
            file://0001-libbb-sockaddr2str-ensure-only-printable-characters-.patch \
            file://0002-nslookup-sanitize-all-printed-strings-with-printable.patch \
            file://CVE-2022-30065.patch \
+           file://0001-devmem-add-128-bit-width.patch \
            "
 SRC_URI:append:libc-musl = " file://musl.cfg "
 
diff --git a/poky/meta/recipes-core/coreutils/coreutils_9.1.bb b/poky/meta/recipes-core/coreutils/coreutils_9.1.bb
index 55663c7..4807eef 100644
--- a/poky/meta/recipes-core/coreutils/coreutils_9.1.bb
+++ b/poky/meta/recipes-core/coreutils/coreutils_9.1.bb
@@ -46,6 +46,7 @@
 PACKAGECONFIG[xattr] = "--enable-xattr,--disable-xattr,attr,"
 PACKAGECONFIG[single-binary] = "--enable-single-binary,--disable-single-binary,,"
 PACKAGECONFIG[selinux] = "--with-selinux,--without-selinux,libselinux"
+PACKAGECONFIG[openssl] = "--with-openssl=yes,--with-openssl=no,openssl"
 
 # [ df mktemp nice printenv base64 gets a special treatment and is not included in this
 bindir_progs = "arch basename chcon cksum comm csplit cut dir dircolors dirname du \
diff --git a/poky/meta/recipes-core/dropbear/dropbear/0007-Fix-X11-build-failure-use-DROPBEAR_PRIO_LOWDELAY.patch b/poky/meta/recipes-core/dropbear/dropbear/0007-Fix-X11-build-failure-use-DROPBEAR_PRIO_LOWDELAY.patch
new file mode 100644
index 0000000..042dccb
--- /dev/null
+++ b/poky/meta/recipes-core/dropbear/dropbear/0007-Fix-X11-build-failure-use-DROPBEAR_PRIO_LOWDELAY.patch
@@ -0,0 +1,28 @@
+From 64292091fe3e8ea7c9bfe74af730b2ff5428bf10 Mon Sep 17 00:00:00 2001
+From: Matt Johnston <matt@ucc.asn.au>
+Date: Sat, 23 Apr 2022 22:33:31 +0800
+Subject: [PATCH] Fix X11 build failure, use DROPBEAR_PRIO_LOWDELAY
+
+Upstream-Status: Backport
+
+Signed-off-by: Daniel Gomez <daniel@qtec.com>
+---
+ svr-x11fwd.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/svr-x11fwd.c b/svr-x11fwd.c
+index 353cb12..5d9e6a9 100644
+--- a/svr-x11fwd.c
++++ b/svr-x11fwd.c
+@@ -206,7 +206,7 @@ void x11cleanup(struct ChanSess *chansess) {
+ }
+
+ static int x11_inithandler(struct Channel *channel) {
+-	channel->prio = DROPBEAR_CHANNEL_PRIO_INTERACTIVE;
++	channel->prio = DROPBEAR_PRIO_LOWDELAY;
+ 	return 0;
+ }
+
+--
+2.35.1
+
diff --git a/poky/meta/recipes-core/dropbear/dropbear_2022.82.bb b/poky/meta/recipes-core/dropbear/dropbear_2022.82.bb
index 2de243b..41c14ff 100644
--- a/poky/meta/recipes-core/dropbear/dropbear_2022.82.bb
+++ b/poky/meta/recipes-core/dropbear/dropbear_2022.82.bb
@@ -22,7 +22,8 @@
            file://dropbear.socket \
            file://dropbear.default \
            ${@bb.utils.contains('DISTRO_FEATURES', 'pam', '${PAM_SRC_URI}', '', d)} \
-           ${@bb.utils.contains('PACKAGECONFIG', 'disable-weak-ciphers', 'file://dropbear-disable-weak-ciphers.patch', '', d)} "
+           ${@bb.utils.contains('PACKAGECONFIG', 'disable-weak-ciphers', 'file://dropbear-disable-weak-ciphers.patch', '', d)} \
+           file://0007-Fix-X11-build-failure-use-DROPBEAR_PRIO_LOWDELAY.patch"
 
 SRC_URI[sha256sum] = "3a038d2bbc02bf28bbdd20c012091f741a3ec5cbe460691811d714876aad75d1"
 
@@ -53,6 +54,7 @@
 PACKAGECONFIG ?= "disable-weak-ciphers"
 PACKAGECONFIG[system-libtom] = "--disable-bundled-libtom,--enable-bundled-libtom,libtommath libtomcrypt"
 PACKAGECONFIG[disable-weak-ciphers] = ""
+PACKAGECONFIG[enable-x11-forwarding] = ""
 
 EXTRA_OECONF += "\
  ${@bb.utils.contains('DISTRO_FEATURES', 'pam', '--enable-pam', '--disable-pam', d)}"
@@ -64,6 +66,13 @@
 # musl does not implement wtmp/logwtmp APIs
 EXTRA_OECONF:append:libc-musl = " --disable-wtmp --disable-lastlog"
 
+do_configure:append() {
+	echo "/* Dropbear features */" > ${B}/localoptions.h
+	if ${@bb.utils.contains('PACKAGECONFIG', 'enable-x11-forwarding', 'true', 'false', d)}; then
+		echo "#define DROPBEAR_X11FWD 1" >> ${B}/localoptions.h
+	fi
+}
+
 do_install() {
 	install -d ${D}${sysconfdir} \
 		${D}${sysconfdir}/init.d \
diff --git a/poky/meta/recipes-core/ell/ell_0.52.bb b/poky/meta/recipes-core/ell/ell_0.53.bb
similarity index 89%
rename from poky/meta/recipes-core/ell/ell_0.52.bb
rename to poky/meta/recipes-core/ell/ell_0.53.bb
index b2af0d0..7817476 100644
--- a/poky/meta/recipes-core/ell/ell_0.52.bb
+++ b/poky/meta/recipes-core/ell/ell_0.53.bb
@@ -15,7 +15,7 @@
 inherit autotools pkgconfig
 
 SRC_URI = "https://mirrors.edge.kernel.org/pub/linux/libs/${BPN}/${BPN}-${PV}.tar.xz"
-SRC_URI[sha256sum] = "83099b14beda2b253a2c69460f9613c5e955b63349e3c00cf2fd506f5b3ba7d0"
+SRC_URI[sha256sum] = "a7d0df846af839bbea1b80f292166371070328854b3fa785b5c607fe600552ad"
 
 do_configure:prepend () {
     mkdir -p ${S}/build-aux
diff --git a/poky/meta/recipes-core/expat/expat_2.4.8.bb b/poky/meta/recipes-core/expat/expat_2.4.9.bb
similarity index 62%
rename from poky/meta/recipes-core/expat/expat_2.4.8.bb
rename to poky/meta/recipes-core/expat/expat_2.4.9.bb
index 980c488..9561edd 100644
--- a/poky/meta/recipes-core/expat/expat_2.4.8.bb
+++ b/poky/meta/recipes-core/expat/expat_2.4.9.bb
@@ -4,23 +4,24 @@
 SECTION = "libs"
 LICENSE = "MIT"
 
-LIC_FILES_CHKSUM = "file://COPYING;md5=9e2ce3b3c4c0f2670883a23bbd7c37a9"
+LIC_FILES_CHKSUM = "file://COPYING;md5=7b3b078238d0901d3b339289117cb7fb"
 
 VERSION_TAG = "${@d.getVar('PV').replace('.', '_')}"
 
-SRC_URI = "https://github.com/libexpat/libexpat/releases/download/R_${VERSION_TAG}/expat-${PV}.tar.bz2  \
+SRC_URI = "${GITHUB_BASE_URI}/download/R_${VERSION_TAG}/expat-${PV}.tar.bz2  \
            file://run-ptest \
            "
 
-UPSTREAM_CHECK_URI = "https://github.com/libexpat/libexpat/releases/"
+GITHUB_BASE_URI = "https://github.com/libexpat/libexpat/releases/"
+UPSTREAM_CHECK_REGEX = "releases/tag/R_(?P<pver>.+)"
 
-SRC_URI[sha256sum] = "a247a7f6bbb21cf2ca81ea4cbb916bfb9717ca523631675f99b3d4a5678dcd16"
+SRC_URI[sha256sum] = "7f44d1469b110773a94b0d5abeeeffaef79f8bd6406b07e52394bcf48126437a"
 
 EXTRA_OECMAKE:class-native += "-DEXPAT_BUILD_DOCS=OFF"
 
 RDEPENDS:${PN}-ptest += "bash"
 
-inherit cmake lib_package ptest
+inherit cmake lib_package ptest github-releases
 
 do_install_ptest:class-target() {
 	install -m 755 ${B}/tests/* ${D}${PTEST_PATH}
diff --git a/poky/meta/recipes-core/glibc/glibc-locale.inc b/poky/meta/recipes-core/glibc/glibc-locale.inc
index b8de7d3..7c14abf 100644
--- a/poky/meta/recipes-core/glibc/glibc-locale.inc
+++ b/poky/meta/recipes-core/glibc/glibc-locale.inc
@@ -87,10 +87,9 @@
 	if [ ${PACKAGE_NO_GCONV} -eq 0 ]; then
 		copy_locale_files ${libdir}/gconv 0755
 		copy_locale_files ${datadir}/i18n 0644
-	else
-		# Remove the libdir if it is empty when gconv is not copied
-		find ${D}${libdir} -type d -empty -delete
 	fi
+	# Remove empty dirs in libdir when gconv or locales are not copied
+	find ${D}${libdir} -type d -empty -delete
 	copy_locale_files ${datadir}/locale 0644
 	install -m 0644 ${LOCALETREESRC}/SUPPORTED ${WORKDIR}/SUPPORTED
 }
diff --git a/poky/meta/recipes-core/glibc/glibc-tests_2.36.bb b/poky/meta/recipes-core/glibc/glibc-tests_2.36.bb
index aca9675..c71c083 100644
--- a/poky/meta/recipes-core/glibc/glibc-tests_2.36.bb
+++ b/poky/meta/recipes-core/glibc/glibc-tests_2.36.bb
@@ -4,7 +4,7 @@
 inherit ptest features_check
 REQUIRED_DISTRO_FEATURES = "ptest"
 
-SRC_URI:append = " \
+SRC_URI += "\
 	file://run-ptest \
 "
 
@@ -29,7 +29,7 @@
 RPROVIDES:${PN} = "${PN}"
 RRECOMMENDS:${PN} = ""
 RDEPENDS:${PN} = " glibc sed"
-DEPENDS:append = " sed"
+DEPENDS += "sed"
 
 export oe_srcdir="${exec_prefix}/src/debug/glibc/${PV}/"
 
diff --git a/poky/meta/recipes-core/glibc/glibc-version.inc b/poky/meta/recipes-core/glibc/glibc-version.inc
index a078eb6..bd125ec 100644
--- a/poky/meta/recipes-core/glibc/glibc-version.inc
+++ b/poky/meta/recipes-core/glibc/glibc-version.inc
@@ -1,6 +1,6 @@
 SRCBRANCH ?= "release/2.36/master"
 PV = "2.36"
-SRCREV_glibc ?= "3bd3c612e98a53ce60ed972f5cd2b90628b3cba5"
+SRCREV_glibc ?= "c399271c10bd00714504e8d4dfbec8aebf996dd4"
 SRCREV_localedef ?= "794da69788cbf9bf57b59a852f9f11307663fa87"
 
 GLIBC_GIT_URI ?= "git://sourceware.org/git/glibc.git"
diff --git a/poky/meta/recipes-core/glibc/glibc_2.36.bb b/poky/meta/recipes-core/glibc/glibc_2.36.bb
index 1cfa810..45bc784 100644
--- a/poky/meta/recipes-core/glibc/glibc_2.36.bb
+++ b/poky/meta/recipes-core/glibc/glibc_2.36.bb
@@ -16,6 +16,9 @@
 # Potential patch at https://sourceware.org/bugzilla/show_bug.cgi?id=22853
 CVE_CHECK_IGNORE += "CVE-2019-1010025"
 
+# This has been integrated into the 2.36 branch as of c399271 so is now fixed
+CVE_CHECK_IGNORE += "CVE-2022-39046"
+
 DEPENDS += "gperf-native bison-native"
 
 NATIVESDKFIXES ?= ""
diff --git a/poky/meta/recipes-core/images/build-appliance-image_15.0.0.bb b/poky/meta/recipes-core/images/build-appliance-image_15.0.0.bb
index 2cdac20..f3f2bb2 100644
--- a/poky/meta/recipes-core/images/build-appliance-image_15.0.0.bb
+++ b/poky/meta/recipes-core/images/build-appliance-image_15.0.0.bb
@@ -24,7 +24,7 @@
 
 inherit core-image setuptools3
 
-SRCREV ?= "093398daf5000e69057aedfe8c9f8df2c4e837f3"
+SRCREV ?= "4f942c272d4417b5b719df25b80a6a6b54669a73"
 SRC_URI = "git://git.yoctoproject.org/poky;branch=master \
            file://Yocto_Build_Appliance.vmx \
            file://Yocto_Build_Appliance.vmxf \
diff --git a/poky/meta/recipes-core/initscripts/init-system-helpers_1.64.bb b/poky/meta/recipes-core/initscripts/init-system-helpers_1.64.bb
index 22ddd68..663d60c 100644
--- a/poky/meta/recipes-core/initscripts/init-system-helpers_1.64.bb
+++ b/poky/meta/recipes-core/initscripts/init-system-helpers_1.64.bb
@@ -18,6 +18,7 @@
 
 SRCREV = "c440893051406c11f0a315058939657d5937be4f"
 SRC_URI = "git://salsa.debian.org/debian/init-system-helpers.git;protocol=https;branch=master"
+UPSTREAM_CHECK_GITTAGREGEX = "(?P<pver>(\d+(\.\d+)+))(?!_exp)"
 
 S = "${WORKDIR}/git"
 
diff --git a/poky/meta/recipes-core/libcgroup/libcgroup_3.0.0.bb b/poky/meta/recipes-core/libcgroup/libcgroup_3.0.0.bb
index f3e8412..9937a17 100644
--- a/poky/meta/recipes-core/libcgroup/libcgroup_3.0.0.bb
+++ b/poky/meta/recipes-core/libcgroup/libcgroup_3.0.0.bb
@@ -7,16 +7,16 @@
 LICENSE = "LGPL-2.1-only"
 LIC_FILES_CHKSUM = "file://COPYING;md5=4d794c5d710e5b3547a6cc6a6609a641"
 
-inherit autotools pkgconfig
+inherit autotools pkgconfig github-releases
 
 DEPENDS = "bison-native flex-native"
 
-SRC_URI = "https://github.com/${BPN}/${BPN}/releases/download/v3.0/${BP}.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/v3.0/${BP}.tar.gz \
            file://0001-api-Use-GNU-strerror_r-when-available.patch \
 "
+UPSTREAM_CHECK_URI = "https://github.com/libcgroup/libcgroup/tags"
 
 SRC_URI[sha256sum] = "8d284d896fca1c981b55850e92acd3ad9648a69227c028dda7ae3402af878edd"
-UPSTREAM_CHECK_URI = "https://github.com/libcgroup/libcgroup/releases/"
 
 DEPENDS:append:libc-musl = " fts "
 EXTRA_OEMAKE:append:libc-musl = " LIBS=-lfts"
diff --git a/poky/meta/recipes-core/libxml/libxml2/0001-Port-gentest.py-to-Python-3.patch b/poky/meta/recipes-core/libxml/libxml2/0001-Port-gentest.py-to-Python-3.patch
new file mode 100644
index 0000000..c6567ac
--- /dev/null
+++ b/poky/meta/recipes-core/libxml/libxml2/0001-Port-gentest.py-to-Python-3.patch
@@ -0,0 +1,814 @@
+From 2c20198b1ddb1bfb47269b8caf929ffb83748f78 Mon Sep 17 00:00:00 2001
+From: Nick Wellnhofer <wellnhofer@aevum.de>
+Date: Thu, 21 Apr 2022 00:45:58 +0200
+Subject: [PATCH] Port gentest.py to Python 3
+
+Upstream-Status: Backport [https://gitlab.gnome.org/GNOME/libxml2/-/commit/343fc1421cdae097fa6c4cffeb1a065a40be6bbb]
+
+* fixes:
+
+make[1]: 'testReader' is up to date.
+  File "../libxml2-2.9.10/gentest.py", line 11
+    print "libxml2 python bindings not available, skipping testapi.c generation"
+          ^
+SyntaxError: Missing parentheses in call to 'print'. Did you mean print("libxml2 python bindings not available, skipping testapi.c generation")?
+make[1]: [Makefile:2078: testapi.c] Error 1 (ignored)
+
+...
+
+make[1]: 'testReader' is up to date.
+  File "../libxml2-2.9.10/gentest.py", line 271
+    return 1
+           ^
+TabError: inconsistent use of tabs and spaces in indentation
+make[1]: [Makefile:2078: testapi.c] Error 1 (ignored)
+
+...
+
+aarch64-oe-linux-gcc: error: testapi.c: No such file or directory
+aarch64-oe-linux-gcc: fatal error: no input files
+compilation terminated.
+make[1]: *** [Makefile:1275: testapi.o] Error 1
+
+But there is still a bit of a mystery why it worked before, because check-am
+calls gentest.py with $(PYTHON), so it ignores the shebang in the script
+and libxml2 is using python3native (through python3targetconfig.bbclass)
+so something like:
+
+libxml2/2.9.10-r0/recipe-sysroot-native/usr/bin/python3-native/python3 gentest.py
+
+But that still fails (now without SyntaxError) with:
+libxml2 python bindings not available, skipping testapi.c generation
+
+because we don't have a dependency on libxml2-native (to provide libxml2
+python bindings from python3native) and exported PYTHON_SITE_PACKAGES
+might be useless (e.g. /usr/lib/python3.8/site-packages on Ubuntu-22.10
+which uses python 3.10 and there is no site-packages with libxml2)
+
+Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com>
+
+---
+ gentest.py | 421 ++++++++++++++++++++++++++---------------------------
+ 1 file changed, 209 insertions(+), 212 deletions(-)
+
+diff --git a/gentest.py b/gentest.py
+index b6cd866..af15a4f 100755
+--- a/gentest.py
++++ b/gentest.py
+@@ -8,7 +8,7 @@ import string
+ try:
+     import libxml2
+ except:
+-    print "libxml2 python bindings not available, skipping testapi.c generation"
++    print("libxml2 python bindings not available, skipping testapi.c generation")
+     sys.exit(0)
+ 
+ if len(sys.argv) > 1:
+@@ -227,7 +227,7 @@ extra_post_call = {
+           if (old != NULL) {
+               xmlUnlinkNode(old);
+               xmlFreeNode(old) ; old = NULL ; }
+-	  ret_val = NULL;""",
++\t  ret_val = NULL;""",
+    "xmlTextMerge": 
+        """if ((first != NULL) && (first->type != XML_TEXT_NODE)) {
+               xmlUnlinkNode(second);
+@@ -236,7 +236,7 @@ extra_post_call = {
+        """if ((ret_val != NULL) && (ret_val != ncname) &&
+               (ret_val != prefix) && (ret_val != memory))
+               xmlFree(ret_val);
+-	  ret_val = NULL;""",
++\t  ret_val = NULL;""",
+    "xmlNewDocElementContent":
+        """xmlFreeDocElementContent(doc, ret_val); ret_val = NULL;""",
+    "xmlDictReference": "xmlDictFree(dict);",
+@@ -268,29 +268,29 @@ modules = []
+ def is_skipped_module(name):
+     for mod in skipped_modules:
+         if mod == name:
+-	    return 1
++            return 1
+     return 0
+ 
+ def is_skipped_function(name):
+     for fun in skipped_functions:
+         if fun == name:
+-	    return 1
++            return 1
+     # Do not test destructors
+-    if string.find(name, 'Free') != -1:
++    if name.find('Free') != -1:
+         return 1
+     return 0
+ 
+ def is_skipped_memcheck(name):
+     for fun in skipped_memcheck:
+         if fun == name:
+-	    return 1
++            return 1
+     return 0
+ 
+ missing_types = {}
+ def add_missing_type(name, func):
+     try:
+         list = missing_types[name]
+-	list.append(func)
++        list.append(func)
+     except:
+         missing_types[name] = [func]
+ 
+@@ -310,7 +310,7 @@ def add_missing_functions(name, module):
+     missing_functions_nr = missing_functions_nr + 1
+     try:
+         list = missing_functions[module]
+-	list.append(name)
++        list.append(name)
+     except:
+         missing_functions[module] = [name]
+ 
+@@ -319,45 +319,45 @@ def add_missing_functions(name, module):
+ #
+ 
+ def type_convert(str, name, info, module, function, pos):
+-#    res = string.replace(str, "    ", " ")
+-#    res = string.replace(str, "   ", " ")
+-#    res = string.replace(str, "  ", " ")
+-    res = string.replace(str, " *", "_ptr")
+-#    res = string.replace(str, "*", "_ptr")
+-    res = string.replace(res, " ", "_")
++#    res = str.replace("    ", " ")
++#    res = str.replace("   ", " ")
++#    res = str.replace("  ", " ")
++    res = str.replace(" *", "_ptr")
++#    res = str.replace("*", "_ptr")
++    res = res.replace(" ", "_")
+     if res == 'const_char_ptr':
+-        if string.find(name, "file") != -1 or \
+-           string.find(name, "uri") != -1 or \
+-           string.find(name, "URI") != -1 or \
+-           string.find(info, "filename") != -1 or \
+-           string.find(info, "URI") != -1 or \
+-           string.find(info, "URL") != -1:
+-	    if string.find(function, "Save") != -1 or \
+-	       string.find(function, "Create") != -1 or \
+-	       string.find(function, "Write") != -1 or \
+-	       string.find(function, "Fetch") != -1:
+-	        return('fileoutput')
+-	    return('filepath')
++        if name.find("file") != -1 or \
++           name.find("uri") != -1 or \
++           name.find("URI") != -1 or \
++           info.find("filename") != -1 or \
++           info.find("URI") != -1 or \
++           info.find("URL") != -1:
++            if function.find("Save") != -1 or \
++               function.find("Create") != -1 or \
++               function.find("Write") != -1 or \
++               function.find("Fetch") != -1:
++                return('fileoutput')
++            return('filepath')
+     if res == 'void_ptr':
+         if module == 'nanoftp' and name == 'ctx':
+-	    return('xmlNanoFTPCtxtPtr')
++            return('xmlNanoFTPCtxtPtr')
+         if function == 'xmlNanoFTPNewCtxt' or \
+-	   function == 'xmlNanoFTPConnectTo' or \
+-	   function == 'xmlNanoFTPOpen':
+-	    return('xmlNanoFTPCtxtPtr')
++           function == 'xmlNanoFTPConnectTo' or \
++           function == 'xmlNanoFTPOpen':
++            return('xmlNanoFTPCtxtPtr')
+         if module == 'nanohttp' and name == 'ctx':
+-	    return('xmlNanoHTTPCtxtPtr')
+-	if function == 'xmlNanoHTTPMethod' or \
+-	   function == 'xmlNanoHTTPMethodRedir' or \
+-	   function == 'xmlNanoHTTPOpen' or \
+-	   function == 'xmlNanoHTTPOpenRedir':
+-	    return('xmlNanoHTTPCtxtPtr');
++            return('xmlNanoHTTPCtxtPtr')
++        if function == 'xmlNanoHTTPMethod' or \
++           function == 'xmlNanoHTTPMethodRedir' or \
++           function == 'xmlNanoHTTPOpen' or \
++           function == 'xmlNanoHTTPOpenRedir':
++            return('xmlNanoHTTPCtxtPtr');
+         if function == 'xmlIOHTTPOpen':
+-	    return('xmlNanoHTTPCtxtPtr')
+-	if string.find(name, "data") != -1:
+-	    return('userdata')
+-	if string.find(name, "user") != -1:
+-	    return('userdata')
++            return('xmlNanoHTTPCtxtPtr')
++        if name.find("data") != -1:
++            return('userdata')
++        if name.find("user") != -1:
++            return('userdata')
+     if res == 'xmlDoc_ptr':
+         res = 'xmlDocPtr'
+     if res == 'xmlNode_ptr':
+@@ -366,18 +366,18 @@ def type_convert(str, name, info, module, function, pos):
+         res = 'xmlDictPtr'
+     if res == 'xmlNodePtr' and pos != 0:
+         if (function == 'xmlAddChild' and pos == 2) or \
+-	   (function == 'xmlAddChildList' and pos == 2) or \
++           (function == 'xmlAddChildList' and pos == 2) or \
+            (function == 'xmlAddNextSibling' and pos == 2) or \
+            (function == 'xmlAddSibling' and pos == 2) or \
+            (function == 'xmlDocSetRootElement' and pos == 2) or \
+            (function == 'xmlReplaceNode' and pos == 2) or \
+            (function == 'xmlTextMerge') or \
+-	   (function == 'xmlAddPrevSibling' and pos == 2):
+-	    return('xmlNodePtr_in');
++           (function == 'xmlAddPrevSibling' and pos == 2):
++            return('xmlNodePtr_in');
+     if res == 'const xmlBufferPtr':
+         res = 'xmlBufferPtr'
+     if res == 'xmlChar_ptr' and name == 'name' and \
+-       string.find(function, "EatName") != -1:
++       function.find("EatName") != -1:
+         return('eaten_name')
+     if res == 'void_ptr*':
+         res = 'void_ptr_ptr'
+@@ -393,7 +393,7 @@ def type_convert(str, name, info, module, function, pos):
+         res = 'debug_FILE_ptr';
+     if res == 'int' and name == 'options':
+         if module == 'parser' or module == 'xmlreader':
+-	    res = 'parseroptions'
++            res = 'parseroptions'
+ 
+     return res
+ 
+@@ -402,28 +402,28 @@ known_param_types = []
+ def is_known_param_type(name):
+     for type in known_param_types:
+         if type == name:
+-	    return 1
++            return 1
+     return name[-3:] == 'Ptr' or name[-4:] == '_ptr'
+ 
+ def generate_param_type(name, rtype):
+     global test
+     for type in known_param_types:
+         if type == name:
+-	    return
++            return
+     for type in generated_param_types:
+         if type == name:
+-	    return
++            return
+ 
+     if name[-3:] == 'Ptr' or name[-4:] == '_ptr':
+         if rtype[0:6] == 'const ':
+-	    crtype = rtype[6:]
+-	else:
+-	    crtype = rtype
++            crtype = rtype[6:]
++        else:
++            crtype = rtype
+ 
+         define = 0
+-	if modules_defines.has_key(module):
+-	    test.write("#ifdef %s\n" % (modules_defines[module]))
+-	    define = 1
++        if module in modules_defines:
++            test.write("#ifdef %s\n" % (modules_defines[module]))
++            define = 1
+         test.write("""
+ #define gen_nb_%s 1
+ static %s gen_%s(int no ATTRIBUTE_UNUSED, int nr ATTRIBUTE_UNUSED) {
+@@ -433,7 +433,7 @@ static void des_%s(int no ATTRIBUTE_UNUSED, %s val ATTRIBUTE_UNUSED, int nr ATTR
+ }
+ """ % (name, crtype, name, name, rtype))
+         if define == 1:
+-	    test.write("#endif\n\n")
++            test.write("#endif\n\n")
+         add_generated_param_type(name)
+ 
+ #
+@@ -445,7 +445,7 @@ known_return_types = []
+ def is_known_return_type(name):
+     for type in known_return_types:
+         if type == name:
+-	    return 1
++            return 1
+     return 0
+ 
+ #
+@@ -471,7 +471,7 @@ def compare_and_save():
+         try:
+             os.system("rm testapi.c; mv testapi.c.new testapi.c")
+         except:
+-	    os.system("mv testapi.c.new testapi.c")
++            os.system("mv testapi.c.new testapi.c")
+         print("Updated testapi.c")
+     else:
+         print("Generated testapi.c is identical")
+@@ -481,17 +481,17 @@ while line != "":
+     if line == "/* CUT HERE: everything below that line is generated */\n":
+         break;
+     if line[0:15] == "#define gen_nb_":
+-        type = string.split(line[15:])[0]
+-	known_param_types.append(type)
++        type = line[15:].split()[0]
++        known_param_types.append(type)
+     if line[0:19] == "static void desret_":
+-        type = string.split(line[19:], '(')[0]
+-	known_return_types.append(type)
++        type = line[19:].split('(')[0]
++        known_return_types.append(type)
+     test.write(line)
+     line = input.readline()
+ input.close()
+ 
+ if line == "":
+-    print "Could not find the CUT marker in testapi.c skipping generation"
++    print("Could not find the CUT marker in testapi.c skipping generation")
+     test.close()
+     sys.exit(0)
+ 
+@@ -505,7 +505,7 @@ test.write("/* CUT HERE: everything below that line is generated */\n")
+ #
+ doc = libxml2.readFile(srcPref + 'doc/libxml2-api.xml', None, 0)
+ if doc == None:
+-    print "Failed to load doc/libxml2-api.xml"
++    print("Failed to load doc/libxml2-api.xml")
+     sys.exit(1)
+ ctxt = doc.xpathNewContext()
+ 
+@@ -519,9 +519,9 @@ for arg in args:
+     mod = arg.xpathEval('string(../@file)')
+     func = arg.xpathEval('string(../@name)')
+     if (mod not in skipped_modules) and (func not in skipped_functions):
+-	type = arg.xpathEval('string(@type)')
+-	if not argtypes.has_key(type):
+-	    argtypes[type] = func
++        type = arg.xpathEval('string(@type)')
++        if type not in argtypes:
++            argtypes[type] = func
+ 
+ # similarly for return types
+ rettypes = {}
+@@ -531,8 +531,8 @@ for ret in rets:
+     func = ret.xpathEval('string(../@name)')
+     if (mod not in skipped_modules) and (func not in skipped_functions):
+         type = ret.xpathEval('string(@type)')
+-	if not rettypes.has_key(type):
+-	    rettypes[type] = func
++        if type not in rettypes:
++            rettypes[type] = func
+ 
+ #
+ # Generate constructors and return type handling for all enums
+@@ -549,49 +549,49 @@ for enum in enums:
+         continue;
+     define = 0
+ 
+-    if argtypes.has_key(name) and is_known_param_type(name) == 0:
+-	values = ctxt.xpathEval("/api/symbols/enum[@type='%s']" % name)
+-	i = 0
+-	vals = []
+-	for value in values:
+-	    vname = value.xpathEval('string(@name)')
+-	    if vname == None:
+-		continue;
+-	    i = i + 1
+-	    if i >= 5:
+-		break;
+-	    vals.append(vname)
+-	if vals == []:
+-	    print "Didn't find any value for enum %s" % (name)
+-	    continue
+-	if modules_defines.has_key(module):
+-	    test.write("#ifdef %s\n" % (modules_defines[module]))
+-	    define = 1
+-	test.write("#define gen_nb_%s %d\n" % (name, len(vals)))
+-	test.write("""static %s gen_%s(int no, int nr ATTRIBUTE_UNUSED) {\n""" %
+-	           (name, name))
+-	i = 1
+-	for value in vals:
+-	    test.write("    if (no == %d) return(%s);\n" % (i, value))
+-	    i = i + 1
+-	test.write("""    return(0);
++    if (name in argtypes) and is_known_param_type(name) == 0:
++        values = ctxt.xpathEval("/api/symbols/enum[@type='%s']" % name)
++        i = 0
++        vals = []
++        for value in values:
++            vname = value.xpathEval('string(@name)')
++            if vname == None:
++                continue;
++            i = i + 1
++            if i >= 5:
++                break;
++            vals.append(vname)
++        if vals == []:
++            print("Didn't find any value for enum %s" % (name))
++            continue
++        if module in modules_defines:
++            test.write("#ifdef %s\n" % (modules_defines[module]))
++            define = 1
++        test.write("#define gen_nb_%s %d\n" % (name, len(vals)))
++        test.write("""static %s gen_%s(int no, int nr ATTRIBUTE_UNUSED) {\n""" %
++                   (name, name))
++        i = 1
++        for value in vals:
++            test.write("    if (no == %d) return(%s);\n" % (i, value))
++            i = i + 1
++        test.write("""    return(0);
+ }
+ 
+ static void des_%s(int no ATTRIBUTE_UNUSED, %s val ATTRIBUTE_UNUSED, int nr ATTRIBUTE_UNUSED) {
+ }
+ 
+ """ % (name, name));
+-	known_param_types.append(name)
++        known_param_types.append(name)
+ 
+     if (is_known_return_type(name) == 0) and (name in rettypes):
+-	if define == 0 and modules_defines.has_key(module):
+-	    test.write("#ifdef %s\n" % (modules_defines[module]))
+-	    define = 1
++        if define == 0 and (module in modules_defines):
++            test.write("#ifdef %s\n" % (modules_defines[module]))
++            define = 1
+         test.write("""static void desret_%s(%s val ATTRIBUTE_UNUSED) {
+ }
+ 
+ """ % (name, name))
+-	known_return_types.append(name)
++        known_return_types.append(name)
+     if define == 1:
+         test.write("#endif\n\n")
+ 
+@@ -615,9 +615,9 @@ for file in headers:
+     # do not test deprecated APIs
+     #
+     desc = file.xpathEval('string(description)')
+-    if string.find(desc, 'DEPRECATED') != -1:
+-        print "Skipping deprecated interface %s" % name
+-	continue;
++    if desc.find('DEPRECATED') != -1:
++        print("Skipping deprecated interface %s" % name)
++        continue;
+ 
+     test.write("#include <libxml/%s.h>\n" % name)
+     modules.append(name)
+@@ -679,7 +679,7 @@ def generate_test(module, node):
+     # and store the information for the generation
+     #
+     try:
+-	args = node.xpathEval("arg")
++        args = node.xpathEval("arg")
+     except:
+         args = []
+     t_args = []
+@@ -687,37 +687,37 @@ def generate_test(module, node):
+     for arg in args:
+         n = n + 1
+         rtype = arg.xpathEval("string(@type)")
+-	if rtype == 'void':
+-	    break;
+-	info = arg.xpathEval("string(@info)")
+-	nam = arg.xpathEval("string(@name)")
++        if rtype == 'void':
++            break;
++        info = arg.xpathEval("string(@info)")
++        nam = arg.xpathEval("string(@name)")
+         type = type_convert(rtype, nam, info, module, name, n)
+-	if is_known_param_type(type) == 0:
+-	    add_missing_type(type, name);
+-	    no_gen = 1
++        if is_known_param_type(type) == 0:
++            add_missing_type(type, name);
++            no_gen = 1
+         if (type[-3:] == 'Ptr' or type[-4:] == '_ptr') and \
+-	    rtype[0:6] == 'const ':
+-	    crtype = rtype[6:]
+-	else:
+-	    crtype = rtype
+-	t_args.append((nam, type, rtype, crtype, info))
++            rtype[0:6] == 'const ':
++            crtype = rtype[6:]
++        else:
++            crtype = rtype
++        t_args.append((nam, type, rtype, crtype, info))
+     
+     try:
+-	rets = node.xpathEval("return")
++        rets = node.xpathEval("return")
+     except:
+         rets = []
+     t_ret = None
+     for ret in rets:
+         rtype = ret.xpathEval("string(@type)")
+-	info = ret.xpathEval("string(@info)")
++        info = ret.xpathEval("string(@info)")
+         type = type_convert(rtype, 'return', info, module, name, 0)
+-	if rtype == 'void':
+-	    break
+-	if is_known_return_type(type) == 0:
+-	    add_missing_type(type, name);
+-	    no_gen = 1
+-	t_ret = (type, rtype, info)
+-	break
++        if rtype == 'void':
++            break
++        if is_known_return_type(type) == 0:
++            add_missing_type(type, name);
++            no_gen = 1
++        t_ret = (type, rtype, info)
++        break
+ 
+     if no_gen == 0:
+         for t_arg in t_args:
+@@ -733,7 +733,7 @@ test_%s(void) {
+ 
+     if no_gen == 1:
+         add_missing_functions(name, module)
+-	test.write("""
++        test.write("""
+     /* missing type support */
+     return(test_ret);
+ }
+@@ -742,22 +742,22 @@ test_%s(void) {
+         return
+ 
+     try:
+-	conds = node.xpathEval("cond")
+-	for cond in conds:
+-	    test.write("#if %s\n" % (cond.get_content()))
+-	    nb_cond = nb_cond + 1
++        conds = node.xpathEval("cond")
++        for cond in conds:
++            test.write("#if %s\n" % (cond.get_content()))
++            nb_cond = nb_cond + 1
+     except:
+         pass
+ 
+     define = 0
+-    if function_defines.has_key(name):
++    if name in function_defines:
+         test.write("#ifdef %s\n" % (function_defines[name]))
+-	define = 1
++        define = 1
+     
+     # Declare the memory usage counter
+     no_mem = is_skipped_memcheck(name)
+     if no_mem == 0:
+-	test.write("    int mem_base;\n");
++        test.write("    int mem_base;\n");
+ 
+     # Declare the return value
+     if t_ret != None:
+@@ -766,29 +766,29 @@ test_%s(void) {
+     # Declare the arguments
+     for arg in t_args:
+         (nam, type, rtype, crtype, info) = arg;
+-	# add declaration
+-	test.write("    %s %s; /* %s */\n" % (crtype, nam, info))
+-	test.write("    int n_%s;\n" % (nam))
++        # add declaration
++        test.write("    %s %s; /* %s */\n" % (crtype, nam, info))
++        test.write("    int n_%s;\n" % (nam))
+     test.write("\n")
+ 
+     # Cascade loop on of each argument list of values
+     for arg in t_args:
+         (nam, type, rtype, crtype, info) = arg;
+-	#
+-	test.write("    for (n_%s = 0;n_%s < gen_nb_%s;n_%s++) {\n" % (
+-	           nam, nam, type, nam))
++        #
++        test.write("    for (n_%s = 0;n_%s < gen_nb_%s;n_%s++) {\n" % (
++                   nam, nam, type, nam))
+     
+     # log the memory usage
+     if no_mem == 0:
+-	test.write("        mem_base = xmlMemBlocks();\n");
++        test.write("        mem_base = xmlMemBlocks();\n");
+ 
+     # prepare the call
+     i = 0;
+     for arg in t_args:
+         (nam, type, rtype, crtype, info) = arg;
+-	#
+-	test.write("        %s = gen_%s(n_%s, %d);\n" % (nam, type, nam, i))
+-	i = i + 1;
++        #
++        test.write("        %s = gen_%s(n_%s, %d);\n" % (nam, type, nam, i))
++        i = i + 1;
+ 
+     # add checks to avoid out-of-bounds array access
+     i = 0;
+@@ -797,7 +797,7 @@ test_%s(void) {
+         # assume that "size", "len", and "start" parameters apply to either
+         # the nearest preceding or following char pointer
+         if type == "int" and (nam == "size" or nam == "len" or nam == "start"):
+-            for j in range(i - 1, -1, -1) + range(i + 1, len(t_args)):
++            for j in (*range(i - 1, -1, -1), *range(i + 1, len(t_args))):
+                 (bnam, btype) = t_args[j][:2]
+                 if btype == "const_char_ptr" or btype == "const_xmlChar_ptr":
+                     test.write(
+@@ -806,42 +806,42 @@ test_%s(void) {
+                         "            continue;\n"
+                         % (bnam, nam, bnam))
+                     break
+-	i = i + 1;
++        i = i + 1;
+ 
+     # do the call, and clanup the result
+-    if extra_pre_call.has_key(name):
+-	test.write("        %s\n"% (extra_pre_call[name]))
++    if name in extra_pre_call:
++        test.write("        %s\n"% (extra_pre_call[name]))
+     if t_ret != None:
+-	test.write("\n        ret_val = %s(" % (name))
+-	need = 0
+-	for arg in t_args:
+-	    (nam, type, rtype, crtype, info) = arg
+-	    if need:
+-	        test.write(", ")
+-	    else:
+-	        need = 1
+-	    if rtype != crtype:
+-	        test.write("(%s)" % rtype)
+-	    test.write("%s" % nam);
+-	test.write(");\n")
+-	if extra_post_call.has_key(name):
+-	    test.write("        %s\n"% (extra_post_call[name]))
+-	test.write("        desret_%s(ret_val);\n" % t_ret[0])
++        test.write("\n        ret_val = %s(" % (name))
++        need = 0
++        for arg in t_args:
++            (nam, type, rtype, crtype, info) = arg
++            if need:
++                test.write(", ")
++            else:
++                need = 1
++            if rtype != crtype:
++                test.write("(%s)" % rtype)
++            test.write("%s" % nam);
++        test.write(");\n")
++        if name in extra_post_call:
++            test.write("        %s\n"% (extra_post_call[name]))
++        test.write("        desret_%s(ret_val);\n" % t_ret[0])
+     else:
+-	test.write("\n        %s(" % (name));
+-	need = 0;
+-	for arg in t_args:
+-	    (nam, type, rtype, crtype, info) = arg;
+-	    if need:
+-	        test.write(", ")
+-	    else:
+-	        need = 1
+-	    if rtype != crtype:
+-	        test.write("(%s)" % rtype)
+-	    test.write("%s" % nam)
+-	test.write(");\n")
+-	if extra_post_call.has_key(name):
+-	    test.write("        %s\n"% (extra_post_call[name]))
++        test.write("\n        %s(" % (name));
++        need = 0;
++        for arg in t_args:
++            (nam, type, rtype, crtype, info) = arg;
++            if need:
++                test.write(", ")
++            else:
++                need = 1
++            if rtype != crtype:
++                test.write("(%s)" % rtype)
++            test.write("%s" % nam)
++        test.write(");\n")
++        if name in extra_post_call:
++            test.write("        %s\n"% (extra_post_call[name]))
+ 
+     test.write("        call_tests++;\n");
+ 
+@@ -849,32 +849,32 @@ test_%s(void) {
+     i = 0;
+     for arg in t_args:
+         (nam, type, rtype, crtype, info) = arg;
+-	# This is a hack to prevent generating a destructor for the
+-	# 'input' argument in xmlTextReaderSetup.  There should be
+-	# a better, more generic way to do this!
+-	if string.find(info, 'destroy') == -1:
+-	    test.write("        des_%s(n_%s, " % (type, nam))
+-	    if rtype != crtype:
+-	        test.write("(%s)" % rtype)
+-	    test.write("%s, %d);\n" % (nam, i))
+-	i = i + 1;
++        # This is a hack to prevent generating a destructor for the
++        # 'input' argument in xmlTextReaderSetup.  There should be
++        # a better, more generic way to do this!
++        if info.find('destroy') == -1:
++            test.write("        des_%s(n_%s, " % (type, nam))
++            if rtype != crtype:
++                test.write("(%s)" % rtype)
++            test.write("%s, %d);\n" % (nam, i))
++        i = i + 1;
+ 
+     test.write("        xmlResetLastError();\n");
+     # Check the memory usage
+     if no_mem == 0:
+-	test.write("""        if (mem_base != xmlMemBlocks()) {
++        test.write("""        if (mem_base != xmlMemBlocks()) {
+             printf("Leak of %%d blocks found in %s",
+-	           xmlMemBlocks() - mem_base);
+-	    test_ret++;
++\t           xmlMemBlocks() - mem_base);
++\t    test_ret++;
+ """ % (name));
+-	for arg in t_args:
+-	    (nam, type, rtype, crtype, info) = arg;
+-	    test.write("""            printf(" %%d", n_%s);\n""" % (nam))
+-	test.write("""            printf("\\n");\n""")
+-	test.write("        }\n")
++        for arg in t_args:
++            (nam, type, rtype, crtype, info) = arg;
++            test.write("""            printf(" %%d", n_%s);\n""" % (nam))
++        test.write("""            printf("\\n");\n""")
++        test.write("        }\n")
+ 
+     for arg in t_args:
+-	test.write("    }\n")
++        test.write("    }\n")
+ 
+     test.write("    function_tests++;\n")
+     #
+@@ -882,7 +882,7 @@ test_%s(void) {
+     #
+     while nb_cond > 0:
+         test.write("#endif\n")
+-	nb_cond = nb_cond -1
++        nb_cond = nb_cond -1
+     if define == 1:
+         test.write("#endif\n")
+ 
+@@ -900,10 +900,10 @@ test_%s(void) {
+ for module in modules:
+     # gather all the functions exported by that module
+     try:
+-	functions = ctxt.xpathEval("/api/symbols/function[@file='%s']" % (module))
++        functions = ctxt.xpathEval("/api/symbols/function[@file='%s']" % (module))
+     except:
+-        print "Failed to gather functions from module %s" % (module)
+-	continue;
++        print("Failed to gather functions from module %s" % (module))
++        continue;
+ 
+     # iterate over all functions in the module generating the test
+     i = 0
+@@ -923,14 +923,14 @@ test_%s(void) {
+     # iterate over all functions in the module generating the call
+     for function in functions:
+         name = function.xpathEval('string(@name)')
+-	if is_skipped_function(name):
+-	    continue
+-	test.write("    test_ret += test_%s();\n" % (name))
++        if is_skipped_function(name):
++            continue
++        test.write("    test_ret += test_%s();\n" % (name))
+ 
+     # footer
+     test.write("""
+     if (test_ret != 0)
+-	printf("Module %s: %%d errors\\n", test_ret);
++\tprintf("Module %s: %%d errors\\n", test_ret);
+     return(test_ret);
+ }
+ """ % (module))
+@@ -948,7 +948,7 @@ test.write("""    return(0);
+ }
+ """);
+ 
+-print "Generated test for %d modules and %d functions" %(len(modules), nb_tests)
++print("Generated test for %d modules and %d functions" %(len(modules), nb_tests))
+ 
+ compare_and_save()
+ 
+@@ -960,11 +960,8 @@ for missing in missing_types.keys():
+     n = len(missing_types[missing])
+     missing_list.append((n, missing))
+ 
+-def compare_missing(a, b):
+-    return b[0] - a[0]
+-
+-missing_list.sort(compare_missing)
+-print "Missing support for %d functions and %d types see missing.lst" % (missing_functions_nr, len(missing_list))
++missing_list.sort(key=lambda a: a[0], reverse=True)  # most-referenced types first, matching the removed compare_missing (b[0] - a[0])
++print("Missing support for %d functions and %d types see missing.lst" % (missing_functions_nr, len(missing_list)))
+ lst = open("missing.lst", "w")
+ lst.write("Missing support for %d types" % (len(missing_list)))
+ lst.write("\n")
+@@ -974,9 +971,9 @@ for miss in missing_list:
+     for n in missing_types[miss[1]]:
+         i = i + 1
+         if i > 5:
+-	    lst.write(" ...")
+-	    break
+-	lst.write(" %s" % (n))
++            lst.write(" ...")
++            break
++        lst.write(" %s" % (n))
+     lst.write("\n")
+ lst.write("\n")
+ lst.write("\n")
diff --git a/poky/meta/recipes-core/libxml/libxml2_2.9.14.bb b/poky/meta/recipes-core/libxml/libxml2_2.9.14.bb
index d803db8..a2ed8d7 100644
--- a/poky/meta/recipes-core/libxml/libxml2_2.9.14.bb
+++ b/poky/meta/recipes-core/libxml/libxml2_2.9.14.bb
@@ -22,6 +22,7 @@
            file://fix-execution-of-ptests.patch \
            file://remove-fuzz-from-ptests.patch \
            file://libxml-m4-use-pkgconfig.patch \
+           file://0001-Port-gentest.py-to-Python-3.patch \
            "
 
 SRC_URI[archive.sha256sum] = "60d74a257d1ccec0475e749cba2f21559e48139efba6ff28224357c7c798dfee"
@@ -82,6 +83,16 @@
 }
 
 do_compile_ptest() {
+        # Make sure that testapi.c is newer than gentest.py, because
+        # with reproducible builds, they will both get e.g. Jan  1  1970
+        # modification time from SOURCE_DATE_EPOCH and then check-am
+        # might try to rebuild_testapi, which will fail even with
+        # 0001-Port-gentest.py-to-Python-3.patch, because it needs
+        # libxml2 module (libxml2-native dependency and correctly
+        # set PYTHON_SITE_PACKAGES), it's easier to
+        # just rely on pre-generated testapi.c from the release
+        touch ${S}/testapi.c
+
 	oe_runmake check-am
 }
 
@@ -110,7 +121,8 @@
 	# Docs are not needed in the native case
 	rm ${D}${datadir}/gtk-doc -rf
 
-	create_wrapper ${D}${bindir}/xmllint XML_CATALOG_FILES=${sysconfdir}/xml/catalog
+	create_wrapper ${D}${bindir}/xmllint 'XML_CATALOG_FILES=${XML_CATALOG_FILES:-${sysconfdir}/xml/catalog}'
 }
+do_install[vardepsexclude] += "XML_CATALOG_FILES:-${sysconfdir}/xml/catalog"
 
 BBCLASSEXTEND = "native nativesdk"
diff --git a/poky/meta/recipes-core/musl/gcompat/0001-auxv-new-module.patch b/poky/meta/recipes-core/musl/gcompat/0001-auxv-new-module.patch
new file mode 100644
index 0000000..ee292f3
--- /dev/null
+++ b/poky/meta/recipes-core/musl/gcompat/0001-auxv-new-module.patch
@@ -0,0 +1,59 @@
+From aecd42801904462501a890d173648e4e826eda19 Mon Sep 17 00:00:00 2001
+From: "A. Wilcox" <AWilcox@Wilcox-Tech.com>
+Date: Sat, 23 Oct 2021 23:29:40 -0500
+Subject: [PATCH] auxv: new module
+
+Adds `__getauxval` wrapper, needed for Parallels Tools GUI installer.
+
+Upstream-Status: Submitted [https://git.adelielinux.org/adelie/gcompat/-/commit/e860a38a88c7ea148ee15976136a1f83ea13f8e0]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ CHANGELOG.rst     | 5 +++++
+ Makefile          | 1 +
+ libgcompat/auxv.c | 6 ++++++
+ 3 files changed, 12 insertions(+)
+ create mode 100644 libgcompat/auxv.c
+
+diff --git a/CHANGELOG.rst b/CHANGELOG.rst
+index ce6859b..c3a9cee 100644
+--- a/CHANGELOG.rst
++++ b/CHANGELOG.rst
+@@ -11,6 +11,11 @@
+ 1.1 (202?-??-??)
+ ================
+ 
++auxv
++----
++
++* Add __getauxval wrapper for getauxval.
++
+ locale
+ ------
+ 
+diff --git a/Makefile b/Makefile
+index cbb7634..1893cc1 100644
+--- a/Makefile
++++ b/Makefile
+@@ -2,6 +2,7 @@ LIBGCOMPAT_INCLUDE = \
+ 	libgcompat/alias.h \
+ 	libgcompat/internal.h
+ LIBGCOMPAT_SRC = \
++	libgcompat/auxv.c		\
+ 	libgcompat/ctype.c		\
+ 	libgcompat/cxx_thread.c		\
+ 	libgcompat/dlfcn.c		\
+diff --git a/libgcompat/auxv.c b/libgcompat/auxv.c
+new file mode 100644
+index 0000000..01de376
+--- /dev/null
++++ b/libgcompat/auxv.c
+@@ -0,0 +1,6 @@
++#include <sys/auxv.h> /* getauxval */
++
++unsigned long __getauxval(unsigned long value)
++{
++	return getauxval(value);
++}
+-- 
+2.37.3
+
diff --git a/poky/meta/recipes-core/musl/gcompat_git.bb b/poky/meta/recipes-core/musl/gcompat_git.bb
index b051e43..11f3c58 100644
--- a/poky/meta/recipes-core/musl/gcompat_git.bb
+++ b/poky/meta/recipes-core/musl/gcompat_git.bb
@@ -8,12 +8,13 @@
 LIC_FILES_CHKSUM = "file://LICENSE;md5=eb33ef4af05a9c7602843afb7adfe792"
 
 SRC_URI = "git://git.adelielinux.org/adelie/gcompat.git;protocol=https;branch=current \
-          "
+           file://0001-auxv-new-module.patch \
+           "
 SRC_URI:append:powerpc = "\
            file://0001-make-Static-PIE-does-not-work-on-musl-ppc.patch \
            "
 PV = "1.0.0+1.1+git${SRCPV}"
-SRCREV = "4d6a5156a6eb7f56b30d93853a872e36dadde81b"
+SRCREV = "c6921a1aa454bb87671f9bd1ecfe47d80d1620aa"
 
 S = "${WORKDIR}/git"
 
@@ -42,6 +43,8 @@
 		install -d ${D}${nonarch_base_libdir}${SITEINFO_BITS}
 		ln -rs ${D}${GLIBC_LDSO} ${D}${nonarch_base_libdir}${SITEINFO_BITS}/`basename ${GLIBC_LDSO}`
 	fi
+	install -d ${D}${libdir}
+	ln -sf ${base_libdir}/libgcompat.so.0 ${D}${libdir}/libgcompat.so
 }
 
 FILES:${PN} += "${nonarch_base_libdir}${SITEINFO_BITS}"
diff --git a/poky/meta/recipes-core/systemd/systemd-systemctl/systemctl b/poky/meta/recipes-core/systemd/systemd-systemctl/systemctl
index 6d19666..cddae75 100755
--- a/poky/meta/recipes-core/systemd/systemd-systemctl/systemctl
+++ b/poky/meta/recipes-core/systemd/systemd-systemctl/systemctl
@@ -26,6 +26,9 @@
 
 class SystemdFile():
     """Class representing a single systemd configuration file"""
+
+    _clearable_keys = ['WantedBy']
+
     def __init__(self, root, path, instance_unit_name):
         self.sections = dict()
         self._parse(root, path)
@@ -80,6 +83,14 @@
                 v = m.group('value')
                 if k not in section:
                     section[k] = list()
+
+                # If we come across a "key=" line for a "clearable key", then
+                # forget all preceding assignments. This works because we are
+                # processing files in correct parse order.
+                if k in self._clearable_keys and not v:
+                    del section[k]
+                    continue
+
                 section[k].extend(v.split())
 
     def get(self, section, prop):
diff --git a/poky/meta/recipes-core/systemd/systemd/00-create-volatile.conf b/poky/meta/recipes-core/systemd/systemd/00-create-volatile.conf
index 87cbe1e..c427722 100644
--- a/poky/meta/recipes-core/systemd/systemd/00-create-volatile.conf
+++ b/poky/meta/recipes-core/systemd/systemd/00-create-volatile.conf
@@ -3,5 +3,6 @@
 # inside /var/log.
 
 
+d		/run/lock		1777	-	-	-
 d		/var/volatile/log		-	-	-	-
 d		/var/volatile/tmp		1777	-	-
diff --git a/poky/meta/recipes-core/systemd/systemd_251.4.bb b/poky/meta/recipes-core/systemd/systemd_251.4.bb
index 8497e24..910ea71 100644
--- a/poky/meta/recipes-core/systemd/systemd_251.4.bb
+++ b/poky/meta/recipes-core/systemd/systemd_251.4.bb
@@ -163,6 +163,7 @@
 PACKAGECONFIG[microhttpd] = "-Dmicrohttpd=true,-Dmicrohttpd=false,libmicrohttpd"
 PACKAGECONFIG[myhostname] = "-Dnss-myhostname=true,-Dnss-myhostname=false,,libnss-myhostname"
 PACKAGECONFIG[networkd] = "-Dnetworkd=true,-Dnetworkd=false"
+PACKAGECONFIG[no-dns-fallback] = "-Ddns-servers="
 PACKAGECONFIG[nss] = "-Dnss-systemd=true,-Dnss-systemd=false"
 PACKAGECONFIG[nss-mymachines] = "-Dnss-mymachines=true,-Dnss-mymachines=false"
 PACKAGECONFIG[nss-resolve] = "-Dnss-resolve=true,-Dnss-resolve=false"
diff --git a/poky/meta/recipes-core/udev/eudev_3.2.11.bb b/poky/meta/recipes-core/udev/eudev_3.2.11.bb
index bc2c77d..eba36c2 100644
--- a/poky/meta/recipes-core/udev/eudev_3.2.11.bb
+++ b/poky/meta/recipes-core/udev/eudev_3.2.11.bb
@@ -9,7 +9,7 @@
 
 PROVIDES = "udev"
 
-SRC_URI = "https://github.com/eudev-project/${BPN}/releases/download/v${PV}/${BP}.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/${BP}.tar.gz \
            file://init \
            file://local.rules \
            file://0001-build-Remove-dead-g-i-r-configuration.patch \
@@ -17,10 +17,9 @@
 
 SRC_URI[sha256sum] = "19847cafec67897da855fde56f9dc7d92e21c50e450aa79068a7e704ed44558b"
 
-UPSTREAM_CHECK_URI = "https://github.com/eudev-project/eudev/releases"
-UPSTREAM_CHECK_REGEX = "eudev-(?P<pver>\d+(\.\d+)+)\.tar"
+GITHUB_BASE_URI = "https://github.com/eudev-project/eudev/releases"
 
-inherit autotools update-rc.d qemu pkgconfig features_check manpages
+inherit autotools update-rc.d qemu pkgconfig features_check manpages github-releases
 
 CONFLICT_DISTRO_FEATURES = "systemd"
 
diff --git a/poky/meta/recipes-devtools/apt/apt/0001-typecast-time_t-and-suseconds_t-from-std-chrono.patch b/poky/meta/recipes-devtools/apt/apt/0001-typecast-time_t-and-suseconds_t-from-std-chrono.patch
new file mode 100644
index 0000000..fc3509d
--- /dev/null
+++ b/poky/meta/recipes-devtools/apt/apt/0001-typecast-time_t-and-suseconds_t-from-std-chrono.patch
@@ -0,0 +1,64 @@
+From b7a1a4d3259557f2587f7d5d47502691d94c21c2 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 16 Sep 2022 20:00:30 -0700
+Subject: [PATCH 1/2] typecast time_t and suseconds_t from std::chrono
+
+This fixes build on some architectures like mips
+progress.cc:125:31: error: non-constant-expression cannot be narrowed from type 'std::chrono::duration<long long>::rep' (aka 'long long') to '__time_t' (aka 'long') in initializer list [-Wc++11-narrowing]
+   struct timeval NowTime = { Now_sec.count(), Now_usec.count() };
+
+Upstream-Status: Submitted [https://salsa.debian.org/apt-team/apt/-/merge_requests/259]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ apt-pkg/acquire.cc           | 4 ++--
+ apt-pkg/contrib/progress.cc  | 2 +-
+ ftparchive/apt-ftparchive.cc | 2 +-
+ 3 files changed, 4 insertions(+), 4 deletions(-)
+
+diff --git a/apt-pkg/acquire.cc b/apt-pkg/acquire.cc
+index 100ccde..dd0624a 100644
+--- a/apt-pkg/acquire.cc
++++ b/apt-pkg/acquire.cc
+@@ -53,11 +53,11 @@
+ using namespace std;
+ 
+ // helper to convert time_point to a timeval
+-static struct timeval SteadyDurationToTimeVal(std::chrono::steady_clock::duration Time)
++constexpr struct timeval SteadyDurationToTimeVal(std::chrono::steady_clock::duration Time)
+ {
+    auto const Time_sec = std::chrono::duration_cast<std::chrono::seconds>(Time);
+    auto const Time_usec = std::chrono::duration_cast<std::chrono::microseconds>(Time - Time_sec);
+-   return {Time_sec.count(), Time_usec.count()};
++   return timeval{static_cast<time_t>(Time_sec.count()), static_cast<suseconds_t>(Time_usec.count())};
+ }
+ 
+ std::string pkgAcquire::URIEncode(std::string const &part)		/*{{{*/
+diff --git a/apt-pkg/contrib/progress.cc b/apt-pkg/contrib/progress.cc
+index 03f88d4..eb688b9 100644
+--- a/apt-pkg/contrib/progress.cc
++++ b/apt-pkg/contrib/progress.cc
+@@ -122,7 +122,7 @@ bool OpProgress::CheckChange(float Interval)
+    auto const Now = std::chrono::steady_clock::now().time_since_epoch();
+    auto const Now_sec = std::chrono::duration_cast<std::chrono::seconds>(Now);
+    auto const Now_usec = std::chrono::duration_cast<std::chrono::microseconds>(Now - Now_sec);
+-   struct timeval NowTime = { Now_sec.count(), Now_usec.count() };
++   struct timeval NowTime = { static_cast<time_t>(Now_sec.count()), static_cast<suseconds_t>(Now_usec.count()) };
+ 
+    std::chrono::duration<decltype(Interval)> Delta =
+       std::chrono::seconds(NowTime.tv_sec - LastTime.tv_sec) +
+diff --git a/ftparchive/apt-ftparchive.cc b/ftparchive/apt-ftparchive.cc
+index 56fdc22..0a253b1 100644
+--- a/ftparchive/apt-ftparchive.cc
++++ b/ftparchive/apt-ftparchive.cc
+@@ -58,7 +58,7 @@ static struct timeval GetTimevalFromSteadyClock()			/*{{{*/
+    auto const Time = std::chrono::steady_clock::now().time_since_epoch();
+    auto const Time_sec = std::chrono::duration_cast<std::chrono::seconds>(Time);
+    auto const Time_usec = std::chrono::duration_cast<std::chrono::microseconds>(Time - Time_sec);
+-   return { Time_sec.count(), Time_usec.count() };
++   return { static_cast<time_t>(Time_sec.count()), static_cast<suseconds_t>(Time_usec.count()) };
+ }
+ 									/*}}}*/
+ static auto GetTimeDeltaSince(struct timeval StartTime)			/*{{{*/
+-- 
+2.37.3
+
diff --git a/poky/meta/recipes-devtools/apt/apt/0002-interactive-helper-Undefine-_FORTIFY_SOURCE.patch b/poky/meta/recipes-devtools/apt/apt/0002-interactive-helper-Undefine-_FORTIFY_SOURCE.patch
new file mode 100644
index 0000000..18c4641
--- /dev/null
+++ b/poky/meta/recipes-devtools/apt/apt/0002-interactive-helper-Undefine-_FORTIFY_SOURCE.patch
@@ -0,0 +1,27 @@
+From 891076c2cf4298b5d587545497f4831f0d21caa1 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Fri, 16 Sep 2022 20:04:43 -0700
+Subject: [PATCH 2/2] interactive-helper: Undefine _FORTIFY_SOURCE
+
+This ensures that it compiles when clang compiler is passing
+-D_FORTIFY_SOURCE=2
+
+Upstream-Status: Submitted [https://salsa.debian.org/apt-team/apt/-/merge_requests/259]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ test/interactive-helper/libnoprofile.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/test/interactive-helper/libnoprofile.c b/test/interactive-helper/libnoprofile.c
+index f11b898..b26ec2a 100644
+--- a/test/interactive-helper/libnoprofile.c
++++ b/test/interactive-helper/libnoprofile.c
+@@ -1,4 +1,5 @@
+ #define _GNU_SOURCE
++#undef _FORTIFY_SOURCE
+ #include <stdarg.h>
+ #include <stdlib.h>
+ #include <string.h>
+-- 
+2.37.3
+
diff --git a/poky/meta/recipes-devtools/apt/apt_2.4.5.bb b/poky/meta/recipes-devtools/apt/apt_2.4.5.bb
index 564bdee..4b9f804 100644
--- a/poky/meta/recipes-devtools/apt/apt_2.4.5.bb
+++ b/poky/meta/recipes-devtools/apt/apt_2.4.5.bb
@@ -14,6 +14,8 @@
            file://0001-Hide-fstatat64-and-prlimit64-defines-on-musl.patch \
            file://0001-aptwebserver.cc-Include-array.patch \
            file://0001-Remove-using-std-binary_function.patch \
+           file://0001-typecast-time_t-and-suseconds_t-from-std-chrono.patch \
+           file://0002-interactive-helper-Undefine-_FORTIFY_SOURCE.patch \
            "
 
 SRC_URI:append:class-native = " \
diff --git a/poky/meta/recipes-devtools/autoconf-archive/autoconf-archive_2022.02.11.bb b/poky/meta/recipes-devtools/autoconf-archive/autoconf-archive_2022.09.03.bb
similarity index 85%
rename from poky/meta/recipes-devtools/autoconf-archive/autoconf-archive_2022.02.11.bb
rename to poky/meta/recipes-devtools/autoconf-archive/autoconf-archive_2022.09.03.bb
index 47bf238..6595ac9 100644
--- a/poky/meta/recipes-devtools/autoconf-archive/autoconf-archive_2022.02.11.bb
+++ b/poky/meta/recipes-devtools/autoconf-archive/autoconf-archive_2022.09.03.bb
@@ -6,7 +6,7 @@
     file://COPYING.EXCEPTION;md5=fdef168ebff3bc2f13664c365a5fb515"
 
 SRC_URI = "${GNU_MIRROR}/${BPN}/${BPN}-${PV}.tar.xz"
-SRC_URI[sha256sum] = "78a61b611e2eeb55a89e0398e0ce387bcaf57fe2dd53c6fe427130f777ad1e8c"
+SRC_URI[sha256sum] = "e07454f00d8cae7907bed42d0747798927809947684d94c37207a4d63a32f423"
 
 inherit autotools allarch texinfo
 
diff --git a/poky/meta/recipes-devtools/autoconf/autoconf/0001-Port-to-compilers-that-moan-about-K-R-func-decls.patch b/poky/meta/recipes-devtools/autoconf/autoconf/0001-Port-to-compilers-that-moan-about-K-R-func-decls.patch
new file mode 100644
index 0000000..4f15bf9
--- /dev/null
+++ b/poky/meta/recipes-devtools/autoconf/autoconf/0001-Port-to-compilers-that-moan-about-K-R-func-decls.patch
@@ -0,0 +1,138 @@
+From 7a3bbca81b803ba116b83c82de378e840cc35f81 Mon Sep 17 00:00:00 2001
+From: Paul Eggert <eggert@cs.ucla.edu>
+Date: Thu, 1 Sep 2022 16:19:50 -0500
+Subject: [PATCH] Port to compilers that moan about K&R func decls
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+* lib/autoconf/c.m4 (AC_LANG_CALL, AC_LANG_FUNC_LINK_TRY):
+Use '(void)' rather than '()' in function prototypes, as the latter
+provokes fatal errors in some compilers nowadays.
+* lib/autoconf/functions.m4 (AC_FUNC_STRTOD):
+* tests/fortran.at (AC_F77_DUMMY_MAIN usage):
+* tests/semantics.at (AC_CHECK_DECLS):
+Don’t use () in a function decl.
+
+Upstream-Status: Backport [https://git.savannah.gnu.org/cgit/autoconf.git/commit/?id=8b5e2016c7ed2d67f31b03a3d2e361858ff5299b]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ doc/autoconf.texi         | 7 +++----
+ lib/autoconf/c.m4         | 6 +++---
+ lib/autoconf/functions.m4 | 3 ---
+ tests/fortran.at          | 8 ++++----
+ tests/semantics.at        | 2 +-
+ 5 files changed, 11 insertions(+), 15 deletions(-)
+
+--- a/doc/autoconf.texi
++++ b/doc/autoconf.texi
+@@ -5465,9 +5465,7 @@ the @samp{#undef malloc}):
+ #include <config.h>
+ #undef malloc
+ 
+-#include <sys/types.h>
+-
+-void *malloc ();
++#include <stdlib.h>
+ 
+ /* Allocate an N-byte block of memory from the heap.
+    If N is zero, allocate a 1-byte block.  */
+@@ -8295,7 +8293,7 @@ needed:
+ #  ifdef __cplusplus
+      extern "C"
+ #  endif
+-   int F77_DUMMY_MAIN () @{ return 1; @}
++   int F77_DUMMY_MAIN (void) @{ return 1; @}
+ #endif
+ @end example
+ 
+--- a/lib/autoconf/c.m4
++++ b/lib/autoconf/c.m4
+@@ -127,7 +127,7 @@ m4_if([$2], [main], ,
+ [/* Override any GCC internal prototype to avoid an error.
+    Use char because int might match the return type of a GCC
+    builtin and then its argument prototype would still apply.  */
+-char $2 ();])], [return $2 ();])])
++char $2 (void);])], [return $2 ();])])
+ 
+ 
+ # AC_LANG_FUNC_LINK_TRY(C)(FUNCTION)
+@@ -151,7 +151,7 @@ m4_define([AC_LANG_FUNC_LINK_TRY(C)],
+ #define $1 innocuous_$1
+ 
+ /* System header to define __stub macros and hopefully few prototypes,
+-   which can conflict with char $1 (); below.  */
++   which can conflict with char $1 (void); below.  */
+ 
+ #include <limits.h>
+ #undef $1
+@@ -162,7 +162,7 @@ m4_define([AC_LANG_FUNC_LINK_TRY(C)],
+ #ifdef __cplusplus
+ extern "C"
+ #endif
+-char $1 ();
++char $1 (void);
+ /* The GNU C library defines this for functions which it implements
+     to always fail with ENOSYS.  Some functions are actually named
+     something starting with __ and the normal name is an alias.  */
+--- a/lib/autoconf/functions.m4
++++ b/lib/autoconf/functions.m4
+@@ -1601,9 +1601,6 @@ AC_DEFUN([AC_FUNC_STRTOD],
+ AC_CACHE_CHECK(for working strtod, ac_cv_func_strtod,
+ [AC_RUN_IFELSE([AC_LANG_SOURCE([[
+ ]AC_INCLUDES_DEFAULT[
+-#ifndef strtod
+-double strtod ();
+-#endif
+ int
+ main (void)
+ {
+--- a/tests/fortran.at
++++ b/tests/fortran.at
+@@ -233,7 +233,7 @@ void FOOBAR_F77 (double *x, double *y);
+ #  ifdef __cplusplus
+      extern "C"
+ #  endif
+-   int F77_DUMMY_MAIN () { return 1; }
++   int F77_DUMMY_MAIN (void) { return 1; }
+ #endif
+ 
+ int main(int argc, char *argv[])
+@@ -315,7 +315,7 @@ void FOOBAR_FC(double *x, double *y);
+ #  ifdef __cplusplus
+      extern "C"
+ #  endif
+-   int FC_DUMMY_MAIN () { return 1; }
++   int FC_DUMMY_MAIN (void) { return 1; }
+ #endif
+ 
+ int main (int argc, char *argv[])
+@@ -561,7 +561,7 @@ void @foobar@ (int *x);
+ #  ifdef __cplusplus
+      extern "C"
+ #  endif
+-   int F77_DUMMY_MAIN () { return 1; }
++   int F77_DUMMY_MAIN (void) { return 1; }
+ #endif
+ 
+ int main(int argc, char *argv[])
+@@ -637,7 +637,7 @@ void @foobar@ (int *x);
+ #  ifdef __cplusplus
+      extern "C"
+ #  endif
+-   int FC_DUMMY_MAIN () { return 1; }
++   int FC_DUMMY_MAIN (void) { return 1; }
+ #endif
+ 
+ int main(int argc, char *argv[])
+--- a/tests/semantics.at
++++ b/tests/semantics.at
+@@ -207,7 +207,7 @@ AT_CHECK_MACRO([AC_CHECK_DECLS],
+ 		 [[extern int yes;
+ 		   enum { myenum };
+ 		   extern struct mystruct_s { int x[20]; } mystruct;
+-		   extern int myfunc();
++		   extern int myfunc (int);
+ 		   #define mymacro1(arg) arg
+ 		   #define mymacro2]])
+   # Ensure we can detect missing declarations of functions whose
diff --git a/poky/meta/recipes-devtools/autoconf/autoconf/0001-specify-void-prototype-for-functions-with-no-paramet.patch b/poky/meta/recipes-devtools/autoconf/autoconf/0001-specify-void-prototype-for-functions-with-no-paramet.patch
deleted file mode 100644
index 4d8aa29..0000000
--- a/poky/meta/recipes-devtools/autoconf/autoconf/0001-specify-void-prototype-for-functions-with-no-paramet.patch
+++ /dev/null
@@ -1,64 +0,0 @@
-From 7ccfea413216bddd988823acf4e93421ea0f7f9f Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Tue, 16 Aug 2022 18:35:45 -0700
-Subject: [PATCH] specify void prototype for functions with no parameters
-
-Compilers defaulting to C99 flag such functions as warning which fails
-to compile when using -Werror
-
-Fixes
-error: a function declaration without a prototype is deprecated in all versions of C [-Werror,-Wstrict-prototypes]
-
-Upstream-Status: Submitted [https://lists.gnu.org/archive/html/autoconf-patches/2022-08/msg00003.html]
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- lib/autoconf/c.m4 | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
---- a/lib/autoconf/c.m4
-+++ b/lib/autoconf/c.m4
-@@ -127,7 +127,7 @@ m4_if([$2], [main], ,
- [/* Override any GCC internal prototype to avoid an error.
-    Use char because int might match the return type of a GCC
-    builtin and then its argument prototype would still apply.  */
--char $2 ();])], [return $2 ();])])
-+char $2 (void);])], [return $2 ();])])
- 
- 
- # AC_LANG_FUNC_LINK_TRY(C)(FUNCTION)
-@@ -151,7 +151,7 @@ m4_define([AC_LANG_FUNC_LINK_TRY(C)],
- #define $1 innocuous_$1
- 
- /* System header to define __stub macros and hopefully few prototypes,
--   which can conflict with char $1 (); below.  */
-+   which can conflict with char $1 (void); below.  */
- 
- #include <limits.h>
- #undef $1
-@@ -162,7 +162,7 @@ m4_define([AC_LANG_FUNC_LINK_TRY(C)],
- #ifdef __cplusplus
- extern "C"
- #endif
--char $1 ();
-+char $1 (void);
- /* The GNU C library defines this for functions which it implements
-     to always fail with ENOSYS.  Some functions are actually named
-     something starting with __ and the normal name is an alias.  */
-@@ -252,7 +252,7 @@ dnl other built-in extern "C" functions,
- dnl when it actually happens.
- [AC_LANG_PROGRAM([[$1
- namespace conftest {
--  extern "C" int $2 ();
-+  extern "C" int $2 (void);
- }]],
- [[return conftest::$2 ();]])])
- 
-@@ -2457,7 +2457,7 @@ using std::strcmp;
- 
- namespace {
- 
--void test_exception_syntax()
-+void test_exception_syntax(void)
- {
-   try {
-     throw "test";
diff --git a/poky/meta/recipes-devtools/autoconf/autoconf_2.71.bb b/poky/meta/recipes-devtools/autoconf/autoconf_2.71.bb
index 239b268..97c241a 100644
--- a/poky/meta/recipes-devtools/autoconf/autoconf_2.71.bb
+++ b/poky/meta/recipes-devtools/autoconf/autoconf_2.71.bb
@@ -18,7 +18,7 @@
            file://preferbash.patch \
            file://autotest-automake-result-format.patch \
            file://man-host-perl.patch \
-           file://0001-specify-void-prototype-for-functions-with-no-paramet.patch \
+           file://0001-Port-to-compilers-that-moan-about-K-R-func-decls.patch \
            "
 SRC_URI:append:class-native = " file://no-man.patch"
 
diff --git a/poky/meta/recipes-devtools/binutils/binutils-2.39.inc b/poky/meta/recipes-devtools/binutils/binutils-2.39.inc
index 89612a3..b040e57 100644
--- a/poky/meta/recipes-devtools/binutils/binutils-2.39.inc
+++ b/poky/meta/recipes-devtools/binutils/binutils-2.39.inc
@@ -31,5 +31,13 @@
      file://0010-sync-with-OE-libtool-changes.patch \
      file://0011-Check-for-clang-before-checking-gcc-version.patch \
      file://0012-Only-generate-an-RPATH-entry-if-LD_RUN_PATH-is-not-e.patch \
+     file://0013-CVE-2022-38533.patch \
+     file://0014-CVE-2022-38128-1.patch \
+     file://0014-CVE-2022-38128-2.patch \
+     file://0014-CVE-2022-38128-3.patch \
 "
 S  = "${WORKDIR}/git"
+# Fixes for the following are already in the 2.39 branch
+# - https://sourceware.org/bugzilla/show_bug.cgi?id=29289
+# - https://sourceware.org/bugzilla/show_bug.cgi?id=29290
+CVE_CHECK_IGNORE += "CVE-2022-38126 CVE-2022-38127"
diff --git a/poky/meta/recipes-devtools/binutils/binutils-cross-canadian.inc b/poky/meta/recipes-devtools/binutils/binutils-cross-canadian.inc
index 4e8f10c..12e7d77 100644
--- a/poky/meta/recipes-devtools/binutils/binutils-cross-canadian.inc
+++ b/poky/meta/recipes-devtools/binutils/binutils-cross-canadian.inc
@@ -23,10 +23,10 @@
 	rm -f ${D}${libdir}/libiberty*
 	rm -f ${D}${libdir}/libopcodes*
 	rm -f ${D}${includedir}/*.h
-	
+	rm -f ${D}${sysconfdir}/gprofng.rc
+	rmdir ${D}${sysconfdir} || :
+
 	cross_canadian_bindirlinks
 }
 
-FILES:${PN} += "${sysconfdir}/gprofng.rc"
-
 BBCLASSEXTEND = ""
diff --git a/poky/meta/recipes-devtools/binutils/binutils.inc b/poky/meta/recipes-devtools/binutils/binutils.inc
index 789c8be..98acf0a 100644
--- a/poky/meta/recipes-devtools/binutils/binutils.inc
+++ b/poky/meta/recipes-devtools/binutils/binutils.inc
@@ -36,6 +36,24 @@
 # Rather than duplicating multiple entries for these, make one
 # list and reuse it.
 
+GPROFNGS = " \
+	gp-archive \
+	gp-collect-app \
+	gp-display-html \
+	gp-display-src \
+	gp-display-text \
+	gprofng \
+"
+
+# gprofng is disabled for clang and musl in the bb file, so list no gprofng alternatives for them
+GPROFNGS:toolchain-clang = ""
+GPROFNGS:libc-musl = ""
+
+GPROFNG_ALTS ?= ""
+GPROFNG_ALTS:x86 = "${GPROFNGS}"
+GPROFNG_ALTS:x86-64 = "${GPROFNGS}"
+GPROFNG_ALTS:aarch64 = "${GPROFNGS}"
+
 LDGOLD_ALTS ?= "ld.gold dwp"
 LDGOLD_ALTS:riscv64 = ""
 LDGOLD_ALTS:riscv32 = ""
@@ -48,6 +66,7 @@
 	c++filt \
 	elfedit \
 	gprof \
+	${GPROFNG_ALTS} \
 	ld \
 	ld.bfd \
 	${LDGOLD_ALTS} \
diff --git a/poky/meta/recipes-devtools/binutils/binutils/0013-CVE-2022-38533.patch b/poky/meta/recipes-devtools/binutils/binutils/0013-CVE-2022-38533.patch
new file mode 100644
index 0000000..5d9ac2c
--- /dev/null
+++ b/poky/meta/recipes-devtools/binutils/binutils/0013-CVE-2022-38533.patch
@@ -0,0 +1,36 @@
+From ef186fe54aa6d281a3ff8a9528417e5cc614c797 Mon Sep 17 00:00:00 2001
+From: Alan Modra <amodra@gmail.com>
+Date: Sat, 13 Aug 2022 15:32:47 +0930
+Subject: [PATCH] PR29482 - strip: heap-buffer-overflow
+
+	PR 29482
+	* coffcode.h (coff_set_section_contents): Sanity check _LIB.
+
+Upstream-Status: Backport [https://sourceware.org/git/gitweb.cgi?p=binutils-gdb.git;h=ef186fe54aa6d281a3ff8a9528417e5cc614c797]
+
+Signed-off-by: Pgowda <pgowda.cve@gmail.com>
+
+---
+ bfd/coffcode.h | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/bfd/coffcode.h b/bfd/coffcode.h
+index 67aaf158ca1..52027981c3f 100644
+--- a/bfd/coffcode.h
++++ b/bfd/coffcode.h
+@@ -4302,10 +4302,13 @@ coff_set_section_contents (bfd * abfd,
+ 
+ 	rec = (bfd_byte *) location;
+ 	recend = rec + count;
+-	while (rec < recend)
++	while (recend - rec >= 4)
+ 	  {
++	    size_t len = bfd_get_32 (abfd, rec);
++	    if (len == 0 || len > (size_t) (recend - rec) / 4)
++	      break;
++	    rec += len * 4;
+ 	    ++section->lma;
+-	    rec += bfd_get_32 (abfd, rec) * 4;
+ 	  }
+ 
+ 	BFD_ASSERT (rec == recend);
diff --git a/poky/meta/recipes-devtools/binutils/binutils/0014-CVE-2022-38128-1.patch b/poky/meta/recipes-devtools/binutils/binutils/0014-CVE-2022-38128-1.patch
new file mode 100644
index 0000000..0a490d8
--- /dev/null
+++ b/poky/meta/recipes-devtools/binutils/binutils/0014-CVE-2022-38128-1.patch
@@ -0,0 +1,350 @@
+From f07c08e115e27cddf5a0030dc6332bbee1bd9c6a Mon Sep 17 00:00:00 2001
+From: Alan Modra <amodra@gmail.com>
+Date: Thu, 21 Jul 2022 08:38:14 +0930
+Subject: [PATCH] binutils/dwarf.c: abbrev caching
+
+I'm inclined to think that abbrev caching is counter-productive.  The
+time taken to search the list of abbrevs converted to internal form is
+non-zero, and it's easy to decode the raw abbrevs.  It's especially
+silly to cache empty lists of decoded abbrevs (happens with zero
+padding in .debug_abbrev), or abbrevs as they are displayed when there
+is no further use of those abbrevs.  This patch stops caching in those
+cases.
+
+	* dwarf.c (record_abbrev_list_for_cu): Add free_list param.
+	Put abbrevs on abbrev_lists here.
+	(new_abbrev_list): Delete function.
+	(process_abbrev_set): Return newly allocated list.  Move
+	abbrev base, offset and size checking to..
+	(find_and_process_abbrev_set): ..here, new function.  Handle
+	lookup of cached abbrevs here, and calculate start and end
+	for process_abbrev_set.  Return free_list if newly alloc'd.
+	(process_debug_info): Consolidate cached list lookup, new list
+	alloc and processing into find_and_process_abbrev_set call.
+	Free list when not cached.
+	(display_debug_abbrev): Similarly.
+
+Upstream-Status: Backport [https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=f07c08e115e27cddf5a0030dc6332bbee1bd9c6a]
+
+Signed-off-by: Pgowda <pgowda.cve@gmail.com>
+---
+ binutils/dwarf.c | 208 +++++++++++++++++++++++++----------------------
+ 1 file changed, 110 insertions(+), 98 deletions(-)
+
+diff --git a/binutils/dwarf.c b/binutils/dwarf.c
+index 267ed3bb382..2fc352f74c5 100644
+--- a/binutils/dwarf.c
++++ b/binutils/dwarf.c
+@@ -882,8 +882,15 @@ static unsigned long  next_free_abbrev_m
+ #define ABBREV_MAP_ENTRIES_INCREMENT   8
+ 
+ static void
+-record_abbrev_list_for_cu (dwarf_vma start, dwarf_vma end, abbrev_list * list)
++record_abbrev_list_for_cu (dwarf_vma start, dwarf_vma end,
++			   abbrev_list *list, abbrev_list *free_list)
+ {
++  if (free_list != NULL)
++    {
++      list->next = abbrev_lists;
++      abbrev_lists = list;
++    }
++
+   if (cu_abbrev_map == NULL)
+     {
+       num_abbrev_map_entries = INITIAL_NUM_ABBREV_MAP_ENTRIES;
+@@ -936,20 +943,6 @@ free_all_abbrevs (void)
+ }
+ 
+ static abbrev_list *
+-new_abbrev_list (dwarf_vma abbrev_base, dwarf_vma abbrev_offset)
+-{
+-  abbrev_list * list = (abbrev_list *) xcalloc (sizeof * list, 1);
+-
+-  list->abbrev_base = abbrev_base;
+-  list->abbrev_offset = abbrev_offset;
+-
+-  list->next = abbrev_lists;
+-  abbrev_lists = list;
+-
+-  return list;
+-}
+-
+-static abbrev_list *
+ find_abbrev_list_by_abbrev_offset (dwarf_vma abbrev_base,
+ 				   dwarf_vma abbrev_offset)
+ {
+@@ -966,7 +959,7 @@ find_abbrev_list_by_abbrev_offset (dwarf
+ /* Find the abbreviation map for the CU that includes OFFSET.
+    OFFSET is an absolute offset from the start of the .debug_info section.  */
+ /* FIXME: This function is going to slow down readelf & objdump.
+-   Consider using a better algorithm to mitigate this effect.  */
++   Not caching abbrevs is likely the answer.  */
+ 
+ static  abbrev_map *
+ find_abbrev_map_by_offset (dwarf_vma offset)
+@@ -1033,40 +1026,18 @@ add_abbrev_attr (unsigned long    attrib
+   list->last_abbrev->last_attr = attr;
+ }
+ 
+-/* Processes the (partial) contents of a .debug_abbrev section.
+-   Returns NULL if the end of the section was encountered.
+-   Returns the address after the last byte read if the end of
+-   an abbreviation set was found.  */
++/* Return processed (partial) contents of a .debug_abbrev section.
++   Returns NULL on errors.  */
+ 
+-static unsigned char *
++static abbrev_list *
+ process_abbrev_set (struct dwarf_section *section,
+-		    dwarf_vma abbrev_base,
+-		    dwarf_vma abbrev_size,
+-		    dwarf_vma abbrev_offset,
+-		    abbrev_list *list)
++		    unsigned char *start,
++		    unsigned char *end)
+ {
+-  if (abbrev_base >= section->size
+-      || abbrev_size > section->size - abbrev_base)
+-    {
+-      /* PR 17531: file:4bcd9ce9.  */
+-      warn (_("Debug info is corrupted, abbrev size (%lx) is larger than "
+-	      "abbrev section size (%lx)\n"),
+-	      (unsigned long) (abbrev_base + abbrev_size),
+-	      (unsigned long) section->size);
+-      return NULL;
+-    }
+-  if (abbrev_offset >= abbrev_size)
+-    {
+-      warn (_("Debug info is corrupted, abbrev offset (%lx) is larger than "
+-	      "abbrev section size (%lx)\n"),
+-	    (unsigned long) abbrev_offset,
+-	    (unsigned long) abbrev_size);
+-      return NULL;
+-    }
++  abbrev_list *list = xmalloc (sizeof (*list));
++  list->first_abbrev = NULL;
++  list->last_abbrev = NULL;
+ 
+-  unsigned char *start = section->start + abbrev_base;
+-  unsigned char *end = start + abbrev_size;
+-  start += abbrev_offset;
+   while (start < end)
+     {
+       unsigned long entry;
+@@ -1079,14 +1050,18 @@ process_abbrev_set (struct dwarf_section
+       /* A single zero is supposed to end the set according
+ 	 to the standard.  If there's more, then signal that to
+ 	 the caller.  */
+-      if (start == end)
+-	return NULL;
+-      if (entry == 0)
+-	return start;
++      if (start == end || entry == 0)
++	{
++	  list->start_of_next_abbrevs = start != end ? start : NULL;
++	  return list;
++	}
+ 
+       READ_ULEB (tag, start, end);
+       if (start == end)
+-	return NULL;
++	{
++	  free (list);
++	  return NULL;
++	}
+ 
+       children = *start++;
+ 
+@@ -1121,9 +1096,67 @@ process_abbrev_set (struct dwarf_section
+   /* Report the missing single zero which ends the section.  */
+   error (_(".debug_abbrev section not zero terminated\n"));
+ 
++  free (list);
+   return NULL;
+ }
+ 
++/* Return a sequence of abbrevs in SECTION starting at ABBREV_BASE
++   plus ABBREV_OFFSET and finishing at ABBREV_BASE + ABBREV_SIZE.
++   If FREE_LIST is non-NULL search the already decoded abbrevs on
++   abbrev_lists first and if found set *FREE_LIST to NULL.  If
++   searching doesn't find a matching abbrev, set *FREE_LIST to the
++   newly allocated list.  If FREE_LIST is NULL, no search is done and
++   the returned abbrev_list is always newly allocated.  */
++
++static abbrev_list *
++find_and_process_abbrev_set (struct dwarf_section *section,
++			     dwarf_vma abbrev_base,
++			     dwarf_vma abbrev_size,
++			     dwarf_vma abbrev_offset,
++			     abbrev_list **free_list)
++{
++  if (free_list)
++    *free_list = NULL;
++
++  if (abbrev_base >= section->size
++      || abbrev_size > section->size - abbrev_base)
++    {
++      /* PR 17531: file:4bcd9ce9.  */
++      warn (_("Debug info is corrupted, abbrev size (%lx) is larger than "
++	      "abbrev section size (%lx)\n"),
++	      (unsigned long) (abbrev_base + abbrev_size),
++	      (unsigned long) section->size);
++      return NULL;
++    }
++  if (abbrev_offset >= abbrev_size)
++    {
++      warn (_("Debug info is corrupted, abbrev offset (%lx) is larger than "
++	      "abbrev section size (%lx)\n"),
++	    (unsigned long) abbrev_offset,
++	    (unsigned long) abbrev_size);
++      return NULL;
++    }
++
++  unsigned char *start = section->start + abbrev_base + abbrev_offset;
++  unsigned char *end = section->start + abbrev_base + abbrev_size;
++  abbrev_list *list = NULL;
++  if (free_list)
++    list = find_abbrev_list_by_abbrev_offset (abbrev_base, abbrev_offset);
++  if (list == NULL)
++    {
++      list = process_abbrev_set (section, start, end);
++      if (list)
++	{
++	  list->abbrev_base = abbrev_base;
++	  list->abbrev_offset = abbrev_offset;
++	  list->next = NULL;
++	}
++      if (free_list)
++	*free_list = list;
++    }
++  return list;
++}
++
+ static const char *
+ get_TAG_name (unsigned long tag)
+ {
+@@ -3670,7 +3703,6 @@ process_debug_info (struct dwarf_section
+       dwarf_vma                 cu_offset;
+       unsigned int              offset_size;
+       struct cu_tu_set *        this_set;
+-      abbrev_list *             list;
+       unsigned char *end_cu;
+ 
+       hdrptr = start;
+@@ -3726,22 +3758,18 @@ process_debug_info (struct dwarf_section
+ 	  abbrev_size = this_set->section_sizes [DW_SECT_ABBREV];
+ 	}
+ 
+-      list = find_abbrev_list_by_abbrev_offset (abbrev_base,
+-						compunit.cu_abbrev_offset);
+-      if (list == NULL)
+-	{
+-	  unsigned char *  next;
+-
+-	  list = new_abbrev_list (abbrev_base,
+-				  compunit.cu_abbrev_offset);
+-	  next = process_abbrev_set (&debug_displays[abbrev_sec].section,
+-				     abbrev_base, abbrev_size,
+-				     compunit.cu_abbrev_offset, list);
+-	  list->start_of_next_abbrevs = next;
+-	}
+-
++      abbrev_list *list;
++      abbrev_list *free_list;
++      list = find_and_process_abbrev_set (&debug_displays[abbrev_sec].section,
++					  abbrev_base, abbrev_size,
++					  compunit.cu_abbrev_offset,
++					  &free_list);
+       start = end_cu;
+-      record_abbrev_list_for_cu (cu_offset, start - section_begin, list);
++      if (list != NULL && list->first_abbrev != NULL)
++	record_abbrev_list_for_cu (cu_offset, start - section_begin,
++				   list, free_list);
++      else if (free_list != NULL)
++	free_abbrev_list (free_list);
+     }
+ 
+   for (start = section_begin, unit = 0; start < end; unit++)
+@@ -3757,7 +3785,6 @@ process_debug_info (struct dwarf_section
+       struct cu_tu_set *this_set;
+       dwarf_vma abbrev_base;
+       size_t abbrev_size;
+-      abbrev_list * list = NULL;
+       unsigned char *end_cu;
+ 
+       hdrptr = start;
+@@ -3936,20 +3963,10 @@ process_debug_info (struct dwarf_section
+ 	}
+ 
+       /* Process the abbrevs used by this compilation unit.  */
+-      list = find_abbrev_list_by_abbrev_offset (abbrev_base,
+-						compunit.cu_abbrev_offset);
+-      if (list == NULL)
+-	{
+-	  unsigned char *next;
+-
+-	  list = new_abbrev_list (abbrev_base,
+-				  compunit.cu_abbrev_offset);
+-	  next = process_abbrev_set (&debug_displays[abbrev_sec].section,
+-				     abbrev_base, abbrev_size,
+-				     compunit.cu_abbrev_offset, list);
+-	  list->start_of_next_abbrevs = next;
+-	}
+-
++      abbrev_list *list;
++      list = find_and_process_abbrev_set (&debug_displays[abbrev_sec].section,
++					  abbrev_base, abbrev_size,
++					  compunit.cu_abbrev_offset, NULL);
+       level = 0;
+       last_level = level;
+       saved_level = -1;
+@@ -4128,6 +4145,8 @@ process_debug_info (struct dwarf_section
+ 	  if (entry->children)
+ 	    ++level;
+ 	}
++      if (list != NULL)
++	free_abbrev_list (list);
+     }
+ 
+   /* Set num_debug_info_entries here so that it can be used to check if
+@@ -6353,24 +6372,15 @@ display_debug_abbrev (struct dwarf_secti
+ 
+   do
+     {
+-      abbrev_list *    list;
+-      dwarf_vma        offset;
+-
+-      offset = start - section->start;
+-      list = find_abbrev_list_by_abbrev_offset (0, offset);
++      dwarf_vma offset = start - section->start;
++      abbrev_list *list = find_and_process_abbrev_set (section, 0,
++						       section->size, offset,
++						       NULL);
+       if (list == NULL)
+-	{
+-	  list = new_abbrev_list (0, offset);
+-	  start = process_abbrev_set (section, 0, section->size, offset, list);
+-	  list->start_of_next_abbrevs = start;
+-	}
+-      else
+-	start = list->start_of_next_abbrevs;
+-
+-      if (list->first_abbrev == NULL)
+-	continue;
++	break;
+ 
+-      printf (_("  Number TAG (0x%lx)\n"), (long) offset);
++      if (list->first_abbrev)
++	printf (_("  Number TAG (0x%lx)\n"), (long) offset);
+ 
+       for (entry = list->first_abbrev; entry; entry = entry->next)
+ 	{
+@@ -6391,6 +6401,8 @@ display_debug_abbrev (struct dwarf_secti
+ 	      putchar ('\n');
+ 	    }
+ 	}
++      start = list->start_of_next_abbrevs;
++      free_abbrev_list (list);
+     }
+   while (start);
+ 
diff --git a/poky/meta/recipes-devtools/binutils/binutils/0014-CVE-2022-38128-2.patch b/poky/meta/recipes-devtools/binutils/binutils/0014-CVE-2022-38128-2.patch
new file mode 100644
index 0000000..e30b4d8
--- /dev/null
+++ b/poky/meta/recipes-devtools/binutils/binutils/0014-CVE-2022-38128-2.patch
@@ -0,0 +1,536 @@
+From 175b91507b83ad42607d2f6dadaf55b7b511bdbe Mon Sep 17 00:00:00 2001
+From: Alan Modra <amodra@gmail.com>
+Date: Wed, 20 Jul 2022 18:28:50 +0930
+Subject: [PATCH] miscellaneous dwarf.c tidies
+
+	* dwarf.c: Leading and trailing whitespace fixes.
+	(free_abbrev_list): New function.
+	(free_all_abbrevs): Use the above.  Free cu_abbrev_map here too.
+	(process_abbrev_set): Print actual section name on error.
+	(get_type_abbrev_from_form): Add overflow check.
+	(free_debug_memory): Don't free cu_abbrev_map here..
+	(process_debug_info): ..or here.  Warn on another case of not
+	finding a needed abbrev.
+
+Upstream-Status: Backport [https://sourceware.org/git/?p=binutils-gdb.git;a=commitdiff;h=175b91507b83ad42607d2f6dadaf55b7b511bdbe]
+
+Signed-off-by: Pgowda <pgowda.cve@gmail.com>
+---
+ binutils/dwarf.c | 216 +++++++++++++++++++++++------------------------
+ 1 file changed, 106 insertions(+), 110 deletions(-)
+
+diff --git a/binutils/dwarf.c b/binutils/dwarf.c
+index 2b1eec49422..267ed3bb382 100644
+--- a/binutils/dwarf.c
++++ b/binutils/dwarf.c
+@@ -806,7 +806,7 @@ fetch_indexed_value (dwarf_vma idx,
+       pointer_size = 4;
+       bias = 12;
+     }
+- 
++
+   dwarf_vma offset = idx * pointer_size;
+ 
+   /* Offsets are biased by the size of the section header
+@@ -908,38 +908,41 @@ record_abbrev_list_for_cu (dwarf_vma sta
+   next_free_abbrev_map_entry ++;
+ }
+ 
+-static void
+-free_all_abbrevs (void)
++static abbrev_list *
++free_abbrev_list (abbrev_list *list)
+ {
+-  abbrev_list *  list;
++  abbrev_entry *abbrv = list->first_abbrev;
+ 
+-  for (list = abbrev_lists; list != NULL;)
++  while (abbrv)
+     {
+-      abbrev_list *   next = list->next;
+-      abbrev_entry *  abbrv;
++      abbrev_attr *attr = abbrv->first_attr;
+ 
+-      for (abbrv = list->first_abbrev; abbrv != NULL;)
++      while (attr)
+ 	{
+-	  abbrev_entry *  next_abbrev = abbrv->next;
+-	  abbrev_attr *   attr;
+-
+-	  for (attr = abbrv->first_attr; attr;)
+-	    {
+-	      abbrev_attr *next_attr = attr->next;
+-
+-	      free (attr);
+-	      attr = next_attr;
+-	    }
+-
+-	  free (abbrv);
+-	  abbrv = next_abbrev;
++	  abbrev_attr *next_attr = attr->next;
++	  free (attr);
++	  attr = next_attr;
+ 	}
+ 
+-      free (list);
+-      list = next;
++      abbrev_entry *next_abbrev = abbrv->next;
++      free (abbrv);
++      abbrv = next_abbrev;
+     }
+ 
+-  abbrev_lists = NULL;
++  abbrev_list *next = list->next;
++  free (list);
++  return next;
++}
++
++static void
++free_all_abbrevs (void)
++{
++  while (abbrev_lists)
++    abbrev_lists = free_abbrev_list (abbrev_lists);
++
++  free (cu_abbrev_map);
++  cu_abbrev_map = NULL;
++  next_free_abbrev_map_entry = 0;
+ }
+ 
+ static abbrev_list *
+@@ -971,7 +974,7 @@ find_abbrev_map_by_offset (dwarf_vma off
+ 	&& cu_abbrev_map[i].end > offset)
+       return cu_abbrev_map + i;
+ 
+-  return NULL;	
++  return NULL;
+ }
+ 
+ static void
+@@ -1094,7 +1097,7 @@ process_abbrev_set (struct dwarf_section
+     }
+ 
+   /* Report the missing single zero which ends the section.  */
+-  error (_(".debug_abbrev section not zero terminated\n"));
++  error (_("%s section not zero terminated\n"), section->name);
+ 
+   free (list);
+   return NULL;
+@@ -1875,7 +1878,7 @@ fetch_alt_indirect_string (dwarf_vma off
+ 	dwarf_vmatoa ("x", offset));
+   return _("<offset is too big>");
+ }
+-	
++
+ static const char *
+ get_AT_name (unsigned long attribute)
+ {
+@@ -2157,7 +2160,8 @@ get_type_abbrev_from_form (unsigned long
+     case DW_FORM_ref4:
+     case DW_FORM_ref8:
+     case DW_FORM_ref_udata:
+-      if (uvalue + cu_offset > (size_t) (cu_end - section->start))
++      if (uvalue + cu_offset < uvalue
++	  || uvalue + cu_offset > (size_t) (cu_end - section->start))
+ 	{
+ 	  warn (_("Unable to resolve ref form: uvalue %lx + cu_offset %lx > CU size %lx\n"),
+ 		uvalue, (long) cu_offset, (long) (cu_end - section->start));
+@@ -2194,7 +2198,7 @@ get_type_abbrev_from_form (unsigned long
+       else
+ 	*map_return = NULL;
+     }
+-	
++
+   READ_ULEB (abbrev_number, data, section->start + section->size);
+ 
+   for (entry = map->list->first_abbrev; entry != NULL; entry = entry->next)
+@@ -2783,10 +2787,10 @@ read_and_display_attr_value (unsigned lo
+ 	  if (form == DW_FORM_loclistx)
+ 	    {
+ 	      if (dwo)
+-	        {
+-	          index = fetch_indexed_value (uvalue, loclists_dwo, 0);
+-	          index += (offset_size == 8) ? 20 : 12;
+-	        }
++		{
++		  index = fetch_indexed_value (uvalue, loclists_dwo, 0);
++		  index += (offset_size == 8) ? 20 : 12;
++		}
+ 	      else if (debug_info_p == NULL)
+ 		{
+ 		  index = fetch_indexed_value (uvalue, loclists, 0);
+@@ -2804,21 +2808,21 @@ read_and_display_attr_value (unsigned lo
+ 	  else if (form == DW_FORM_rnglistx)
+ 	    {
+ 	      if (dwo)
+-	        {
+-	          index = fetch_indexed_value (uvalue, rnglists_dwo, 0);
+-	          index += (offset_size == 8) ? 20 : 12;
+-	        }
++		{
++		  index = fetch_indexed_value (uvalue, rnglists_dwo, 0);
++		  index += (offset_size == 8) ? 20 : 12;
++		}
+ 	      else
+-	        {
+-	          if (debug_info_p == NULL)
+-	            base = 0;
+-	          else
+-	            base = debug_info_p->rnglists_base;
+-	          /* We do not have a cached value this time, so we perform the
+-	             computation manually.  */
+-	          index = fetch_indexed_value (uvalue, rnglists, base);
+-	          index += base;
+-	        }
++		{
++		  if (debug_info_p == NULL)
++		    base = 0;
++		  else
++		    base = debug_info_p->rnglists_base;
++		  /* We do not have a cached value this time, so we perform the
++		     computation manually.  */
++		  index = fetch_indexed_value (uvalue, rnglists, base);
++		  index += base;
++		}
+ 	    }
+ 	  else
+ 	    {
+@@ -2844,7 +2848,7 @@ read_and_display_attr_value (unsigned lo
+       if (!do_loc)
+ 	printf ("%c<0x%s>", delimiter, dwarf_vmatoa ("x", uvalue + cu_offset));
+       break;
+-      
++
+     default:
+       warn (_("Unrecognized form: 0x%lx\n"), form);
+       /* What to do?  Consume a byte maybe?  */
+@@ -2869,9 +2873,9 @@ read_and_display_attr_value (unsigned lo
+ 	case DW_AT_rnglists_base:
+ 	  if (debug_info_p->rnglists_base)
+ 	    warn (_("CU @ 0x%s has multiple rnglists_base values (0x%s and 0x%s)"),
+-	          dwarf_vmatoa ("x", debug_info_p->cu_offset),
+-	          dwarf_vmatoa ("x", debug_info_p->rnglists_base),
+-	          dwarf_vmatoa ("x", uvalue));
++		  dwarf_vmatoa ("x", debug_info_p->cu_offset),
++		  dwarf_vmatoa ("x", debug_info_p->rnglists_base),
++		  dwarf_vmatoa ("x", uvalue));
+ 	  debug_info_p->rnglists_base = uvalue;
+ 	  break;
+ 	case DW_AT_str_offsets_base:
+@@ -3021,7 +3025,7 @@ read_and_display_attr_value (unsigned lo
+ 	      case DW_FORM_strx3:
+ 	      case DW_FORM_strx4:
+ 		add_dwo_name (fetch_indexed_string (uvalue, this_set, offset_size, false,
+-		                                    debug_info_p->str_offsets_base),
++						    debug_info_p->str_offsets_base),
+ 			      cu_offset);
+ 		break;
+ 	      case DW_FORM_string:
+@@ -3055,7 +3059,7 @@ read_and_display_attr_value (unsigned lo
+ 	      case DW_FORM_strx3:
+ 	      case DW_FORM_strx4:
+ 		add_dwo_dir (fetch_indexed_string (uvalue, this_set, offset_size, false,
+-		                                   debug_info_p->str_offsets_base),
++						   debug_info_p->str_offsets_base),
+ 			     cu_offset);
+ 		break;
+ 	      case DW_FORM_string:
+@@ -3686,11 +3690,8 @@ process_debug_info (struct dwarf_section
+     introduce (section, false);
+ 
+   free_all_abbrevs ();
+-  free (cu_abbrev_map);
+-  cu_abbrev_map = NULL;
+-  next_free_abbrev_map_entry = 0;
+ 
+-  /* In order to be able to resolve DW_FORM_ref_attr forms we need
++  /* In order to be able to resolve DW_FORM_ref_addr forms we need
+      to load *all* of the abbrevs for all CUs in this .debug_info
+      section.  This does effectively mean that we (partially) read
+      every CU header twice.  */
+@@ -4045,12 +4046,11 @@ process_debug_info (struct dwarf_section
+ 
+ 	  /* Scan through the abbreviation list until we reach the
+ 	     correct entry.  */
+-	  if (list == NULL)
+-	    continue;
+-
+-	  for (entry = list->first_abbrev; entry != NULL; entry = entry->next)
+-	    if (entry->number == abbrev_number)
+-	      break;
++	  entry = NULL;
++	  if (list != NULL)
++	    for (entry = list->first_abbrev; entry != NULL; entry = entry->next)
++	      if (entry->number == abbrev_number)
++		break;
+ 
+ 	  if (entry == NULL)
+ 	    {
+@@ -4074,7 +4074,7 @@ process_debug_info (struct dwarf_section
+ 	      break;
+ 	    case DW_TAG_compile_unit:
+ 	    case DW_TAG_skeleton_unit:
+-	      need_base_address = 1;	
++	      need_base_address = 1;
+ 	      need_dwo_info = do_loc;
+ 	      break;
+ 	    case DW_TAG_entry_point:
+@@ -4459,7 +4459,7 @@ display_debug_sup (struct dwarf_section
+ 
+   SAFE_BYTE_GET_AND_INC (is_supplementary, start, 1, end);
+   if (is_supplementary != 0 && is_supplementary != 1)
+-    warn (_("corrupt .debug_sup section: is_supplementary not 0 or 1\n"));    
++    warn (_("corrupt .debug_sup section: is_supplementary not 0 or 1\n"));
+ 
+   sup_filename = start;
+   if (is_supplementary && sup_filename[0] != 0)
+@@ -5638,7 +5638,7 @@ display_debug_lines_decoded (struct dwar
+ 			printf ("%s  %11d  %#18" DWARF_VMA_FMT "x",
+ 				newFileName, state_machine_regs.line,
+ 				state_machine_regs.address);
+-		    }			
++		    }
+ 		  else
+ 		    {
+ 		      if (xop == -DW_LNE_end_sequence)
+@@ -6092,7 +6092,7 @@ display_debug_macro (struct dwarf_sectio
+   load_debug_section_with_follow (str, file);
+   load_debug_section_with_follow (line, file);
+   load_debug_section_with_follow (str_index, file);
+-  
++
+   introduce (section, false);
+ 
+   while (curr < end)
+@@ -6537,7 +6537,7 @@ display_loc_list (struct dwarf_section *
+ 
+       /* Check base address specifiers.  */
+       if (is_max_address (begin, pointer_size)
+-          && !is_max_address (end, pointer_size))
++	  && !is_max_address (end, pointer_size))
+ 	{
+ 	  base_address = end;
+ 	  print_dwarf_vma (begin, pointer_size);
+@@ -6715,7 +6715,7 @@ display_loclists_list (struct dwarf_sect
+ 	case DW_LLE_default_location:
+ 	  begin = end = 0;
+ 	  break;
+-	  
++
+ 	case DW_LLE_offset_pair:
+ 	  READ_ULEB (begin, start, section_end);
+ 	  begin += base_address;
+@@ -7011,7 +7011,7 @@ display_offset_entry_loclists (struct dw
+   unsigned char *  start = section->start;
+   unsigned char * const end = start + section->size;
+ 
+-  introduce (section, false);  
++  introduce (section, false);
+ 
+   do
+     {
+@@ -7060,14 +7060,14 @@ display_offset_entry_loclists (struct dw
+ 		section->name, segment_selector_size);
+ 	  return 0;
+ 	}
+-      
++
+       if (offset_entry_count == 0)
+ 	{
+ 	  warn (_("The %s section contains a table without offset\n"),
+ 		section->name);
+ 	  return 0;
+ 	}
+-  
++
+       printf (_("\n   Offset Entries starting at 0x%lx:\n"),
+ 	      (long)(start - section->start));
+ 
+@@ -8229,7 +8229,7 @@ display_debug_rnglists (struct dwarf_sec
+ 	  start = display_debug_rnglists_list
+ 	    (start, end, address_size, offset, 0, offset_size);
+ 	  if (start >= end)
+-	    break;	  
++	    break;
+ 	}
+ 
+       start = end;
+@@ -8347,12 +8347,12 @@ display_debug_ranges (struct dwarf_secti
+       next = section_begin + offset + debug_info_p->rnglists_base;
+ 
+       /* If multiple DWARF entities reference the same range then we will
+-         have multiple entries in the `range_entries' list for the same
+-         offset.  Thanks to the sort above these will all be consecutive in
+-         the `range_entries' list, so we can easily ignore duplicates
+-         here.  */
++	 have multiple entries in the `range_entries' list for the same
++	 offset.  Thanks to the sort above these will all be consecutive in
++	 the `range_entries' list, so we can easily ignore duplicates
++	 here.  */
+       if (i > 0 && last_offset == offset)
+-        continue;
++	continue;
+       last_offset = offset;
+ 
+       if (dwarf_check != 0 && i > 0)
+@@ -10286,7 +10286,7 @@ display_debug_names (struct dwarf_sectio
+ 	  printf (_("Out of %lu items there are %zu bucket clashes"
+ 		    " (longest of %zu entries).\n"),
+ 		  (unsigned long) name_count, hash_clash_count, longest_clash);
+-	  
++
+ 	  if (name_count != buckets_filled + hash_clash_count)
+ 	    warn (_("The name_count (%lu) is not the same as the used bucket_count (%lu) + the hash clash count (%lu)"),
+ 		  (unsigned long) name_count,
+@@ -10390,7 +10390,7 @@ display_debug_names (struct dwarf_sectio
+ 		break;
+ 	      if (tagno >= 0)
+ 		printf ("%s<%lu>",
+-		        (tagno == 0 && second_abbrev_tag == 0 ? " " : "\n\t"),
++			(tagno == 0 && second_abbrev_tag == 0 ? " " : "\n\t"),
+ 			(unsigned long) abbrev_tag);
+ 
+ 	      for (entry = abbrev_lookup;
+@@ -10919,7 +10919,7 @@ process_cu_tu_index (struct dwarf_sectio
+ 	 Check for integer overflow (can occur when size_t is 32-bit)
+ 	 with overlarge ncols or nused values.  */
+       if (nused == -1u
+-	  || _mul_overflow ((size_t) ncols, 4, &temp)	  
++	  || _mul_overflow ((size_t) ncols, 4, &temp)
+ 	  || _mul_overflow ((size_t) nused + 1, temp, &total)
+ 	  || total > (size_t) (limit - ppool))
+ 	{
+@@ -10927,7 +10927,7 @@ process_cu_tu_index (struct dwarf_sectio
+ 		section->name);
+ 	  return 0;
+ 	}
+-      
++
+       if (do_display)
+ 	{
+ 	  printf (_("  Offset table\n"));
+@@ -11431,8 +11431,8 @@ add_separate_debug_file (const char * fi
+ 
+ static bool
+ debuginfod_fetch_separate_debug_info (struct dwarf_section * section,
+-                                      char ** filename,
+-                                      void * file)
++				      char ** filename,
++				      void * file)
+ {
+   size_t build_id_len;
+   unsigned char * build_id;
+@@ -11450,14 +11450,14 @@ debuginfod_fetch_separate_debug_info (st
+ 
+       filelen = strnlen ((const char *)section->start, section->size);
+       if (filelen == section->size)
+-        /* Corrupt debugaltlink.  */
+-        return false;
++	/* Corrupt debugaltlink.  */
++	return false;
+ 
+       build_id = section->start + filelen + 1;
+       build_id_len = section->size - (filelen + 1);
+ 
+       if (build_id_len == 0)
+-        return false;
++	return false;
+     }
+   else
+     return false;
+@@ -11469,25 +11469,25 @@ debuginfod_fetch_separate_debug_info (st
+ 
+       client = debuginfod_begin ();
+       if (client == NULL)
+-        return false;
++	return false;
+ 
+       /* Query debuginfod servers for the target file. If found its path
+-         will be stored in filename.  */
++	 will be stored in filename.  */
+       fd = debuginfod_find_debuginfo (client, build_id, build_id_len, filename);
+       debuginfod_end (client);
+ 
+       /* Only free build_id if we allocated space for a hex string
+-         in get_build_id ().  */
++	 in get_build_id ().  */
+       if (build_id_len == 0)
+-        free (build_id);
++	free (build_id);
+ 
+       if (fd >= 0)
+-        {
+-          /* File successfully retrieved. Close fd since we want to
+-             use open_debug_file () on filename instead.  */
+-          close (fd);
+-          return true;
+-        }
++	{
++	  /* File successfully retrieved. Close fd since we want to
++	     use open_debug_file () on filename instead.  */
++	  close (fd);
++	  return true;
++	}
+     }
+ 
+   return false;
+@@ -11500,7 +11500,7 @@ load_separate_debug_info (const char *
+ 			  parse_func_type         parse_func,
+ 			  check_func_type         check_func,
+ 			  void *                  func_data,
+-                          void *                  file ATTRIBUTE_UNUSED)
++			  void *                  file ATTRIBUTE_UNUSED)
+ {
+   const char *   separate_filename;
+   char *         debug_filename;
+@@ -11616,11 +11616,11 @@ load_separate_debug_info (const char *
+ 						 & tmp_filename,
+ 						 file))
+       {
+-        /* File successfully downloaded from server, replace
+-           debug_filename with the file's path.  */
+-        free (debug_filename);
+-        debug_filename = tmp_filename;
+-        goto found;
++	/* File successfully downloaded from server, replace
++	   debug_filename with the file's path.  */
++	free (debug_filename);
++	debug_filename = tmp_filename;
++	goto found;
+       }
+   }
+ #endif
+@@ -11787,12 +11787,12 @@ load_build_id_debug_file (const char * m
+   /* In theory we should extract the contents of the section into
+      a note structure and then check the fields.  For now though
+      just use hard coded offsets instead:
+-     
++
+        Field  Bytes    Contents
+ 	NSize  0...3   4
+ 	DSize  4...7   8+
+ 	Type   8..11   3  (NT_GNU_BUILD_ID)
+-        Name   12.15   GNU\0
++	Name   12.15   GNU\0
+ 	Data   16....   */
+ 
+   /* FIXME: Check the name size, name and type fields.  */
+@@ -11804,7 +11804,7 @@ load_build_id_debug_file (const char * m
+       warn (_(".note.gnu.build-id data size is too small\n"));
+       return;
+     }
+-  
++
+   if (build_id_size > (section->size - 16))
+     {
+       warn (_(".note.gnu.build-id data size is too bug\n"));
+@@ -12100,10 +12100,6 @@ free_debug_memory (void)
+ 
+   free_all_abbrevs ();
+ 
+-  free (cu_abbrev_map);
+-  cu_abbrev_map = NULL;
+-  next_free_abbrev_map_entry = 0;
+-
+   free (shndx_pool);
+   shndx_pool = NULL;
+   shndx_pool_size = 0;
diff --git a/poky/meta/recipes-devtools/binutils/binutils/0014-CVE-2022-38128-3.patch b/poky/meta/recipes-devtools/binutils/binutils/0014-CVE-2022-38128-3.patch
new file mode 100644
index 0000000..04d06ed
--- /dev/null
+++ b/poky/meta/recipes-devtools/binutils/binutils/0014-CVE-2022-38128-3.patch
@@ -0,0 +1,95 @@
+From 695c6dfe7e85006b98c8b746f3fd5f913c94ebff Mon Sep 17 00:00:00 2001
+From: Alan Modra <amodra@gmail.com>
+Date: Thu, 21 Jul 2022 09:56:15 +0930
+Subject: [PATCH] PR29370, infinite loop in display_debug_abbrev
+
+The PR29370 testcase is a fuzzed object file with multiple
+.trace_abbrev sections.  Multiple .trace_abbrev or .debug_abbrev
+sections are not a violation of the DWARF standard.  The DWARF5
+standard even gives an example of multiple .debug_abbrev sections
+contained in groups.  Caching and lookup of processed abbrevs thus
+needs to be done by section and offset rather than base and offset.
+(Why base anyway?)  Or, since section contents are kept, by a pointer
+into the contents.
+
+	PR 29370
+	* dwarf.c (struct abbrev_list): Replace abbrev_base and
+	abbrev_offset with raw field.
+	(find_abbrev_list_by_abbrev_offset): Delete.
+	(find_abbrev_list_by_raw_abbrev): New function.
+	(process_abbrev_set): Set list->raw and list->next.
+	(find_and_process_abbrev_set): Replace abbrev list lookup with
+	new function.  Don't set list abbrev_base, abbrev_offset or next.
+
+Upstream-Status: Backport [https://sourceware.org/git/?p=binutils-gdb.git;a=patch;h=695c6dfe7e85006b98c8b746f3fd5f913c94ebff]
+
+Signed-off-by: Pgowda <pgowda.cve@gmail.com>
+---
+ binutils/dwarf.c | 19 ++++++-------------
+ 1 file changed, 6 insertions(+), 13 deletions(-)
+
+diff --git a/binutils/dwarf.c b/binutils/dwarf.c
+index 2fc352f74c5..99fb3566994 100644
+--- a/binutils/dwarf.c
++++ b/binutils/dwarf.c
+@@ -856,8 +856,7 @@ typedef struct abbrev_list
+ {
+   abbrev_entry *        first_abbrev;
+   abbrev_entry *        last_abbrev;
+-  dwarf_vma             abbrev_base;
+-  dwarf_vma             abbrev_offset;
++  unsigned char *       raw;
+   struct abbrev_list *  next;
+   unsigned char *       start_of_next_abbrevs;
+ }
+@@ -946,14 +945,12 @@ free_all_abbrevs (void)
+ }
+ 
+ static abbrev_list *
+-find_abbrev_list_by_abbrev_offset (dwarf_vma abbrev_base,
+-				   dwarf_vma abbrev_offset)
++find_abbrev_list_by_raw_abbrev (unsigned char *raw)
+ {
+   abbrev_list * list;
+ 
+   for (list = abbrev_lists; list != NULL; list = list->next)
+-    if (list->abbrev_base == abbrev_base
+-	&& list->abbrev_offset == abbrev_offset)
++    if (list->raw == raw)
+       return list;
+ 
+   return NULL;
+@@ -1040,6 +1037,7 @@ process_abbrev_set (struct dwarf_section
+   abbrev_list *list = xmalloc (sizeof (*list));
+   list->first_abbrev = NULL;
+   list->last_abbrev = NULL;
++  list->raw = start;
+ 
+   while (start < end)
+     {
+@@ -1055,6 +1053,7 @@ process_abbrev_set (struct dwarf_section
+ 	 the caller.  */
+       if (start == end || entry == 0)
+ 	{
++	  list->next = NULL;
+ 	  list->start_of_next_abbrevs = start != end ? start : NULL;
+ 	  return list;
+ 	}
+@@ -1144,16 +1143,10 @@ find_and_process_abbrev_set (struct dwar
+   unsigned char *end = section->start + abbrev_base + abbrev_size;
+   abbrev_list *list = NULL;
+   if (free_list)
+-    list = find_abbrev_list_by_abbrev_offset (abbrev_base, abbrev_offset);
++    list = find_abbrev_list_by_raw_abbrev (start);
+   if (list == NULL)
+     {
+       list = process_abbrev_set (section, start, end);
+-      if (list)
+-	{
+-	  list->abbrev_base = abbrev_base;
+-	  list->abbrev_offset = abbrev_offset;
+-	  list->next = NULL;
+-	}
+       if (free_list)
+ 	*free_list = list;
+     }
diff --git a/poky/meta/recipes-devtools/btrfs-tools/btrfs-tools_5.19.bb b/poky/meta/recipes-devtools/btrfs-tools/btrfs-tools_5.19.1.bb
similarity index 97%
rename from poky/meta/recipes-devtools/btrfs-tools/btrfs-tools_5.19.bb
rename to poky/meta/recipes-devtools/btrfs-tools/btrfs-tools_5.19.1.bb
index 4f116a8..db8026e 100644
--- a/poky/meta/recipes-devtools/btrfs-tools/btrfs-tools_5.19.bb
+++ b/poky/meta/recipes-devtools/btrfs-tools/btrfs-tools_5.19.1.bb
@@ -18,7 +18,7 @@
 SRC_URI = "git://git.kernel.org/pub/scm/linux/kernel/git/kdave/btrfs-progs.git;branch=master \
            file://0001-Add-a-possibility-to-specify-where-python-modules-ar.patch \
            "
-SRCREV = "96b83b16158f3b87037085761bf212e958473767"
+SRCREV = "e1d9dab326e043696e6648aba4ccf90874d1d3d1"
 S = "${WORKDIR}/git"
 
 PACKAGECONFIG ??= " \
diff --git a/poky/meta/recipes-devtools/ccache/ccache_4.6.2.bb b/poky/meta/recipes-devtools/ccache/ccache_4.6.3.bb
similarity index 66%
rename from poky/meta/recipes-devtools/ccache/ccache_4.6.2.bb
rename to poky/meta/recipes-devtools/ccache/ccache_4.6.3.bb
index dbac022..bc1be92 100644
--- a/poky/meta/recipes-devtools/ccache/ccache_4.6.2.bb
+++ b/poky/meta/recipes-devtools/ccache/ccache_4.6.3.bb
@@ -11,16 +11,12 @@
 
 DEPENDS = "zstd"
 
-SRC_URI = "https://github.com/ccache/ccache/releases/download/v${PV}/${BP}.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/${BP}.tar.gz \
            file://0001-xxhash.h-Fix-build-with-gcc-12.patch \
-           file://0001-Include-time.h-for-time_t.patch \
-           file://0002-config-Include-sys-types.h-for-mode_t-defintion.patch \
-"
-SRC_URI[sha256sum] = "6a746a9bed01585388b68e2d58af2e77741fc8d66bc360b5a0b4c41fc284dafe"
+           "
+SRC_URI[sha256sum] = "f46ba3706ad80c30d4d5874dee2bf9227a7fcd0ccaac31b51919a3053d84bd05"
 
-UPSTREAM_CHECK_URI = "https://github.com/ccache/ccache/releases/"
-
-inherit cmake
+inherit cmake github-releases
 
 PATCHTOOL = "patch"
 
diff --git a/poky/meta/recipes-devtools/ccache/files/0001-Include-time.h-for-time_t.patch b/poky/meta/recipes-devtools/ccache/files/0001-Include-time.h-for-time_t.patch
deleted file mode 100644
index d752eb0..0000000
--- a/poky/meta/recipes-devtools/ccache/files/0001-Include-time.h-for-time_t.patch
+++ /dev/null
@@ -1,29 +0,0 @@
-From 590c656838a9b3769a7a855fb1891bfa8d8878ad Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Tue, 23 Aug 2022 10:27:21 -0700
-Subject: [PATCH] Include time.h for time_t
-
-Fixes
-src/core/Statistics.hpp:41:37: error: 'time_t' has not been declared
-|    41 |                                     time_t last_updated,
-|       |                                     ^~~~~~
-
-Upstream-Status: Submitted [https://github.com/ccache/ccache/pull/1145]
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
-
----
- src/core/Statistics.hpp | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/src/core/Statistics.hpp b/src/core/Statistics.hpp
-index 54f32e9..eb80e1c 100644
---- a/src/core/Statistics.hpp
-+++ b/src/core/Statistics.hpp
-@@ -21,6 +21,7 @@
- #include <core/StatisticsCounters.hpp>
- 
- #include <cstdint>
-+#include <ctime>
- #include <string>
- #include <unordered_map>
- #include <vector>
diff --git a/poky/meta/recipes-devtools/ccache/files/0002-config-Include-sys-types.h-for-mode_t-defintion.patch b/poky/meta/recipes-devtools/ccache/files/0002-config-Include-sys-types.h-for-mode_t-defintion.patch
deleted file mode 100644
index 0fd7760..0000000
--- a/poky/meta/recipes-devtools/ccache/files/0002-config-Include-sys-types.h-for-mode_t-defintion.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From f98b390a2d323f7f92fb0492b0943d201afe5b8f Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Tue, 23 Aug 2022 10:40:53 -0700
-Subject: [PATCH] config: Include sys/types.h for mode_t defintion
-
-Upstream-Status: Submitted [https://github.com/ccache/ccache/pull/1145]
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
-
----
- src/Config.hpp | 2 ++
- 1 file changed, 2 insertions(+)
-
-diff --git a/src/Config.hpp b/src/Config.hpp
-index a9e08ec..9e7af40 100644
---- a/src/Config.hpp
-+++ b/src/Config.hpp
-@@ -25,6 +25,8 @@
- 
- #include "third_party/nonstd/optional.hpp"
- 
-+#include <sys/types.h>
-+
- #include <cstdint>
- #include <functional>
- #include <limits>
diff --git a/poky/meta/recipes-devtools/cmake/cmake/OEToolchainConfig.cmake b/poky/meta/recipes-devtools/cmake/cmake/OEToolchainConfig.cmake
index 3ddef12..d6a1e04 100644
--- a/poky/meta/recipes-devtools/cmake/cmake/OEToolchainConfig.cmake
+++ b/poky/meta/recipes-devtools/cmake/cmake/OEToolchainConfig.cmake
@@ -11,10 +11,7 @@
 
 set(CMAKE_FIND_LIBRARY_CUSTOM_LIB_SUFFIX "$ENV{OE_CMAKE_FIND_LIBRARY_CUSTOM_LIB_SUFFIX}")
 
-# Set CMAKE_SYSTEM_PROCESSOR from the sysroot name (assuming processor-distro-os).
-if ($ENV{SDKTARGETSYSROOT} MATCHES "/sysroots/([a-zA-Z0-9_-]+)-.+-.+")
-  set(CMAKE_SYSTEM_PROCESSOR ${CMAKE_MATCH_1})
-endif()
+set( CMAKE_SYSTEM_PROCESSOR $ENV{OECORE_TARGET_ARCH} )
 
 # Include the toolchain configuration subscripts
 file( GLOB toolchain_config_files "${CMAKE_CURRENT_LIST_FILE}.d/*.cmake" )
diff --git a/poky/meta/recipes-devtools/dnf/dnf_4.13.0.bb b/poky/meta/recipes-devtools/dnf/dnf_4.14.0.bb
similarity index 93%
rename from poky/meta/recipes-devtools/dnf/dnf_4.13.0.bb
rename to poky/meta/recipes-devtools/dnf/dnf_4.14.0.bb
index bf89ceb..62df8c4 100644
--- a/poky/meta/recipes-devtools/dnf/dnf_4.13.0.bb
+++ b/poky/meta/recipes-devtools/dnf/dnf_4.14.0.bb
@@ -18,7 +18,7 @@
            file://0001-dnf-write-the-log-lock-to-root.patch \
            "
 
-SRCREV = "ef09e48fc3ebbbaf4dfa478e6e0532706506f091"
+SRCREV = "e50875b3f5790f70720bdb670e1dd2bf4d828744"
 UPSTREAM_CHECK_GITTAGREGEX = "(?P<pver>\d+(\.\d+)+)"
 
 S = "${WORKDIR}/git"
@@ -88,3 +88,9 @@
 SYSTEMD_AUTO_ENABLE ?= "disable"
 
 SKIP_RECIPE[dnf] ?= "${@bb.utils.contains('PACKAGE_CLASSES', 'package_rpm', '', 'does not build without package_rpm in PACKAGE_CLASSES due disabled rpm support in libsolv', d)}"
+
+# Packages for testing purposes
+PACKAGES += "${PN}-test-main ${PN}-test-dep"
+ALLOW_EMPTY:${PN}-test-main = "1"
+ALLOW_EMPTY:${PN}-test-dep = "1"
+RRECOMMENDS:${PN}-test-main = "${PN}-test-dep"
diff --git a/poky/meta/recipes-devtools/dosfstools/dosfstools_4.2.bb b/poky/meta/recipes-devtools/dosfstools/dosfstools_4.2.bb
index 289d939..47d81da 100644
--- a/poky/meta/recipes-devtools/dosfstools/dosfstools_4.2.bb
+++ b/poky/meta/recipes-devtools/dosfstools/dosfstools_4.2.bb
@@ -9,13 +9,11 @@
 LICENSE = "GPL-3.0-only"
 LIC_FILES_CHKSUM = "file://COPYING;md5=d32239bcb673463ab874e80d47fae504"
 
-SRC_URI = "https://github.com/dosfstools/dosfstools/releases/download/v${PV}/${BP}.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/${BP}.tar.gz \
           "
 SRC_URI[sha256sum] = "64926eebf90092dca21b14259a5301b7b98e7b1943e8a201c7d726084809b527"
 
-UPSTREAM_CHECK_URI = "https://github.com/dosfstools/dosfstools/releases"
-
-inherit autotools gettext pkgconfig update-alternatives
+inherit autotools gettext pkgconfig update-alternatives github-releases
 
 EXTRA_OECONF = "--enable-compat-symlinks --without-iconv"
 
diff --git a/poky/meta/recipes-devtools/elfutils/elfutils_0.187.bb b/poky/meta/recipes-devtools/elfutils/elfutils_0.187.bb
index 561112c..40c51c8 100644
--- a/poky/meta/recipes-devtools/elfutils/elfutils_0.187.bb
+++ b/poky/meta/recipes-devtools/elfutils/elfutils_0.187.bb
@@ -21,6 +21,7 @@
            file://run-ptest \
            file://ptest.patch \
            file://0001-tests-Makefile.am-compile-test_nlist-with-standard-C.patch \
+           file://0001-tests-Add-libeu-to-tests-needing-error-API.patch \
            "
 SRC_URI:append:libc-musl = " \
            file://0003-musl-utils.patch \
@@ -29,7 +30,6 @@
 SRC_URI[sha256sum] = "e70b0dfbe610f90c4d1fe0d71af142a4e25c3c4ef9ebab8d2d72b65159d454c8"
 
 inherit autotools gettext ptest pkgconfig
-PTEST_ENABLED:libc-musl = "0"
 
 EXTRA_OECONF = "--program-prefix=eu-"
 
@@ -66,6 +66,7 @@
 		# copy the files which needed by the cases
 		TEST_FILES="strip strip.o addr2line elfcmp objdump readelf size.o nm.o nm elflint elfcompress elfclassify stack unstrip"
 		install -d -m 755                       ${D}${PTEST_PATH}/src
+		install -d -m 755                       ${D}${PTEST_PATH}/lib
 		install -d -m 755                       ${D}${PTEST_PATH}/libelf
 		install -d -m 755                       ${D}${PTEST_PATH}/libdw
 		install -d -m 755                       ${D}${PTEST_PATH}/libdwfl
@@ -83,6 +84,7 @@
 		cp ${D}${libdir}/libasm-${PV}.so ${D}${PTEST_PATH}/libasm/libasm.so
 		cp ${B}/libcpu/libcpu.a ${D}${PTEST_PATH}/libcpu/
 		cp ${B}/libebl/libebl.a ${D}${PTEST_PATH}/libebl/
+		cp ${B}/lib/libeu.a ${D}${PTEST_PATH}/lib/
 		cp ${S}/libelf/*.h             ${D}${PTEST_PATH}/libelf/
 		cp ${S}/libdw/*.h              ${D}${PTEST_PATH}/libdw/
 		cp ${S}/libdwfl/*.h            ${D}${PTEST_PATH}/libdwfl/
diff --git a/poky/meta/recipes-devtools/elfutils/files/0001-tests-Add-libeu-to-tests-needing-error-API.patch b/poky/meta/recipes-devtools/elfutils/files/0001-tests-Add-libeu-to-tests-needing-error-API.patch
new file mode 100644
index 0000000..7c1bc87
--- /dev/null
+++ b/poky/meta/recipes-devtools/elfutils/files/0001-tests-Add-libeu-to-tests-needing-error-API.patch
@@ -0,0 +1,147 @@
+From c05c787070a390a2061bfcb845e1e35e8b1373b3 Mon Sep 17 00:00:00 2001
+From: Khem Raj <raj.khem@gmail.com>
+Date: Tue, 13 Sep 2022 09:33:00 -0700
+Subject: [PATCH] tests: Add libeu to tests needing error() API
+
+A local error() implementation is used when libc does not provide it,
+therefore link in libeu.a which contains this function in tests needing
+error() API
+
+Upstream-Status: Submitted [https://sourceware.org/pipermail/elfutils-devel/2022q3/005375.html]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+---
+ tests/Makefile.am | 60 +++++++++++++++++++++++------------------------
+ 1 file changed, 30 insertions(+), 30 deletions(-)
+
+diff --git a/tests/Makefile.am b/tests/Makefile.am
+index 3943e17..1acc49b 100644
+--- a/tests/Makefile.am
++++ b/tests/Makefile.am
+@@ -638,17 +638,17 @@ libeu = ../lib/libeu.a
+ arextract_LDADD = $(libelf)
+ arsymtest_LDADD = $(libelf)
+ newfile_LDADD = $(libelf)
+-saridx_LDADD = $(libelf)
++saridx_LDADD = $(libeu) $(libelf)
+ scnnames_LDADD = $(libelf)
+-sectiondump_LDADD = $(libelf)
++sectiondump_LDADD = $(libeu) $(libelf)
+ showptable_LDADD = $(libelf)
+ hash_LDADD = $(libelf)
+ test_nlist_CFLAGS =-g -O0 $(EXTRA_NLIST_CFLAGS)
+ test_nlist_LDADD = $(libelf)
+ msg_tst_LDADD = $(libelf)
+ newscn_LDADD = $(libelf)
+-early_offscn_LDADD = $(libelf)
+-ecp_LDADD = $(libelf)
++early_offscn_LDADD = $(libeu) $(libelf)
++ecp_LDADD = $(libeu) $(libelf)
+ update1_LDADD = $(libelf)
+ update2_LDADD = $(libelf)
+ update3_LDADD = $(libdw) $(libelf)
+@@ -662,12 +662,12 @@ get_files_LDADD = $(libdw) $(libelf)
+ next_files_LDADD = $(libdw) $(libelf)
+ get_aranges_LDADD = $(libdw) $(libelf)
+ allfcts_LDADD = $(libdw) $(libelf)
+-line2addr_LDADD = $(libdw) $(argp_LDADD)
+-addrscopes_LDADD = $(libdw) $(argp_LDADD)
+-funcscopes_LDADD = $(libdw) $(argp_LDADD)
+-funcretval_LDADD = $(libdw) $(argp_LDADD)
+-allregs_LDADD = $(libdw) $(argp_LDADD)
+-find_prologues_LDADD = $(libdw) $(argp_LDADD)
++line2addr_LDADD = $(libeu) $(libdw) $(argp_LDADD)
++addrscopes_LDADD = $(libeu) $(libdw) $(argp_LDADD)
++funcscopes_LDADD = $(libeu) $(libdw) $(argp_LDADD)
++funcretval_LDADD = $(libeu) $(libdw) $(argp_LDADD)
++allregs_LDADD = $(libeu) $(libdw) $(argp_LDADD)
++find_prologues_LDADD = $(libeu) $(libdw) $(argp_LDADD)
+ #show_ciefde_LDADD = ../libdwarf/libdwarf.so $(libelf)
+ asm_tst1_LDADD = $(libasm) $(libebl) $(libelf) $(libdw)
+ asm_tst2_LDADD = $(libasm) $(libebl) $(libelf) $(libdw)
+@@ -678,19 +678,19 @@ asm_tst6_LDADD = $(libasm) $(libebl) $(libelf) $(libdw)
+ asm_tst7_LDADD = $(libasm) $(libebl) $(libelf) $(libdw)
+ asm_tst8_LDADD = $(libasm) $(libebl) $(libelf) $(libdw)
+ asm_tst9_LDADD = $(libasm) $(libebl) $(libelf) $(libdw)
+-dwflmodtest_LDADD = $(libdw) $(libebl) $(libelf) $(argp_LDADD)
+-rdwrmmap_LDADD = $(libelf)
++dwflmodtest_LDADD = $(libeu) $(libdw) $(libebl) $(libelf) $(argp_LDADD)
++rdwrmmap_LDADD = $(libeu) $(libelf)
+ dwfl_bug_addr_overflow_LDADD = $(libdw) $(libebl) $(libelf)
+ arls_LDADD = $(libelf)
+-dwfl_bug_fd_leak_LDADD = $(libdw) $(libebl) $(libelf)
++dwfl_bug_fd_leak_LDADD = $(libeu) $(libdw) $(libebl) $(libelf)
+ dwfl_bug_report_LDADD = $(libdw) $(libebl) $(libelf)
+-dwfl_bug_getmodules_LDADD = $(libdw) $(libebl) $(libelf)
+-dwfl_addr_sect_LDADD = $(libdw) $(libebl) $(libelf) $(argp_LDADD)
++dwfl_bug_getmodules_LDADD = $(libeu) $(libdw) $(libebl) $(libelf)
++dwfl_addr_sect_LDADD = $(libeu) $(libdw) $(libebl) $(libelf) $(argp_LDADD)
+ dwarf_getmacros_LDADD = $(libdw)
+ dwarf_ranges_LDADD = $(libdw)
+ dwarf_getstring_LDADD = $(libdw)
+ addrcfi_LDADD = $(libdw) $(libebl) $(libelf) $(argp_LDADD)
+-dwarfcfi_LDADD = $(libdw) $(libelf)
++dwarfcfi_LDADD = $(libeu) $(libdw) $(libelf)
+ test_flag_nobits_LDADD = $(libelf)
+ rerequest_tag_LDADD = $(libdw)
+ alldts_LDADD = $(libdw) $(libelf)
+@@ -699,35 +699,35 @@ typeiter2_LDADD = $(libdw) $(libelf)
+ low_high_pc_LDADD = $(libdw) $(libelf) $(argp_LDADD)
+ test_elf_cntl_gelf_getshdr_LDADD = $(libelf)
+ dwflsyms_LDADD = $(libdw) $(libelf) $(argp_LDADD)
+-dwfllines_LDADD = $(libdw) $(libelf) $(argp_LDADD)
+-dwfl_report_elf_align_LDADD = $(libdw)
++dwfllines_LDADD = $(libeu) $(libdw) $(libelf) $(argp_LDADD)
++dwfl_report_elf_align_LDADD = $(libeu) $(libdw)
+ dwfl_report_segment_contiguous_LDADD = $(libdw) $(libebl) $(libelf)
+-varlocs_LDADD = $(libdw) $(libelf) $(argp_LDADD)
+-backtrace_LDADD = $(libdw) $(libelf) $(argp_LDADD)
++varlocs_LDADD = $(libeu) $(libdw) $(libelf) $(argp_LDADD)
++backtrace_LDADD = $(libeu) $(libdw) $(libelf) $(argp_LDADD)
+ # backtrace-child-biarch also uses those *_CFLAGS and *_LDLAGS variables:
+ backtrace_child_CFLAGS = $(fpie_CFLAGS)
+ backtrace_child_LDFLAGS = -pie -pthread
+ backtrace_child_biarch_SOURCES = backtrace-child.c
+-backtrace_data_LDADD = $(libdw) $(libelf)
++backtrace_data_LDADD = $(libeu) $(libdw) $(libelf)
+ backtrace_dwarf_CFLAGS = -Wno-unused-parameter
+-backtrace_dwarf_LDADD = $(libdw) $(libelf)
+-debuglink_LDADD = $(libdw) $(libelf)
+-debugaltlink_LDADD = $(libdw) $(libelf)
+-buildid_LDADD = $(libdw) $(libelf)
++backtrace_dwarf_LDADD = $(libeu) $(libdw) $(libelf)
++debuglink_LDADD = $(libeu) $(libdw) $(libelf)
++debugaltlink_LDADD = $(libeu) $(libdw) $(libelf)
++buildid_LDADD = $(libeu) $(libdw) $(libelf)
+ deleted_LDADD = ./deleted-lib.so
+ deleted_lib_so_LDFLAGS = -shared
+ deleted_lib_so_CFLAGS = $(fpic_CFLAGS) -fasynchronous-unwind-tables
+ aggregate_size_LDADD = $(libdw) $(libelf) $(argp_LDADD)
+ peel_type_LDADD = $(libdw) $(libelf) $(argp_LDADD)
+-vdsosyms_LDADD = $(libdw) $(libelf)
+-getsrc_die_LDADD = $(libdw) $(libelf)
++vdsosyms_LDADD = $(libeu) $(libdw) $(libelf)
++getsrc_die_LDADD = $(libeu) $(libdw) $(libelf)
+ strptr_LDADD = $(libelf)
+ newdata_LDADD = $(libelf)
+ elfstrtab_LDADD = $(libelf)
+-dwfl_proc_attach_LDADD = $(libdw)
++dwfl_proc_attach_LDADD = $(libeu) $(libdw)
+ dwfl_proc_attach_LDFLAGS = -pthread -rdynamic $(AM_LDFLAGS)
+ elfshphehdr_LDADD =$(libelf)
+-elfstrmerge_LDADD = $(libdw) $(libelf)
++elfstrmerge_LDADD = $(libeu) $(libdw) $(libelf)
+ dwelfgnucompressed_LDADD = $(libelf) $(libdw)
+ elfgetchdr_LDADD = $(libelf) $(libdw)
+ elfgetzdata_LDADD = $(libelf)
+@@ -743,7 +743,7 @@ get_units_split_LDADD = $(libdw)
+ attr_integrate_skel_LDADD = $(libdw)
+ all_dwarf_ranges_LDADD = $(libdw)
+ unit_info_LDADD = $(libdw)
+-next_cfi_LDADD = $(libelf) $(libdw)
++next_cfi_LDADD = $(libeu) $(libelf) $(libdw)
+ elfcopy_LDADD = $(libelf)
+ addsections_LDADD = $(libelf)
+ debuginfod_build_id_find_LDADD = $(libelf) $(libdw)
+-- 
+2.37.3
+
diff --git a/poky/meta/recipes-devtools/file/file_5.42.bb b/poky/meta/recipes-devtools/file/file_5.43.bb
similarity index 96%
rename from poky/meta/recipes-devtools/file/file_5.42.bb
rename to poky/meta/recipes-devtools/file/file_5.43.bb
index 8efcf09..102d0fd 100644
--- a/poky/meta/recipes-devtools/file/file_5.42.bb
+++ b/poky/meta/recipes-devtools/file/file_5.43.bb
@@ -13,7 +13,7 @@
 
 SRC_URI = "git://github.com/file/file.git;branch=master;protocol=https"
 
-SRCREV = "a42b38690579de23403e0aff9b7b2f9cdf55f534"
+SRCREV = "011778a2877f75597ed83c1a2716d917770920ee"
 S = "${WORKDIR}/git"
 
 inherit autotools update-alternatives
diff --git a/poky/meta/recipes-devtools/flex/flex_2.6.4.bb b/poky/meta/recipes-devtools/flex/flex_2.6.4.bb
index 8736b35..15cf6f5 100644
--- a/poky/meta/recipes-devtools/flex/flex_2.6.4.bb
+++ b/poky/meta/recipes-devtools/flex/flex_2.6.4.bb
@@ -12,7 +12,7 @@
 LIC_FILES_CHKSUM = "file://COPYING;md5=e4742cf92e89040b39486a6219b68067 \
                     file://src/gettext.h;beginline=1;endline=17;md5=9c05dda2f58d89b850c399cf22e1a00c"
 
-SRC_URI = "https://github.com/westes/flex/releases/download/v${PV}/flex-${PV}.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/flex-${PV}.tar.gz \
            file://run-ptest \
            file://0001-tests-add-a-target-for-building-tests-without-runnin.patch \
            ${@bb.utils.contains('PTEST_ENABLED', '1', '', 'file://disable-tests.patch', d)} \
@@ -24,16 +24,14 @@
 SRC_URI[md5sum] = "2882e3179748cc9f9c23ec593d6adc8d"
 SRC_URI[sha256sum] = "e87aae032bf07c26f85ac0ed3250998c37621d95f8bd748b31f15b33c45ee995"
 
-# Flex has moved to github from 2.6.1 onwards
-UPSTREAM_CHECK_URI = "https://github.com/westes/flex/releases"
-UPSTREAM_CHECK_REGEX = "flex-(?P<pver>\d+(\.\d+)+)\.tar"
+GITHUB_BASE_URI = "https://github.com/westes/flex/releases"
 
 # Disputed - yes there is stack exhaustion but no bug and it is building the
 # parser, not running it, effectively similar to a compiler ICE. Upstream no plans to address
 # https://github.com/westes/flex/issues/414
 CVE_CHECK_IGNORE += "CVE-2019-6293"
 
-inherit autotools gettext texinfo ptest
+inherit autotools gettext texinfo ptest github-releases
 
 M4 = "${bindir}/m4"
 M4:class-native = "${STAGING_BINDIR_NATIVE}/m4"
diff --git a/poky/meta/recipes-devtools/gcc/gcc-12.2.inc b/poky/meta/recipes-devtools/gcc/gcc-12.2.inc
index 572fd8b..0dbbeca 100644
--- a/poky/meta/recipes-devtools/gcc/gcc-12.2.inc
+++ b/poky/meta/recipes-devtools/gcc/gcc-12.2.inc
@@ -49,7 +49,6 @@
            file://0009-gcc-armv4-pass-fix-v4bx-to-linker-to-support-EABI.patch \
            file://0010-Use-the-multilib-config-files-from-B-instead-of-usin.patch \
            file://0011-Avoid-using-libdir-from-.la-which-usually-points-to-.patch \
-           file://0012-export-CPP.patch \
            file://0013-Ensure-target-gcc-headers-can-be-included.patch \
            file://0014-Don-t-search-host-directory-during-relink-if-inst_pr.patch \
            file://0015-libcc1-fix-libcc1-s-install-path-and-rpath.patch \
@@ -59,10 +58,8 @@
            file://0019-Re-introduce-spe-commandline-options.patch \
            file://0020-libgcc_s-Use-alias-for-__cpu_indicator_init-instead-.patch \
            file://0021-gentypes-genmodes-Do-not-use-__LINE__-for-maintainin.patch \
-           file://0022-mingw32-Enable-operation_not_supported.patch \
            file://0023-libatomic-Do-not-enforce-march-on-aarch64.patch \
            file://0024-Fix-install-path-of-linux64.h.patch \
-           file://0025-Move-sched.h-include-ahead-of-user-headers.patch \
            file://0026-rust-recursion-limit.patch \
            file://prefix-map-realpath.patch \
            file://hardcoded-paths.patch \
diff --git a/poky/meta/recipes-devtools/gcc/gcc-common.inc b/poky/meta/recipes-devtools/gcc/gcc-common.inc
index 2abc0e3..d3b3693 100644
--- a/poky/meta/recipes-devtools/gcc/gcc-common.inc
+++ b/poky/meta/recipes-devtools/gcc/gcc-common.inc
@@ -32,6 +32,16 @@
 
 get_gcc_float_setting[vardepvalue] = "${@get_gcc_float_setting(bb, d)}"
 
+def get_gcc_x86_64_arch_setting(bb, d):
+    import re
+    march = re.match(r'^.*-march=([^\s]*)', d.getVar('TUNE_CCARGS'))  # greedy '.*': the last -march= on the line is the one captured
+    if march:
+        return "--with-arch=%s " % march.group(1)
+    # No -march= found in TUNE_CCARGS: fall back to core2, the earliest supported x86-64 CPU
+    return "--with-arch=core2"
+
+get_gcc_x86_64_arch_setting[vardepvalue] = "${@get_gcc_x86_64_arch_setting(bb, d)}"
+
 def get_gcc_mips_plt_setting(bb, d):
     if d.getVar('TRANSLATED_TARGET_ARCH') in [ 'mips', 'mipsel' ] and bb.utils.contains('DISTRO_FEATURES', 'mplt', True, False, d):
         return "--with-mips-plt"
diff --git a/poky/meta/recipes-devtools/gcc/gcc-cross-canadian.inc b/poky/meta/recipes-devtools/gcc/gcc-cross-canadian.inc
index a87b446..c36e4cb 100644
--- a/poky/meta/recipes-devtools/gcc/gcc-cross-canadian.inc
+++ b/poky/meta/recipes-devtools/gcc/gcc-cross-canadian.inc
@@ -9,6 +9,7 @@
 
 require gcc-configure-common.inc
 
+EXTRA_OECONF += "--with-plugin-ld=ld"
 EXTRA_OECONF_PATHS = "\
     --with-gxx-include-dir=/not/exist${target_includedir}/c++/${BINV} \
     --with-build-time-tools=${STAGING_DIR_NATIVE}${prefix_native}/${TARGET_SYS}/bin \
@@ -134,8 +135,6 @@
 
 		ln -sf ${BINRELPATH}/${TARGET_PREFIX}$t$suffix $dest$t$suffix
 	done
-	t=real-ld
-	ln -sf ${BINRELPATH}/${TARGET_PREFIX}ld$suffix $dest$t$suffix
 
 	# libquadmath headers need to  be available in the gcc libexec dir
 	install -d ${D}${libdir}/gcc/${TARGET_SYS}/${BINV}/include/
diff --git a/poky/meta/recipes-devtools/gcc/gcc-target.inc b/poky/meta/recipes-devtools/gcc/gcc-target.inc
index cc65e99..7dac3ef 100644
--- a/poky/meta/recipes-devtools/gcc/gcc-target.inc
+++ b/poky/meta/recipes-devtools/gcc/gcc-target.inc
@@ -19,7 +19,7 @@
 EXTRA_OECONF:append:armv7a:class-target = " --with-arch=armv7-a${ARMFPARCHEXT}"
 EXTRA_OECONF:append:armv7ve:class-target = " --with-arch=armv7ve${ARMFPARCHEXT}"
 EXTRA_OECONF:append:arc:class-target = " --with-cpu=${TUNE_PKGARCH}"
-EXTRA_OECONF:append:x86-64:class-target = " --with-arch=native"
+EXTRA_OECONF:append:x86-64:class-target = " ${@get_gcc_x86_64_arch_setting(bb, d)}"
 
 # libcc1 requres gcc_cv_objdump when cross build, but gcc_cv_objdump is
 # set in subdir gcc, so subdir libcc1 can't use it, export it here to
diff --git a/poky/meta/recipes-devtools/gcc/gcc/0002-gcc-poison-system-directories.patch b/poky/meta/recipes-devtools/gcc/gcc/0002-gcc-poison-system-directories.patch
index 5a51ae7..bfec447 100644
--- a/poky/meta/recipes-devtools/gcc/gcc/0002-gcc-poison-system-directories.patch
+++ b/poky/meta/recipes-devtools/gcc/gcc/0002-gcc-poison-system-directories.patch
@@ -185,6 +185,19 @@
  
  /* Pass -d* flags, possibly modifying -dumpdir, -dumpbase et al.
  
+@@ -1265,7 +1270,11 @@ static const char *cc1_options =
+  %{coverage:-fprofile-arcs -ftest-coverage}\
+  %{fprofile-arcs|fprofile-generate*|coverage:\
+    %{!fprofile-update=single:\
+-     %{pthread:-fprofile-update=prefer-atomic}}}";
++     %{pthread:-fprofile-update=prefer-atomic}}}"
++#ifdef POISON_BY_DEFAULT
++ " -Werror=poison-system-directories"
++#endif
++  ;
+
+ static const char *asm_options =
+ "%{-target-help:%:print-asm-header()} "
 diff --git a/gcc/incpath.cc b/gcc/incpath.cc
 index bd2a97938eb..c80f100f476 100644
 --- a/gcc/incpath.cc
diff --git a/poky/meta/recipes-devtools/gcc/gcc/0012-export-CPP.patch b/poky/meta/recipes-devtools/gcc/gcc/0012-export-CPP.patch
deleted file mode 100644
index 7e1ebef..0000000
--- a/poky/meta/recipes-devtools/gcc/gcc/0012-export-CPP.patch
+++ /dev/null
@@ -1,50 +0,0 @@
-From 20afebc61199cd74481b0b831c1b56465cd37fa0 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Fri, 20 Feb 2015 09:40:59 +0000
-Subject: [PATCH] export CPP
-
-The OE environment sets and exports CPP as being the target gcc. When
-building gcc-cross-canadian for a mingw targetted sdk, the following can be found
-in build.x86_64-pokysdk-mingw32.i586-poky-linux/build-x86_64-linux/libiberty/config.log:
-
-configure:3641: checking for _FILE_OFFSET_BITS value needed for large files
-configure:3666: gcc  -c -isystem/media/build1/poky/build/tmp/sysroots/x86_64-linux/usr/include -O2 -pipe  conftest.c >&5
-configure:3666: $? = 0
-configure:3698: result: no
-configure:3786: checking how to run the C preprocessor
-configure:3856: result: x86_64-pokysdk-mingw32-gcc -E --sysroot=/media/build1/poky/build/tmp/sysroots/x86_64-nativesdk-mingw32-pokysdk-mingw32
-configure:3876: x86_64-pokysdk-mingw32-gcc -E --sysroot=/media/build1/poky/build/tmp/sysroots/x86_64-nativesdk-mingw32-pokysdk-mingw32 conftest.c
-configure:3876: $? = 0
-
-Note this is a *build* target (in build-x86_64-linux) so it should be
-using the host "gcc", not x86_64-pokysdk-mingw32-gcc. Since the mingw32
-headers are very different, using the wrong cpp is a real problem. It is leaking
-into configure through the CPP variable. Ultimately this leads to build
-failures related to not being able to include a process.h file for pem-unix.c.
-
-The fix is to ensure we export a sane CPP value into the build
-environment when using build targets. We could define a CPP_FOR_BUILD value which may be
-the version which needs to be upstreamed but for now, this fix is good enough to
-avoid the problem.
-
-RP 22/08/2013
-
-Upstream-Status: Pending
-
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- Makefile.in | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/Makefile.in b/Makefile.in
-index 1d9c83cc566..11819667751 100644
---- a/Makefile.in
-+++ b/Makefile.in
-@@ -152,6 +152,7 @@ BUILD_EXPORTS = \
- 	AR="$(AR_FOR_BUILD)"; export AR; \
- 	AS="$(AS_FOR_BUILD)"; export AS; \
- 	CC="$(CC_FOR_BUILD)"; export CC; \
-+	CPP="$(CC_FOR_BUILD) -E"; export CPP; \
- 	CFLAGS="$(CFLAGS_FOR_BUILD)"; export CFLAGS; \
- 	CONFIG_SHELL="$(SHELL)"; export CONFIG_SHELL; \
- 	CPP="$(CPP_FOR_BUILD)"; export CPP; \
diff --git a/poky/meta/recipes-devtools/gcc/gcc/0022-mingw32-Enable-operation_not_supported.patch b/poky/meta/recipes-devtools/gcc/gcc/0022-mingw32-Enable-operation_not_supported.patch
deleted file mode 100644
index f88ad51..0000000
--- a/poky/meta/recipes-devtools/gcc/gcc/0022-mingw32-Enable-operation_not_supported.patch
+++ /dev/null
@@ -1,26 +0,0 @@
-From 061cf79e7b6e89fdf0f2630ddaebbf1d7b271ac3 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Tue, 12 May 2020 10:39:09 -0700
-Subject: [PATCH] mingw32: Enable operation_not_supported
-
-Fixes nativesdk build errors on mingw32 gcc-runtime
-
-Upstream-Status: Pending
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- libstdc++-v3/config/os/mingw32/error_constants.h | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/libstdc++-v3/config/os/mingw32/error_constants.h b/libstdc++-v3/config/os/mingw32/error_constants.h
-index da5f4c2ac85..e855c86267c 100644
---- a/libstdc++-v3/config/os/mingw32/error_constants.h
-+++ b/libstdc++-v3/config/os/mingw32/error_constants.h
-@@ -107,7 +107,7 @@ _GLIBCXX_BEGIN_NAMESPACE_VERSION
- #ifdef EPERM
-       operation_not_permitted = 		EPERM,
- #endif
--//    operation_not_supported = 		EOPNOTSUPP,
-+      operation_not_supported = 		EOPNOTSUPP,
- #ifdef EWOULDBLOCK
-       operation_would_block = 			EWOULDBLOCK,
- #endif
diff --git a/poky/meta/recipes-devtools/gcc/gcc/0025-Move-sched.h-include-ahead-of-user-headers.patch b/poky/meta/recipes-devtools/gcc/gcc/0025-Move-sched.h-include-ahead-of-user-headers.patch
deleted file mode 100644
index d4aeacf..0000000
--- a/poky/meta/recipes-devtools/gcc/gcc/0025-Move-sched.h-include-ahead-of-user-headers.patch
+++ /dev/null
@@ -1,56 +0,0 @@
-From 7422adfb471f4b4f2ec870124064632d55f72e50 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Mon, 11 Apr 2022 15:46:18 -0700
-Subject: [PATCH] Move sched.h include ahead of user headers
-
-Fix attempt to use poisoned calloc error, this moves the sched.h before
-using system.h from gcc headers which includes #pragma GCC poison calloc
-
-Fixes
-In file included from /mnt/b/yoe/master/build/tmp/work/cortexa72-yoe-linux-musl/gcc/12.0.1-r0/recipe-sysroot/usr/include/pthread.h:30,
-                 from /mnt/b/yoe/master/build/tmp/work/cortexa72-yoe-linux-musl/gcc/12.0.1-r0/recipe-sysroot/usr/include/c++/12.0.1/aarch64-yoe-linux-musl/bits/gthr-default.h:35,
-                 from /mnt/b/yoe/master/build/tmp/work/cortexa72-yoe-linux-musl/gcc/12.0.1-r0/recipe-sysroot/usr/include/c++/12.0.1/aarch64-yoe-linux-musl/bits/gthr.h:148,
-                 from /mnt/b/yoe/master/build/tmp/work/cortexa72-yoe-linux-musl/gcc/12.0.1-r0/recipe-sysroot/usr/include/c++/12.0.1/ext/atomicity.h:35,
-                 from /mnt/b/yoe/master/build/tmp/work/cortexa72-yoe-linux-musl/gcc/12.0.1-r0/recipe-sysroot/usr/include/c++/12.0.1/bits/shared_ptr_base.h:61,
-                 from /mnt/b/yoe/master/build/tmp/work/cortexa72-yoe-linux-musl/gcc/12.0.1-r0/recipe-sysroot/usr/include/c++/12.0.1/bits/shared_ptr.h:53,
-                 from /mnt/b/yoe/master/build/tmp/work/cortexa72-yoe-linux-musl/gcc/12.0.1-r0/recipe-sysroot/usr/include/c++/12.0.1/memory:77,
-                 from ../../../../../../../work-shared/gcc-12.0.1-r0/gcc-12-20220410/libcc1/deleter.hh:23,
-                 from ../../../../../../../work-shared/gcc-12.0.1-r0/gcc-12-20220410/libcc1/rpc.hh:25,
-                 from ../../../../../../../work-shared/gcc-12.0.1-r0/gcc-12-20220410/libcc1/libcc1plugin.cc:67:
-/mnt/b/yoe/master/build/tmp/work/cortexa72-yoe-linux-musl/gcc/12.0.1-r0/recipe-sysroot/usr/include/sched.h:84:7: error: attempt to use poisoned "calloc"
-   84 | void *calloc(size_t, size_t);
-      |       ^
-/mnt/b/yoe/master/build/tmp/work/cortexa72-yoe-linux-musl/gcc/12.0.1-r0/recipe-sysroot/usr/include/sched.h:124:36: error: attempt to use poisoned "calloc"
-  124 | #define CPU_ALLOC(n) ((cpu_set_t *)calloc(1,CPU_ALLOC_SIZE(n)))
-
-Upstream-Status: Pending
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- libcc1/libcc1plugin.cc | 1 +
- libcc1/libcp1plugin.cc | 1 +
- 2 files changed, 2 insertions(+)
-
-diff --git a/libcc1/libcc1plugin.cc b/libcc1/libcc1plugin.cc
-index 12ab5a57c8d..fff9bfab18b 100644
---- a/libcc1/libcc1plugin.cc
-+++ b/libcc1/libcc1plugin.cc
-@@ -17,6 +17,7 @@
-    along with GCC; see the file COPYING3.  If not see
-    <http://www.gnu.org/licenses/>.  */
- 
-+#include <sched.h>
- #include <cc1plugin-config.h>
- 
- #undef PACKAGE_NAME
-diff --git a/libcc1/libcp1plugin.cc b/libcc1/libcp1plugin.cc
-index 83dab7f58b1..0b83ce7a09d 100644
---- a/libcc1/libcp1plugin.cc
-+++ b/libcc1/libcp1plugin.cc
-@@ -18,6 +18,7 @@
-    along with GCC; see the file COPYING3.  If not see
-    <http://www.gnu.org/licenses/>.  */
- 
-+#include <sched.h>
- #include <cc1plugin-config.h>
- 
- #undef PACKAGE_NAME
diff --git a/poky/meta/recipes-devtools/git/git_2.37.2.bb b/poky/meta/recipes-devtools/git/git_2.37.3.bb
similarity index 98%
rename from poky/meta/recipes-devtools/git/git_2.37.2.bb
rename to poky/meta/recipes-devtools/git/git_2.37.3.bb
index b7858e2..2eed85e 100644
--- a/poky/meta/recipes-devtools/git/git_2.37.2.bb
+++ b/poky/meta/recipes-devtools/git/git_2.37.3.bb
@@ -165,4 +165,4 @@
                  "
 EXTRA_OEMAKE += "NO_GETTEXT=1"
 
-SRC_URI[tarball.sha256sum] = "4c428908e3a2dca4174df6ef49acc995a4fdb1b45205a2c79794487a33bc06e5"
+SRC_URI[tarball.sha256sum] = "181f65587155ea48c682f63135678ec53055adf1532428752912d356e46b64a8"
diff --git a/poky/meta/recipes-devtools/go/go-native_1.19.bb b/poky/meta/recipes-devtools/go/go-native_1.19.bb
index 76c0ab7..ddf25b2 100644
--- a/poky/meta/recipes-devtools/go/go-native_1.19.bb
+++ b/poky/meta/recipes-devtools/go/go-native_1.19.bb
@@ -5,7 +5,7 @@
 
 inherit native
 
-SRC_URI:append = " https://dl.google.com/go/go1.4-bootstrap-20171003.tar.gz;name=bootstrap;subdir=go1.4"
+SRC_URI += "https://dl.google.com/go/go1.4-bootstrap-20171003.tar.gz;name=bootstrap;subdir=go1.4"
 SRC_URI[bootstrap.sha256sum] = "f4ff5b5eb3a3cae1c993723f3eab519c5bae18866b5e5f96fe1102f0cb5c3e52"
 
 export GOOS = "${BUILD_GOOS}"
diff --git a/poky/meta/recipes-devtools/go/go-runtime.inc b/poky/meta/recipes-devtools/go/go-runtime.inc
index e18339c..02601f7 100644
--- a/poky/meta/recipes-devtools/go/go-runtime.inc
+++ b/poky/meta/recipes-devtools/go/go-runtime.inc
@@ -50,6 +50,8 @@
 	rm -rf ${D}${libdir}/go/pkg/tool
 	rm -rf ${D}${libdir}/go/pkg/obj
 	rm -rf ${D}${libdir}/go/pkg/bootstrap
+	# the cmd directory is built for the native arch, so drop it from the install (relevant when BUILD == TARGET)
+	rm -rf ${D}${libdir}/go/pkg/${BUILD_GOTUPLE}/cmd
 	find src -mindepth 1 -maxdepth 1 -type d | while read srcdir; do
 		cp --preserve=mode,timestamps -R $srcdir ${D}${libdir}/go/src/
 	done
diff --git a/poky/meta/recipes-devtools/libcomps/libcomps_0.1.18.bb b/poky/meta/recipes-devtools/libcomps/libcomps_0.1.19.bb
similarity index 93%
rename from poky/meta/recipes-devtools/libcomps/libcomps_0.1.18.bb
rename to poky/meta/recipes-devtools/libcomps/libcomps_0.1.19.bb
index dd0a1f8..fa1fbc8 100644
--- a/poky/meta/recipes-devtools/libcomps/libcomps_0.1.18.bb
+++ b/poky/meta/recipes-devtools/libcomps/libcomps_0.1.19.bb
@@ -8,7 +8,7 @@
            file://0002-Do-not-set-PYTHON_INSTALL_DIR-by-running-python.patch \
            "
 
-SRCREV = "dee4ae37f7818709802de28c4d16fa823bd83ae2"
+SRCREV = "9322bdcf06630cc094f094f944d7d0e2cb798b73"
 
 S = "${WORKDIR}/git"
 
diff --git a/poky/meta/recipes-devtools/libdnf/libdnf_0.68.0.bb b/poky/meta/recipes-devtools/libdnf/libdnf_0.69.0.bb
similarity index 97%
rename from poky/meta/recipes-devtools/libdnf/libdnf_0.68.0.bb
rename to poky/meta/recipes-devtools/libdnf/libdnf_0.69.0.bb
index 86cf41c..da2550d 100644
--- a/poky/meta/recipes-devtools/libdnf/libdnf_0.68.0.bb
+++ b/poky/meta/recipes-devtools/libdnf/libdnf_0.69.0.bb
@@ -13,7 +13,7 @@
            file://0001-libdnf-dnf-context.cpp-do-not-try-to-access-BDB-data.patch \
            "
 
-SRCREV = "388e7699f8a75fa81aca05d09389acea7e489168"
+SRCREV = "5c6d9cd6e5955e7038722f091396607c60fcbdd1"
 UPSTREAM_CHECK_GITTAGREGEX = "(?P<pver>(?!4\.90)\d+(\.\d+)+)"
 
 S = "${WORKDIR}/git"
diff --git a/poky/meta/recipes-devtools/librepo/librepo/0001-metadata_downloader-Include-unistd.h-for-lseek.patch b/poky/meta/recipes-devtools/librepo/librepo/0001-metadata_downloader-Include-unistd.h-for-lseek.patch
deleted file mode 100644
index 22b3110..0000000
--- a/poky/meta/recipes-devtools/librepo/librepo/0001-metadata_downloader-Include-unistd.h-for-lseek.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From 5c63ec2e2d4726268ace85e5c61727cbd811d982 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Sat, 27 Aug 2022 09:00:24 -0700
-Subject: [PATCH] metadata_downloader: Include unistd.h for lseek()
-
-This is found when compiling on musl systems
-
-Fixes
-
-metadata_downloader.c:331:9: error: call to undeclared function 'lseek'; ISO C99 and later do not support implicit function declarations [-Wimplicit-function-declaration]
-        lseek(fd_value, SEEK_SET, 0);
-        ^
-
-Upstream-Status: Submitted [https://github.com/rpm-software-management/librepo/pull/263]
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- librepo/metadata_downloader.c | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/librepo/metadata_downloader.c b/librepo/metadata_downloader.c
-index 123c77b..6604255 100644
---- a/librepo/metadata_downloader.c
-+++ b/librepo/metadata_downloader.c
-@@ -24,6 +24,7 @@
- #include <assert.h>
- #include <string.h>
- #include <errno.h>
-+#include <unistd.h>
- #include <sys/stat.h>
- 
- #include "librepo/librepo.h"
--- 
-2.37.2
-
diff --git a/poky/meta/recipes-devtools/librepo/librepo_1.14.4.bb b/poky/meta/recipes-devtools/librepo/librepo_1.14.5.bb
similarity index 87%
rename from poky/meta/recipes-devtools/librepo/librepo_1.14.4.bb
rename to poky/meta/recipes-devtools/librepo/librepo_1.14.5.bb
index 2b8bd13..46cfce5 100644
--- a/poky/meta/recipes-devtools/librepo/librepo_1.14.4.bb
+++ b/poky/meta/recipes-devtools/librepo/librepo_1.14.5.bb
@@ -8,10 +8,9 @@
 SRC_URI = "git://github.com/rpm-software-management/librepo.git;branch=master;protocol=https \
            file://0002-Do-not-try-to-obtain-PYTHON_INSTALL_DIR-by-running-p.patch \
            file://0004-Set-gpgme-variables-with-pkg-config-not-with-cmake-m.patch \
-           file://0001-metadata_downloader-Include-unistd.h-for-lseek.patch \
            "
 
-SRCREV = "2bd1041c741c85bc196ca01dcca1eae6099eb742"
+SRCREV = "f4b915be5e04e88263097f65addfcac5919fef41"
 
 S = "${WORKDIR}/git"
 
diff --git a/poky/meta/recipes-devtools/meson/meson_0.63.1.bb b/poky/meta/recipes-devtools/meson/meson_0.63.2.bb
similarity index 93%
rename from poky/meta/recipes-devtools/meson/meson_0.63.1.bb
rename to poky/meta/recipes-devtools/meson/meson_0.63.2.bb
index 7f77a7d..890faac 100644
--- a/poky/meta/recipes-devtools/meson/meson_0.63.1.bb
+++ b/poky/meta/recipes-devtools/meson/meson_0.63.2.bb
@@ -7,7 +7,8 @@
 LICENSE = "Apache-2.0"
 LIC_FILES_CHKSUM = "file://COPYING;md5=3b83ef96387f14655fc854ddc3c6bd57"
 
-SRC_URI = "https://github.com/mesonbuild/meson/releases/download/${PV}/meson-${PV}.tar.gz \
+GITHUB_BASE_URI = "https://github.com/mesonbuild/meson/releases/"
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/meson-${PV}.tar.gz \
            file://meson-setup.py \
            file://meson-wrapper \
            file://0001-python-module-do-not-manipulate-the-environment-when.patch \
@@ -17,12 +18,9 @@
            file://0001-is_debianlike-always-return-False.patch \
            file://0001-Check-for-clang-before-guessing-gcc-or-lcc.patch \
            "
-SRC_URI[sha256sum] = "06fe13297213d6ff0121c5d5aab25a56ef938ffec57414ed6086fda272cb65e9"
+SRC_URI[sha256sum] = "16222f17ef76be0542c91c07994f9676ae879f46fc21c0c786a21ef2cb518bbf"
 
-UPSTREAM_CHECK_URI = "https://github.com/mesonbuild/meson/releases"
-UPSTREAM_CHECK_REGEX = "meson-(?P<pver>\d+(\.\d+)+)\.tar"
-
-inherit python_setuptools_build_meta
+inherit python_setuptools_build_meta github-releases
 
 RDEPENDS:${PN} = "ninja python3-modules python3-pkg-resources"
 
diff --git a/poky/meta/recipes-devtools/ninja/ninja_1.11.0.bb b/poky/meta/recipes-devtools/ninja/ninja_1.11.1.bb
similarity index 94%
rename from poky/meta/recipes-devtools/ninja/ninja_1.11.0.bb
rename to poky/meta/recipes-devtools/ninja/ninja_1.11.1.bb
index 4ec361c..255f5ef 100644
--- a/poky/meta/recipes-devtools/ninja/ninja_1.11.0.bb
+++ b/poky/meta/recipes-devtools/ninja/ninja_1.11.1.bb
@@ -6,7 +6,7 @@
 
 DEPENDS = "re2c-native ninja-native"
 
-SRCREV = "51edeeb063a82693573db43782d9e3733b2840e4"
+SRCREV = "a524bf3f6bacd1b4ad85d719eed2737d8562f27a"
 
 SRC_URI = "git://github.com/ninja-build/ninja.git;branch=release;protocol=https"
 UPSTREAM_CHECK_GITTAGREGEX = "v(?P<pver>.*)"
diff --git a/poky/meta/recipes-devtools/opkg/opkg_0.6.0.bb b/poky/meta/recipes-devtools/opkg/opkg_0.6.0.bb
index 7b351e8..4cd589c 100644
--- a/poky/meta/recipes-devtools/opkg/opkg_0.6.0.bb
+++ b/poky/meta/recipes-devtools/opkg/opkg_0.6.0.bb
@@ -39,6 +39,7 @@
 PACKAGECONFIG[curl] = "--enable-curl,--disable-curl,curl"
 PACKAGECONFIG[ssl-curl] = "--enable-ssl-curl,--disable-ssl-curl,curl openssl"
 PACKAGECONFIG[sha256] = "--enable-sha256,--disable-sha256"
+PACKAGECONFIG[zstd] = "--enable-zstd,--disable-zstd,zstd"
 PACKAGECONFIG[libsolv] = "--with-libsolv,--without-libsolv,libsolv"
 
 EXTRA_OECONF:class-native = "--localstatedir=/${@os.path.relpath('${localstatedir}', '${STAGING_DIR_NATIVE}')} --sysconfdir=/${@os.path.relpath('${sysconfdir}', '${STAGING_DIR_NATIVE}')}"
diff --git a/poky/meta/recipes-devtools/perl-cross/perlcross_1.4.bb b/poky/meta/recipes-devtools/perl-cross/perlcross_1.4.bb
index 17ce901..98a8fa3 100644
--- a/poky/meta/recipes-devtools/perl-cross/perlcross_1.4.bb
+++ b/poky/meta/recipes-devtools/perl-cross/perlcross_1.4.bb
@@ -8,16 +8,16 @@
 # but is not provided inside the release tarballs
 LIC_FILES_CHKSUM = "file://${WORKDIR}/README.md;md5=252fcce2026b765fee1ad74d2fb07a3b"
 
-inherit allarch
+inherit allarch github-releases
 
-SRC_URI = "https://github.com/arsv/perl-cross/releases/download/${PV}/perl-cross-${PV}.tar.gz;name=perl-cross \
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/perl-cross-${PV}.tar.gz;name=perl-cross \
            file://README.md \
            file://0001-perl-cross-add-LDFLAGS-when-linking-libperl.patch \
            file://determinism.patch \
            file://0001-Makefile-check-the-file-if-patched-or-not.patch \
            file://0001-configure_func.sh-Add-_GNU_SOURCE-define-and-functio.patch \
            "
-UPSTREAM_CHECK_URI = "https://github.com/arsv/perl-cross/releases/"
+GITHUB_BASE_URI = "https://github.com/arsv/perl-cross/releases/"
 
 SRC_URI[perl-cross.sha256sum] = "be9d9f9f7148edff7a2f9695ba3cb7e3975eff6b25a9a81dd311725fd757aa91"
 
diff --git a/poky/meta/recipes-devtools/python/python3-certifi_2022.6.15.bb b/poky/meta/recipes-devtools/python/python3-certifi_2022.9.14.bb
similarity index 74%
rename from poky/meta/recipes-devtools/python/python3-certifi_2022.6.15.bb
rename to poky/meta/recipes-devtools/python/python3-certifi_2022.9.14.bb
index f78409b..117a32b 100644
--- a/poky/meta/recipes-devtools/python/python3-certifi_2022.6.15.bb
+++ b/poky/meta/recipes-devtools/python/python3-certifi_2022.9.14.bb
@@ -5,9 +5,9 @@
 HOMEPAGE = " http://certifi.io/"
 
 LICENSE = "ISC"
-LIC_FILES_CHKSUM = "file://LICENSE;md5=67da0714c3f9471067b729eca6c9fbe8"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=3c2b7404369c587c3559afb604fce2f2"
 
-SRC_URI[sha256sum] = "84c85a9078b11105f04f3036a9482ae10e4621616db313fe045dd24743a0820d"
+SRC_URI[sha256sum] = "36973885b9542e6bd01dea287b2b4b3b21236307c56324fcc3f1160f2d655ed5"
 
 inherit pypi setuptools3
 
diff --git a/poky/meta/recipes-devtools/python/python3-dtschema_2022.8.1.bb b/poky/meta/recipes-devtools/python/python3-dtschema_2022.8.3.bb
similarity index 83%
rename from poky/meta/recipes-devtools/python/python3-dtschema_2022.8.1.bb
rename to poky/meta/recipes-devtools/python/python3-dtschema_2022.8.3.bb
index 38f646e..bc819c0 100644
--- a/poky/meta/recipes-devtools/python/python3-dtschema_2022.8.1.bb
+++ b/poky/meta/recipes-devtools/python/python3-dtschema_2022.8.3.bb
@@ -7,7 +7,7 @@
 
 PYPI_PACKAGE = "dtschema"
 
-SRC_URI[sha256sum] = "3e56a9920944223d6f93fd51ada19dd8db554ac9182ef52c1c5c9d4966ab30aa"
+SRC_URI[sha256sum] = "128c88a393cd04154cef4d4b10a643e4473ae290b436e8e61d2e97ae8e53255d"
 
 DEPENDS += "python3-setuptools-scm-native"
 RDEPENDS:${PN} += "python3-ruamel-yaml python3-jsonschema python3-rfc3987"
diff --git a/poky/meta/recipes-devtools/python/python3-hatchling_1.8.1.bb b/poky/meta/recipes-devtools/python/python3-hatchling_1.9.0.bb
similarity index 85%
rename from poky/meta/recipes-devtools/python/python3-hatchling_1.8.1.bb
rename to poky/meta/recipes-devtools/python/python3-hatchling_1.9.0.bb
index bfdb664..011f5ed 100644
--- a/poky/meta/recipes-devtools/python/python3-hatchling_1.8.1.bb
+++ b/poky/meta/recipes-devtools/python/python3-hatchling_1.9.0.bb
@@ -8,7 +8,7 @@
 DEPENDS += "python3-pluggy-native python3-tomli-native python3-pathspec-native python3-packaging-native python3-editables-native"
 DEPENDS:remove:class-native = "python3-hatchling-native"
 
-SRC_URI[sha256sum] = "448b04b23faed669b2b565b998ac955af4feea66c5deed3a1212ac9399d2e1cd"
+SRC_URI[sha256sum] = "b57c7362f437b9426e4b94228a21d2ac5804fbb2abcb01adde2544a35bb303cd"
 
 do_compile:prepend() {
     export PYTHONPATH=src
diff --git a/poky/meta/recipes-devtools/python/python3-hypothesis_6.46.11.bb b/poky/meta/recipes-devtools/python/python3-hypothesis_6.54.5.bb
similarity index 90%
rename from poky/meta/recipes-devtools/python/python3-hypothesis_6.46.11.bb
rename to poky/meta/recipes-devtools/python/python3-hypothesis_6.54.5.bb
index 1d9772d..9adb614 100644
--- a/poky/meta/recipes-devtools/python/python3-hypothesis_6.46.11.bb
+++ b/poky/meta/recipes-devtools/python/python3-hypothesis_6.54.5.bb
@@ -13,7 +13,7 @@
     file://test_rle.py \
     "
 
-SRC_URI[sha256sum] = "f5c1cf61b24b094355577a6b8fbbb8eb54c1b0216fbc0519af97c46bddf43c42"
+SRC_URI[sha256sum] = "8a9056825695f415bfad4e808ae719fc01383a9ab659775319724365afcc7ec7"
 
 RDEPENDS:${PN} += " \
     python3-attrs \
diff --git a/poky/meta/recipes-devtools/python/python3-idna_3.3.bb b/poky/meta/recipes-devtools/python/python3-idna_3.4.bb
similarity index 62%
rename from poky/meta/recipes-devtools/python/python3-idna_3.3.bb
rename to poky/meta/recipes-devtools/python/python3-idna_3.4.bb
index ee92f44..41a666f 100644
--- a/poky/meta/recipes-devtools/python/python3-idna_3.3.bb
+++ b/poky/meta/recipes-devtools/python/python3-idna_3.4.bb
@@ -3,14 +3,9 @@
 LICENSE = "BSD-3-Clause & Python-2.0 & Unicode-TOU"
 LIC_FILES_CHKSUM = "file://LICENSE.md;md5=239668a7c6066d9e0c5382e9c8c6c0e1"
 
-SRC_URI[sha256sum] = "9d643ff0a55b762d5cdb124b8eaa99c66322e2157b69160bc32796e824360e6d"
+SRC_URI[sha256sum] = "814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"
 
-inherit pypi setuptools3
-
-# Remove bundled egg-info
-do_compile:prepend() {
-    rm -rf ${S}/idna.egg-info
-}
+inherit pypi python_flit_core
 
 RDEPENDS:${PN}:class-target = "\
     ${PYTHON_PN}-codecs \
diff --git a/meta-openembedded/meta-python/recipes-devtools/python/python3-lxml_4.9.1.bb b/poky/meta/recipes-devtools/python/python3-lxml_4.9.1.bb
similarity index 100%
rename from meta-openembedded/meta-python/recipes-devtools/python/python3-lxml_4.9.1.bb
rename to poky/meta/recipes-devtools/python/python3-lxml_4.9.1.bb
diff --git a/poky/meta/recipes-devtools/python/python3-mako_1.2.1.bb b/poky/meta/recipes-devtools/python/python3-mako_1.2.2.bb
similarity index 85%
rename from poky/meta/recipes-devtools/python/python3-mako_1.2.1.bb
rename to poky/meta/recipes-devtools/python/python3-mako_1.2.2.bb
index 8155c37..e3774ee 100644
--- a/poky/meta/recipes-devtools/python/python3-mako_1.2.1.bb
+++ b/poky/meta/recipes-devtools/python/python3-mako_1.2.2.bb
@@ -8,7 +8,7 @@
 
 inherit pypi python_setuptools_build_meta
 
-SRC_URI[sha256sum] = "f054a5ff4743492f1aa9ecc47172cb33b42b9d993cffcc146c9de17e717b0307"
+SRC_URI[sha256sum] = "3724869b363ba630a272a5f89f68c070352137b8fd1757650017b7e06fda163f"
 
 RDEPENDS:${PN} = "${PYTHON_PN}-html \
                   ${PYTHON_PN}-markupsafe \
diff --git a/poky/meta/recipes-devtools/python/python3-numpy_1.23.2.bb b/poky/meta/recipes-devtools/python/python3-numpy_1.23.3.bb
similarity index 88%
rename from poky/meta/recipes-devtools/python/python3-numpy_1.23.2.bb
rename to poky/meta/recipes-devtools/python/python3-numpy_1.23.3.bb
index 960dcf9..9d086ae 100644
--- a/poky/meta/recipes-devtools/python/python3-numpy_1.23.2.bb
+++ b/poky/meta/recipes-devtools/python/python3-numpy_1.23.3.bb
@@ -7,20 +7,19 @@
 
 SRCNAME = "numpy"
 
-SRC_URI = "https://github.com/${SRCNAME}/${SRCNAME}/releases/download/v${PV}/${SRCNAME}-${PV}.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/${SRCNAME}-${PV}.tar.gz \
            file://0001-Don-t-search-usr-and-so-on-for-libraries-by-default-.patch \
            file://0001-numpy-core-Define-RISCV-32-support.patch \
            file://run-ptest \
            file://0001-generate_umath.py-do-not-write-full-path-to-output-f.patch \
            "
-SRC_URI[sha256sum] = "b78d00e48261fbbd04aa0d7427cf78d18401ee0abd89c7559bbf422e5b1c7d01"
+SRC_URI[sha256sum] = "51bf49c0cd1d52be0a240aa66f3458afc4b95d8993d2d04f0d91fa60c10af6cd"
 
-UPSTREAM_CHECK_URI = "https://github.com/numpy/numpy/releases"
-UPSTREAM_CHECK_REGEX = "(?P<pver>\d+(\.\d+)+)\.tar"
+GITHUB_BASE_URI = "https://github.com/numpy/numpy/releases"
 
 DEPENDS += "python3-cython-native"
 
-inherit ptest setuptools3
+inherit ptest setuptools3 github-releases
 
 S = "${WORKDIR}/numpy-${PV}"
 
diff --git a/poky/meta/recipes-devtools/python/python3-pathspec_0.9.0.bb b/poky/meta/recipes-devtools/python/python3-pathspec_0.10.1.bb
similarity index 78%
rename from poky/meta/recipes-devtools/python/python3-pathspec_0.9.0.bb
rename to poky/meta/recipes-devtools/python/python3-pathspec_0.10.1.bb
index 641f1cf..8f2af0e 100644
--- a/poky/meta/recipes-devtools/python/python3-pathspec_0.9.0.bb
+++ b/poky/meta/recipes-devtools/python/python3-pathspec_0.10.1.bb
@@ -4,7 +4,7 @@
 LICENSE = "MPL-2.0"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=815ca599c9df247a0c7f619bab123dad"
 
-SRC_URI[sha256sum] = "e564499435a2673d586f6b2130bb5b95f04a3ba06f81b8f895b651a3c76aabb1"
+SRC_URI[sha256sum] = "7ace6161b621d31e7902eb6b5ae148d12cfd23f4a249b9ffb6b9fee12084323d"
 
 inherit pypi setuptools3
 
diff --git a/poky/meta/recipes-devtools/python/python3-psutil_5.9.1.bb b/poky/meta/recipes-devtools/python/python3-psutil_5.9.2.bb
similarity index 87%
rename from poky/meta/recipes-devtools/python/python3-psutil_5.9.1.bb
rename to poky/meta/recipes-devtools/python/python3-psutil_5.9.2.bb
index 04b6fb0..5b31014 100644
--- a/poky/meta/recipes-devtools/python/python3-psutil_5.9.1.bb
+++ b/poky/meta/recipes-devtools/python/python3-psutil_5.9.2.bb
@@ -3,7 +3,7 @@
 LIC_FILES_CHKSUM = "file://LICENSE;md5=e35fd9f271d19d5f742f20a9d1f8bb8b"
 HOMEPAGE = "https://pypi.org/project/psutil/"
 
-SRC_URI[sha256sum] = "57f1819b5d9e95cdfb0c881a8a5b7d542ed0b7c522d575706a80bedc848c8954"
+SRC_URI[sha256sum] = "feb861a10b6c3bb00701063b37e4afc754f8217f0f09c42280586bd6ac712b5c"
 
 inherit pypi setuptools3
 
diff --git a/poky/meta/recipes-devtools/python/python3-pycairo_1.21.0.bb b/poky/meta/recipes-devtools/python/python3-pycairo_1.21.0.bb
index 29452c7..b692067 100644
--- a/poky/meta/recipes-devtools/python/python3-pycairo_1.21.0.bb
+++ b/poky/meta/recipes-devtools/python/python3-pycairo_1.21.0.bb
@@ -10,14 +10,14 @@
 # cairo >= 1.14
 DEPENDS = "cairo python3"
 
-SRC_URI = "https://github.com/pygobject/pycairo/releases/download/v${PV}/pycairo-${PV}.tar.gz"
-UPSTREAM_CHECK_URI = "https://github.com/pygobject/pycairo/releases/"
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/pycairo-${PV}.tar.gz"
+GITHUB_BASE_URI = "https://github.com/pygobject/pycairo/releases/"
 
 SRC_URI[sha256sum] = "251907f18a552df938aa3386657ff4b5a4937dde70e11aa042bc297957f4b74b"
 
 S = "${WORKDIR}/pycairo-${PV}"
 
-inherit meson pkgconfig python3targetconfig
+inherit meson pkgconfig python3targetconfig github-releases
 
 CFLAGS += "-fPIC"
 
diff --git a/poky/meta/recipes-devtools/python/python3-pytest_7.1.2.bb b/poky/meta/recipes-devtools/python/python3-pytest_7.1.3.bb
similarity index 92%
rename from poky/meta/recipes-devtools/python/python3-pytest_7.1.2.bb
rename to poky/meta/recipes-devtools/python/python3-pytest_7.1.3.bb
index c642d9a..373f7f3 100644
--- a/poky/meta/recipes-devtools/python/python3-pytest_7.1.2.bb
+++ b/poky/meta/recipes-devtools/python/python3-pytest_7.1.3.bb
@@ -5,7 +5,7 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=bd27e41b6550fe0fc45356d1d81ee37c"
 
-SRC_URI[sha256sum] = "a06a0425453864a270bc45e71f783330a7428defb4230fb5e6a731fde06ecd45"
+SRC_URI[sha256sum] = "4f365fec2dff9c1162f834d9f18af1ba13062db0c708bf7b946f8a5c76180c39"
 
 DEPENDS += "python3-setuptools-scm-native"
 
diff --git a/poky/meta/recipes-devtools/python/python3-rfc3986-validator_0.1.1.bb b/poky/meta/recipes-devtools/python/python3-rfc3986-validator_0.1.1.bb
index 4abd181..e374979 100644
--- a/poky/meta/recipes-devtools/python/python3-rfc3986-validator_0.1.1.bb
+++ b/poky/meta/recipes-devtools/python/python3-rfc3986-validator_0.1.1.bb
@@ -13,7 +13,7 @@
 
 inherit pypi setuptools3
 
-SRC_URI:append = " \
+SRC_URI += "\
     file://0001-setup.py-move-pytest-runner-to-test_requirements.patch \
 "
 
diff --git a/poky/meta/recipes-devtools/python/python3/python3-manifest.json b/poky/meta/recipes-devtools/python/python3/python3-manifest.json
index 2f5dad6..2205cf2 100644
--- a/poky/meta/recipes-devtools/python/python3/python3-manifest.json
+++ b/poky/meta/recipes-devtools/python/python3/python3-manifest.json
@@ -1248,5 +1248,16 @@
             "${libdir}/python${PYTHON_MAJMIN}/xmlrpc/__pycache__"
         ],
         "cached": []
+    },
+    "zoneinfo": {
+        "summary": "IANA time zone support",
+        "rdepends": [
+            "core"
+        ],
+        "files": [
+            "${libdir}/python${PYTHON_MAJMIN}/zoneinfo",
+            "${libdir}/python${PYTHON_MAJMIN}/lib-dynload/_zoneinfo.*.so"
+        ],
+        "cached": []
     }
 }
diff --git a/poky/meta/recipes-devtools/python/python3_3.10.6.bb b/poky/meta/recipes-devtools/python/python3_3.10.6.bb
index 1b28728..1f8b60a 100644
--- a/poky/meta/recipes-devtools/python/python3_3.10.6.bb
+++ b/poky/meta/recipes-devtools/python/python3_3.10.6.bb
@@ -105,6 +105,7 @@
 PACKAGECONFIG:class-native ??= "readline gdbm"
 PACKAGECONFIG:class-nativesdk ??= "readline gdbm"
 PACKAGECONFIG[readline] = ",,readline"
+PACKAGECONFIG[editline] = "--with-readline=editline,,libedit,,,readline"
 # Use profile guided optimisation by running PyBench inside qemu-user
 PACKAGECONFIG[pgo] = "--enable-optimizations,,qemu-native"
 PACKAGECONFIG[tk] = ",,tk"
diff --git a/poky/meta/recipes-devtools/qemu/qemu-native_7.0.0.bb b/poky/meta/recipes-devtools/qemu/qemu-native_7.1.0.bb
similarity index 100%
rename from poky/meta/recipes-devtools/qemu/qemu-native_7.0.0.bb
rename to poky/meta/recipes-devtools/qemu/qemu-native_7.1.0.bb
diff --git a/poky/meta/recipes-devtools/qemu/qemu-system-native_7.0.0.bb b/poky/meta/recipes-devtools/qemu/qemu-system-native_7.1.0.bb
similarity index 89%
rename from poky/meta/recipes-devtools/qemu/qemu-system-native_7.0.0.bb
rename to poky/meta/recipes-devtools/qemu/qemu-system-native_7.1.0.bb
index 5ccede5..04c7c2a 100644
--- a/poky/meta/recipes-devtools/qemu/qemu-system-native_7.0.0.bb
+++ b/poky/meta/recipes-devtools/qemu/qemu-system-native_7.1.0.bb
@@ -28,5 +28,6 @@
     rm -rf ${D}${includedir}/qemu-plugin.h
 
     # Install qmp.py to be used with testimage
-    install -D ${S}/python/qemu/qmp/__init__.py ${D}${libdir}/qemu-python/qmp.py
+    install -d ${D}${libdir}/qemu-python/qmp/
+    install -D ${S}/python/qemu/qmp/* ${D}${libdir}/qemu-python/qmp/
 }
diff --git a/poky/meta/recipes-devtools/qemu/qemu.inc b/poky/meta/recipes-devtools/qemu/qemu.inc
index 56fc7aa..612abd2 100644
--- a/poky/meta/recipes-devtools/qemu/qemu.inc
+++ b/poky/meta/recipes-devtools/qemu/qemu.inc
@@ -27,16 +27,12 @@
            file://0008-tests-meson.build-use-relative-path-to-refer-to-file.patch \
            file://0009-Define-MAP_SYNC-and-MAP_SHARED_VALIDATE-on-needed-li.patch \
            file://0010-hw-pvrdma-Protect-against-buggy-or-malicious-guest-d.patch \
-           file://qemu-7.0.0-glibc-2.36.patch \
-           file://CVE-2022-35414.patch \
-           file://CVE-2021-3507_1.patch \
-           file://CVE-2021-3507_2.patch \
-           file://CVE-2022-0216_1.patch \
-           file://CVE-2022-0216_2.patch \
+           file://0001-net-tulip-Restrict-DMA-engine-to-memories.patch \
+           file://arm-cpreg-fix.patch \
            "
 UPSTREAM_CHECK_REGEX = "qemu-(?P<pver>\d+(\.\d+)+)\.tar"
 
-SRC_URI[sha256sum] = "f6b375c7951f728402798b0baabb2d86478ca53d44cedbefabbe1c46bf46f839"
+SRC_URI[sha256sum] = "a0634e536bded57cf38ec8a751adb124b89c776fe0846f21ab6c6728f1cbbbe6"
 
 SRC_URI:append:class-target = " file://cross.patch"
 SRC_URI:append:class-nativesdk = " file://cross.patch"
@@ -75,8 +71,14 @@
 	# Strip the paths from the QEMU variable, we can use PATH
 	sed -i -e "s#^QEMU=.*/qemu-#QEMU=qemu-#g" ${D}${PTEST_PATH}/tests/tcg/*.mak
 
-        # Strip compiler flags as they break reproducibility
-        sed -i -e "s,CROSS_CC_GUEST=.*,CROSS_CC_GUEST=," ${D}${PTEST_PATH}/tests/tcg/*.mak
+	# Strip compiler flags as they break reproducibility
+	sed -i -e "s,^CC=.*,CC=gcc," \
+	       -e "s,^CCAS=.*,CCAS=gcc," \
+	       -e "s,^LD=.*,LD=ld," ${D}${PTEST_PATH}/tests/tcg/*.mak
+
+	# Update SRC_PATH variable to the right place on target
+	sed -i -e "s#^SRC_PATH=.*#SRC_PATH=${PTEST_PATH}#g" ${D}${PTEST_PATH}/tests/tcg/*.mak
+
 }
 
 # QEMU_TARGETS is overridable variable
@@ -151,7 +153,6 @@
 PACKAGECONFIG[xen] = "--enable-xen,--disable-xen,xen-tools,xen-tools-libxenstore xen-tools-libxenctrl xen-tools-libxenguest"
 PACKAGECONFIG[vnc-sasl] = "--enable-vnc --enable-vnc-sasl,--disable-vnc-sasl,cyrus-sasl,"
 PACKAGECONFIG[vnc-jpeg] = "--enable-vnc --enable-vnc-jpeg,--disable-vnc-jpeg,jpeg,"
-PACKAGECONFIG[vnc-png] = "--enable-vnc --enable-vnc-png,--disable-vnc-png,libpng,"
 PACKAGECONFIG[libcurl] = "--enable-curl,--disable-curl,curl,"
 PACKAGECONFIG[nss] = "--enable-smartcard,--disable-smartcard,nss,"
 PACKAGECONFIG[curses] = "--enable-curses,--disable-curses,ncurses,"
diff --git a/poky/meta/recipes-devtools/qemu/qemu/0001-net-tulip-Restrict-DMA-engine-to-memories.patch b/poky/meta/recipes-devtools/qemu/qemu/0001-net-tulip-Restrict-DMA-engine-to-memories.patch
new file mode 100644
index 0000000..6c85a77
--- /dev/null
+++ b/poky/meta/recipes-devtools/qemu/qemu/0001-net-tulip-Restrict-DMA-engine-to-memories.patch
@@ -0,0 +1,64 @@
+CVE: CVE-2022-2962
+Upstream-Status: Backport
+Signed-off-by: Ross Burton <ross.burton@arm.com>
+
+From 5c5c50b0a73d78ffe18336c9996fef5eae9bbbb0 Mon Sep 17 00:00:00 2001
+From: Zheyu Ma <zheyuma97@gmail.com>
+Date: Sun, 21 Aug 2022 20:43:43 +0800
+Subject: [PATCH] net: tulip: Restrict DMA engine to memories
+
+The DMA engine is started by I/O access and then itself accesses the
+I/O registers, triggering a reentrancy bug.
+
+The following log can reveal it:
+==5637==ERROR: AddressSanitizer: stack-overflow
+    #0 0x5595435f6078 in tulip_xmit_list_update qemu/hw/net/tulip.c:673
+    #1 0x5595435f204a in tulip_write qemu/hw/net/tulip.c:805:13
+    #2 0x559544637f86 in memory_region_write_accessor qemu/softmmu/memory.c:492:5
+    #3 0x5595446379fa in access_with_adjusted_size qemu/softmmu/memory.c:554:18
+    #4 0x5595446372fa in memory_region_dispatch_write qemu/softmmu/memory.c
+    #5 0x55954468b74c in flatview_write_continue qemu/softmmu/physmem.c:2825:23
+    #6 0x559544683662 in flatview_write qemu/softmmu/physmem.c:2867:12
+    #7 0x5595446833f3 in address_space_write qemu/softmmu/physmem.c:2963:18
+    #8 0x5595435fb082 in dma_memory_rw_relaxed qemu/include/sysemu/dma.h:87:12
+    #9 0x5595435fb082 in dma_memory_rw qemu/include/sysemu/dma.h:130:12
+    #10 0x5595435fb082 in dma_memory_write qemu/include/sysemu/dma.h:171:12
+    #11 0x5595435fb082 in stl_le_dma qemu/include/sysemu/dma.h:272:1
+    #12 0x5595435fb082 in stl_le_pci_dma qemu/include/hw/pci/pci.h:910:1
+    #13 0x5595435fb082 in tulip_desc_write qemu/hw/net/tulip.c:101:9
+    #14 0x5595435f7e3d in tulip_xmit_list_update qemu/hw/net/tulip.c:706:9
+    #15 0x5595435f204a in tulip_write qemu/hw/net/tulip.c:805:13
+
+Fix this bug by restricting the DMA engine to memory regions.
+
+Signed-off-by: Zheyu Ma <zheyuma97@gmail.com>
+Signed-off-by: Jason Wang <jasowang@redhat.com>
+---
+ hw/net/tulip.c | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+diff --git a/hw/net/tulip.c b/hw/net/tulip.c
+index 097e905bec..b9e42c322a 100644
+--- a/hw/net/tulip.c
++++ b/hw/net/tulip.c
+@@ -70,7 +70,7 @@ static const VMStateDescription vmstate_pci_tulip = {
+ static void tulip_desc_read(TULIPState *s, hwaddr p,
+         struct tulip_descriptor *desc)
+ {
+-    const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
++    const MemTxAttrs attrs = { .memory = true };
+ 
+     if (s->csr[0] & CSR0_DBO) {
+         ldl_be_pci_dma(&s->dev, p, &desc->status, attrs);
+@@ -88,7 +88,7 @@ static void tulip_desc_read(TULIPState *s, hwaddr p,
+ static void tulip_desc_write(TULIPState *s, hwaddr p,
+         struct tulip_descriptor *desc)
+ {
+-    const MemTxAttrs attrs = MEMTXATTRS_UNSPECIFIED;
++    const MemTxAttrs attrs = { .memory = true };
+ 
+     if (s->csr[0] & CSR0_DBO) {
+         stl_be_pci_dma(&s->dev, p, desc->status, attrs);
+-- 
+2.34.1
+
diff --git a/poky/meta/recipes-devtools/qemu/qemu/0010-hw-pvrdma-Protect-against-buggy-or-malicious-guest-d.patch b/poky/meta/recipes-devtools/qemu/qemu/0010-hw-pvrdma-Protect-against-buggy-or-malicious-guest-d.patch
index 826d42f..810c74f 100644
--- a/poky/meta/recipes-devtools/qemu/qemu/0010-hw-pvrdma-Protect-against-buggy-or-malicious-guest-d.patch
+++ b/poky/meta/recipes-devtools/qemu/qemu/0010-hw-pvrdma-Protect-against-buggy-or-malicious-guest-d.patch
@@ -1,28 +1,28 @@
-From 52c38fa9f3a790a7c2805e7d8cce3ea9262d6ae2 Mon Sep 17 00:00:00 2001
+CVE: CVE-2022-1050
+Upstream-Status: Submitted [https://lore.kernel.org/qemu-devel/20220403095234.2210-1-yuval.shaia.ml@gmail.com/]
+Signed-off-by: Ross Burton <ross.burton@arm.com>
+
+From dbdef95c272e8f3ec037c3db4197c66002e30995 Mon Sep 17 00:00:00 2001
 From: Yuval Shaia <yuval.shaia.ml@gmail.com>
-Date: Tue, 12 Apr 2022 11:01:51 +0100
-Subject: [PATCH 10/12] hw/pvrdma: Protect against buggy or malicious guest
- driver
+Date: Sun, 3 Apr 2022 12:52:34 +0300
+Subject: [PATCH] hw/pvrdma: Protect against buggy or malicious guest driver
 
 Guest driver might execute HW commands when shared buffers are not yet
 allocated.
-This might happen on purpose (malicious guest) or because some other
-guest/host address mapping.
+This could happen on purpose (malicious guest) or because of some other
+guest/host address mapping error.
 We need to protect againts such case.
 
-Reported-by: Mauro Matteo Cascella <mcascell@redhat.com>
+Fixes: CVE-2022-1050
+
+Reported-by: Raven <wxhusst@gmail.com>
 Signed-off-by: Yuval Shaia <yuval.shaia.ml@gmail.com>
-
-CVE: CVE-2022-1050
-Upstream-Status: Submitted [https://lists.nongnu.org/archive/html/qemu-devel/2022-03/msg05197.html]
-
 ---
- hw/rdma/vmw/pvrdma_cmd.c  | 6 ++++++
- hw/rdma/vmw/pvrdma_main.c | 3 ++-
- 2 files changed, 8 insertions(+), 1 deletion(-)
+ hw/rdma/vmw/pvrdma_cmd.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
 
 diff --git a/hw/rdma/vmw/pvrdma_cmd.c b/hw/rdma/vmw/pvrdma_cmd.c
-index da7ddfa54..89db963c4 100644
+index da7ddfa548..89db963c46 100644
 --- a/hw/rdma/vmw/pvrdma_cmd.c
 +++ b/hw/rdma/vmw/pvrdma_cmd.c
 @@ -796,6 +796,12 @@ int pvrdma_exec_cmd(PVRDMADev *dev)
@@ -38,20 +38,6 @@
      if (dsr_info->req->hdr.cmd >= sizeof(cmd_handlers) /
                        sizeof(struct cmd_handler)) {
          rdma_error_report("Unsupported command");
-diff --git a/hw/rdma/vmw/pvrdma_main.c b/hw/rdma/vmw/pvrdma_main.c
-index 91206dbb8..0b7d908e2 100644
---- a/hw/rdma/vmw/pvrdma_main.c
-+++ b/hw/rdma/vmw/pvrdma_main.c
-@@ -249,7 +249,8 @@ static void init_dsr_dev_caps(PVRDMADev *dev)
- {
-     struct pvrdma_device_shared_region *dsr;
- 
--    if (dev->dsr_info.dsr == NULL) {
-+    if (!dev->dsr_info.dsr) {
-+        /* Buggy or malicious guest driver */
-         rdma_error_report("Can't initialized DSR");
-         return;
-     }
 -- 
-2.30.2
+2.34.1
 
diff --git a/poky/meta/recipes-devtools/qemu/qemu/CVE-2021-3507_1.patch b/poky/meta/recipes-devtools/qemu/qemu/CVE-2021-3507_1.patch
deleted file mode 100644
index 24fd2c5..0000000
--- a/poky/meta/recipes-devtools/qemu/qemu/CVE-2021-3507_1.patch
+++ /dev/null
@@ -1,92 +0,0 @@
-From 57a89cc36ead7234e540d0ecbe1a792ab6b04cb7 Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
-Date: Thu, 18 Nov 2021 12:57:32 +0100
-Subject: [PATCH 1/2] hw/block/fdc: Prevent end-of-track overrun
- (CVE-2021-3507)
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Per the 82078 datasheet, if the end-of-track (EOT byte in
-the FIFO) is more than the number of sectors per side, the
-command is terminated unsuccessfully:
-
-* 5.2.5 DATA TRANSFER TERMINATION
-
-  The 82078 supports terminal count explicitly through
-  the TC pin and implicitly through the underrun/over-
-  run and end-of-track (EOT) functions. For full sector
-  transfers, the EOT parameter can define the last
-  sector to be transferred in a single or multisector
-  transfer. If the last sector to be transferred is a par-
-  tial sector, the host can stop transferring the data in
-  mid-sector, and the 82078 will continue to complete
-  the sector as if a hardware TC was received. The
-  only difference between these implicit functions and
-  TC is that they return "abnormal termination" result
-  status. Such status indications can be ignored if they
-  were expected.
-
-* 6.1.3 READ TRACK
-
-  This command terminates when the EOT specified
-  number of sectors have been read. If the 82078
-  does not find an I D Address Mark on the diskette
-  after the second· occurrence of a pulse on the
-  INDX# pin, then it sets the IC code in Status Regis-
-  ter 0 to "01" (Abnormal termination), sets the MA bit
-  in Status Register 1 to "1", and terminates the com-
-  mand.
-
-* 6.1.6 VERIFY
-
-  Refer to Table 6-6 and Table 6-7 for information
-  concerning the values of MT and EC versus SC and
-  EOT value.
-
-* Table 6·6. Result Phase Table
-
-* Table 6-7. Verify Command Result Phase Table
-
-Fix by aborting the transfer when EOT > # Sectors Per Side.
-
-Cc: qemu-stable@nongnu.org
-Cc: Hervé Poussineau <hpoussin@reactos.org>
-Fixes: baca51faff0 ("floppy driver: disk geometry auto detect")
-Reported-by: Alexander Bulekov <alxndr@bu.edu>
-Resolves: https://gitlab.com/qemu-project/qemu/-/issues/339
-Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Message-Id: <20211118115733.4038610-2-philmd@redhat.com>
-Reviewed-by: Hanna Reitz <hreitz@redhat.com>
-Signed-off-by: Kevin Wolf <kwolf@redhat.com>
-
-Upstream-Status: Backport [defac5e2fbddf8423a354ff0454283a2115e1367]
-CVE: CVE-2021-3507
-
-Signed-off-by: Sakib Sajal <sakib.sajal@windriver.com>
----
- hw/block/fdc.c | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/hw/block/fdc.c b/hw/block/fdc.c
-index 347875a0c..57bb35579 100644
---- a/hw/block/fdc.c
-+++ b/hw/block/fdc.c
-@@ -1530,6 +1530,14 @@ static void fdctrl_start_transfer(FDCtrl *fdctrl, int direction)
-         int tmp;
-         fdctrl->data_len = 128 << (fdctrl->fifo[5] > 7 ? 7 : fdctrl->fifo[5]);
-         tmp = (fdctrl->fifo[6] - ks + 1);
-+        if (tmp < 0) {
-+            FLOPPY_DPRINTF("invalid EOT: %d\n", tmp);
-+            fdctrl_stop_transfer(fdctrl, FD_SR0_ABNTERM, FD_SR1_MA, 0x00);
-+            fdctrl->fifo[3] = kt;
-+            fdctrl->fifo[4] = kh;
-+            fdctrl->fifo[5] = ks;
-+            return;
-+        }
-         if (fdctrl->fifo[0] & 0x80)
-             tmp += fdctrl->fifo[6];
-         fdctrl->data_len *= tmp;
--- 
-2.33.0
-
diff --git a/poky/meta/recipes-devtools/qemu/qemu/CVE-2021-3507_2.patch b/poky/meta/recipes-devtools/qemu/qemu/CVE-2021-3507_2.patch
deleted file mode 100644
index acc93e8..0000000
--- a/poky/meta/recipes-devtools/qemu/qemu/CVE-2021-3507_2.patch
+++ /dev/null
@@ -1,115 +0,0 @@
-From 3e8601ec707dcbc3c768f7733d016dc70c947e4a Mon Sep 17 00:00:00 2001
-From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= <philmd@redhat.com>
-Date: Thu, 18 Nov 2021 12:57:33 +0100
-Subject: [PATCH 2/2] tests/qtest/fdc-test: Add a regression test for
- CVE-2021-3507
-MIME-Version: 1.0
-Content-Type: text/plain; charset=UTF-8
-Content-Transfer-Encoding: 8bit
-
-Add the reproducer from https://gitlab.com/qemu-project/qemu/-/issues/339
-
-Without the previous commit, when running 'make check-qtest-i386'
-with QEMU configured with '--enable-sanitizers' we get:
-
-  ==4028352==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x619000062a00 at pc 0x5626d03c491a bp 0x7ffdb4199410 sp 0x7ffdb4198bc0
-  READ of size 786432 at 0x619000062a00 thread T0
-      #0 0x5626d03c4919 in __asan_memcpy (qemu-system-i386+0x1e65919)
-      #1 0x5626d1c023cc in flatview_write_continue softmmu/physmem.c:2787:13
-      #2 0x5626d1bf0c0f in flatview_write softmmu/physmem.c:2822:14
-      #3 0x5626d1bf0798 in address_space_write softmmu/physmem.c:2914:18
-      #4 0x5626d1bf0f37 in address_space_rw softmmu/physmem.c:2924:16
-      #5 0x5626d1bf14c8 in cpu_physical_memory_rw softmmu/physmem.c:2933:5
-      #6 0x5626d0bd5649 in cpu_physical_memory_write include/exec/cpu-common.h:82:5
-      #7 0x5626d0bd0a07 in i8257_dma_write_memory hw/dma/i8257.c:452:9
-      #8 0x5626d09f825d in fdctrl_transfer_handler hw/block/fdc.c:1616:13
-      #9 0x5626d0a048b4 in fdctrl_start_transfer hw/block/fdc.c:1539:13
-      #10 0x5626d09f4c3e in fdctrl_write_data hw/block/fdc.c:2266:13
-      #11 0x5626d09f22f7 in fdctrl_write hw/block/fdc.c:829:9
-      #12 0x5626d1c20bc5 in portio_write softmmu/ioport.c:207:17
-
-  0x619000062a00 is located 0 bytes to the right of 512-byte region [0x619000062800,0x619000062a00)
-  allocated by thread T0 here:
-      #0 0x5626d03c66ec in posix_memalign (qemu-system-i386+0x1e676ec)
-      #1 0x5626d2b988d4 in qemu_try_memalign util/oslib-posix.c:210:11
-      #2 0x5626d2b98b0c in qemu_memalign util/oslib-posix.c:226:27
-      #3 0x5626d09fbaf0 in fdctrl_realize_common hw/block/fdc.c:2341:20
-      #4 0x5626d0a150ed in isabus_fdc_realize hw/block/fdc-isa.c:113:5
-      #5 0x5626d2367935 in device_set_realized hw/core/qdev.c:531:13
-
-  SUMMARY: AddressSanitizer: heap-buffer-overflow (qemu-system-i386+0x1e65919) in __asan_memcpy
-  Shadow bytes around the buggy address:
-    0x0c32800044f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
-    0x0c3280004500: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-    0x0c3280004510: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-    0x0c3280004520: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-    0x0c3280004530: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00
-  =>0x0c3280004540:[fa]fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
-    0x0c3280004550: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
-    0x0c3280004560: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
-    0x0c3280004570: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
-    0x0c3280004580: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa
-    0x0c3280004590: fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd fd
-  Shadow byte legend (one shadow byte represents 8 application bytes):
-    Addressable:           00
-    Heap left redzone:       fa
-    Freed heap region:       fd
-  ==4028352==ABORTING
-
-[ kwolf: Added snapshot=on to prevent write file lock failure ]
-
-Reported-by: Alexander Bulekov <alxndr@bu.edu>
-Signed-off-by: Philippe Mathieu-Daudé <philmd@redhat.com>
-Reviewed-by: Alexander Bulekov <alxndr@bu.edu>
-Signed-off-by: Kevin Wolf <kwolf@redhat.com>
-
-Upstream-Status: Backport [46609b90d9e3a6304def11038a76b58ff43f77bc]
-CVE: CVE-2021-3507
-
-Signed-off-by: Sakib Sajal <sakib.sajal@windriver.com>
----
- tests/qtest/fdc-test.c | 21 +++++++++++++++++++++
- 1 file changed, 21 insertions(+)
-
-diff --git a/tests/qtest/fdc-test.c b/tests/qtest/fdc-test.c
-index b0d40012e..1d4f85212 100644
---- a/tests/qtest/fdc-test.c
-+++ b/tests/qtest/fdc-test.c
-@@ -583,6 +583,26 @@ static void test_cve_2021_20196(void)
-     qtest_quit(s);
- }
- 
-+static void test_cve_2021_3507(void)
-+{
-+    QTestState *s;
-+
-+    s = qtest_initf("-nographic -m 32M -nodefaults "
-+                    "-drive file=%s,format=raw,if=floppy,snapshot=on",
-+                    test_image);
-+    qtest_outl(s, 0x9, 0x0a0206);
-+    qtest_outw(s, 0x3f4, 0x1600);
-+    qtest_outw(s, 0x3f4, 0x0000);
-+    qtest_outw(s, 0x3f4, 0x0000);
-+    qtest_outw(s, 0x3f4, 0x0000);
-+    qtest_outw(s, 0x3f4, 0x0200);
-+    qtest_outw(s, 0x3f4, 0x0200);
-+    qtest_outw(s, 0x3f4, 0x0000);
-+    qtest_outw(s, 0x3f4, 0x0000);
-+    qtest_outw(s, 0x3f4, 0x0000);
-+    qtest_quit(s);
-+}
-+
- int main(int argc, char **argv)
- {
-     int fd;
-@@ -614,6 +634,7 @@ int main(int argc, char **argv)
-     qtest_add_func("/fdc/read_no_dma_19", test_read_no_dma_19);
-     qtest_add_func("/fdc/fuzz-registers", fuzz_registers);
-     qtest_add_func("/fdc/fuzz/cve_2021_20196", test_cve_2021_20196);
-+    qtest_add_func("/fdc/fuzz/cve_2021_3507", test_cve_2021_3507);
- 
-     ret = g_test_run();
- 
--- 
-2.33.0
-
diff --git a/poky/meta/recipes-devtools/qemu/qemu/CVE-2022-0216_1.patch b/poky/meta/recipes-devtools/qemu/qemu/CVE-2022-0216_1.patch
deleted file mode 100644
index 56fc34c..0000000
--- a/poky/meta/recipes-devtools/qemu/qemu/CVE-2022-0216_1.patch
+++ /dev/null
@@ -1,42 +0,0 @@
-From f37ac8619a39498edd225c4a0b3039b28814833d Mon Sep 17 00:00:00 2001
-From: Mauro Matteo Cascella <mcascell@redhat.com>
-Date: Tue, 5 Jul 2022 22:05:43 +0200
-Subject: [PATCH 1/2] scsi/lsi53c895a: fix use-after-free in lsi_do_msgout
- (CVE-2022-0216)
-
-Set current_req->req to NULL to prevent reusing a free'd buffer in case of
-repeated SCSI cancel requests. Thanks to Thomas Huth for suggesting the patch.
-
-Fixes: CVE-2022-0216
-Resolves: https://gitlab.com/qemu-project/qemu/-/issues/972
-Signed-off-by: Mauro Matteo Cascella <mcascell@redhat.com>
-Reviewed-by: Thomas Huth <thuth@redhat.com>
-Message-Id: <20220705200543.2366809-1-mcascell@redhat.com>
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-
-Upstream-Status: Backport [6c8fa961da5e60f574bb52fd3ad44b1e9e8ad4b8]
-CVE: CVE-2022-0216
-
-Signed-off-by: Sakib Sajal <sakib.sajal@windriver.com>
----
- hw/scsi/lsi53c895a.c | 3 ++-
- 1 file changed, 2 insertions(+), 1 deletion(-)
-
-diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
-index c8773f73f..99ea42d49 100644
---- a/hw/scsi/lsi53c895a.c
-+++ b/hw/scsi/lsi53c895a.c
-@@ -1028,8 +1028,9 @@ static void lsi_do_msgout(LSIState *s)
-         case 0x0d:
-             /* The ABORT TAG message clears the current I/O process only. */
-             trace_lsi_do_msgout_abort(current_tag);
--            if (current_req) {
-+            if (current_req && current_req->req) {
-                 scsi_req_cancel(current_req->req);
-+                current_req->req = NULL;
-             }
-             lsi_disconnect(s);
-             break;
--- 
-2.33.0
-
diff --git a/poky/meta/recipes-devtools/qemu/qemu/CVE-2022-0216_2.patch b/poky/meta/recipes-devtools/qemu/qemu/CVE-2022-0216_2.patch
deleted file mode 100644
index f332154..0000000
--- a/poky/meta/recipes-devtools/qemu/qemu/CVE-2022-0216_2.patch
+++ /dev/null
@@ -1,146 +0,0 @@
-From 5451bf6db85ce3da1238e9154d051ebccec8f171 Mon Sep 17 00:00:00 2001
-From: Mauro Matteo Cascella <mcascell@redhat.com>
-Date: Mon, 11 Jul 2022 14:33:16 +0200
-Subject: [PATCH 2/2] scsi/lsi53c895a: really fix use-after-free in
- lsi_do_msgout (CVE-2022-0216)
-
-Set current_req to NULL, not current_req->req, to prevent reusing a free'd
-buffer in case of repeated SCSI cancel requests.  Also apply the fix to
-CLEAR QUEUE and BUS DEVICE RESET messages as well, since they also cancel
-the request.
-
-Thanks to Alexander Bulekov for providing a reproducer.
-
-Fixes: CVE-2022-0216
-Resolves: https://gitlab.com/qemu-project/qemu/-/issues/972
-Signed-off-by: Mauro Matteo Cascella <mcascell@redhat.com>
-Tested-by: Alexander Bulekov <alxndr@bu.edu>
-Message-Id: <20220711123316.421279-1-mcascell@redhat.com>
-Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
-
-Upstream-Status: Backport [4367a20cc442c56b05611b4224de9a61908f9eac]
-CVE: CVE-2022-0216
-
-Signed-off-by: Sakib Sajal <sakib.sajal@windriver.com>
----
- hw/scsi/lsi53c895a.c               |  3 +-
- tests/qtest/fuzz-lsi53c895a-test.c | 76 ++++++++++++++++++++++++++++++
- 2 files changed, 78 insertions(+), 1 deletion(-)
-
-diff --git a/hw/scsi/lsi53c895a.c b/hw/scsi/lsi53c895a.c
-index 99ea42d49..ad5f5e5f3 100644
---- a/hw/scsi/lsi53c895a.c
-+++ b/hw/scsi/lsi53c895a.c
-@@ -1030,7 +1030,7 @@ static void lsi_do_msgout(LSIState *s)
-             trace_lsi_do_msgout_abort(current_tag);
-             if (current_req && current_req->req) {
-                 scsi_req_cancel(current_req->req);
--                current_req->req = NULL;
-+                current_req = NULL;
-             }
-             lsi_disconnect(s);
-             break;
-@@ -1056,6 +1056,7 @@ static void lsi_do_msgout(LSIState *s)
-             /* clear the current I/O process */
-             if (s->current) {
-                 scsi_req_cancel(s->current->req);
-+                current_req = NULL;
-             }
- 
-             /* As the current implemented devices scsi_disk and scsi_generic
-diff --git a/tests/qtest/fuzz-lsi53c895a-test.c b/tests/qtest/fuzz-lsi53c895a-test.c
-index ba5d46897..c1af0ab1c 100644
---- a/tests/qtest/fuzz-lsi53c895a-test.c
-+++ b/tests/qtest/fuzz-lsi53c895a-test.c
-@@ -8,6 +8,79 @@
- #include "qemu/osdep.h"
- #include "libqos/libqtest.h"
- 
-+/*
-+ * This used to trigger a UAF in lsi_do_msgout()
-+ * https://gitlab.com/qemu-project/qemu/-/issues/972
-+ */
-+static void test_lsi_do_msgout_cancel_req(void)
-+{
-+    QTestState *s;
-+
-+    if (sizeof(void *) == 4) {
-+        g_test_skip("memory size too big for 32-bit build");
-+        return;
-+    }
-+
-+    s = qtest_init("-M q35 -m 4G -display none -nodefaults "
-+                   "-device lsi53c895a,id=scsi "
-+                   "-device scsi-hd,drive=disk0 "
-+                   "-drive file=null-co://,id=disk0,if=none,format=raw");
-+
-+    qtest_outl(s, 0xcf8, 0x80000810);
-+    qtest_outl(s, 0xcf8, 0xc000);
-+    qtest_outl(s, 0xcf8, 0x80000810);
-+    qtest_outw(s, 0xcfc, 0x7);
-+    qtest_outl(s, 0xcf8, 0x80000810);
-+    qtest_outl(s, 0xcfc, 0xc000);
-+    qtest_outl(s, 0xcf8, 0x80000804);
-+    qtest_outw(s, 0xcfc, 0x05);
-+    qtest_writeb(s, 0x69736c10, 0x08);
-+    qtest_writeb(s, 0x69736c13, 0x58);
-+    qtest_writeb(s, 0x69736c1a, 0x01);
-+    qtest_writeb(s, 0x69736c1b, 0x06);
-+    qtest_writeb(s, 0x69736c22, 0x01);
-+    qtest_writeb(s, 0x69736c23, 0x07);
-+    qtest_writeb(s, 0x69736c2b, 0x02);
-+    qtest_writeb(s, 0x69736c48, 0x08);
-+    qtest_writeb(s, 0x69736c4b, 0x58);
-+    qtest_writeb(s, 0x69736c52, 0x04);
-+    qtest_writeb(s, 0x69736c53, 0x06);
-+    qtest_writeb(s, 0x69736c5b, 0x02);
-+    qtest_outl(s, 0xc02d, 0x697300);
-+    qtest_writeb(s, 0x5a554662, 0x01);
-+    qtest_writeb(s, 0x5a554663, 0x07);
-+    qtest_writeb(s, 0x5a55466a, 0x10);
-+    qtest_writeb(s, 0x5a55466b, 0x22);
-+    qtest_writeb(s, 0x5a55466c, 0x5a);
-+    qtest_writeb(s, 0x5a55466d, 0x5a);
-+    qtest_writeb(s, 0x5a55466e, 0x34);
-+    qtest_writeb(s, 0x5a55466f, 0x5a);
-+    qtest_writeb(s, 0x5a345a5a, 0x77);
-+    qtest_writeb(s, 0x5a345a5b, 0x55);
-+    qtest_writeb(s, 0x5a345a5c, 0x51);
-+    qtest_writeb(s, 0x5a345a5d, 0x27);
-+    qtest_writeb(s, 0x27515577, 0x41);
-+    qtest_outl(s, 0xc02d, 0x5a5500);
-+    qtest_writeb(s, 0x364001d0, 0x08);
-+    qtest_writeb(s, 0x364001d3, 0x58);
-+    qtest_writeb(s, 0x364001da, 0x01);
-+    qtest_writeb(s, 0x364001db, 0x26);
-+    qtest_writeb(s, 0x364001dc, 0x0d);
-+    qtest_writeb(s, 0x364001dd, 0xae);
-+    qtest_writeb(s, 0x364001de, 0x41);
-+    qtest_writeb(s, 0x364001df, 0x5a);
-+    qtest_writeb(s, 0x5a41ae0d, 0xf8);
-+    qtest_writeb(s, 0x5a41ae0e, 0x36);
-+    qtest_writeb(s, 0x5a41ae0f, 0xd7);
-+    qtest_writeb(s, 0x5a41ae10, 0x36);
-+    qtest_writeb(s, 0x36d736f8, 0x0c);
-+    qtest_writeb(s, 0x36d736f9, 0x80);
-+    qtest_writeb(s, 0x36d736fa, 0x0d);
-+    qtest_outl(s, 0xc02d, 0x364000);
-+
-+    qtest_quit(s);
-+}
-+
- /*
-  * This used to trigger the assert in lsi_do_dma()
-  * https://bugs.launchpad.net/qemu/+bug/697510
-@@ -48,5 +121,8 @@ int main(int argc, char **argv)
-                        test_lsi_do_dma_empty_queue);
-     }
- 
-+    qtest_add_func("fuzz/lsi53c895a/lsi_do_msgout_cancel_req",
-+                   test_lsi_do_msgout_cancel_req);
-+
-     return g_test_run();
- }
--- 
-2.33.0
-
diff --git a/poky/meta/recipes-devtools/qemu/qemu/CVE-2022-35414.patch b/poky/meta/recipes-devtools/qemu/qemu/CVE-2022-35414.patch
deleted file mode 100644
index fe79a74..0000000
--- a/poky/meta/recipes-devtools/qemu/qemu/CVE-2022-35414.patch
+++ /dev/null
@@ -1,53 +0,0 @@
-From a10c33942dc8cb31b3762b9dd4adde4c490eed9c Mon Sep 17 00:00:00 2001
-From: Hitendra Prajapati <hprajapati@mvista.com>
-Date: Wed, 3 Aug 2022 10:11:11 +0530
-Subject: [PATCH] CVE-2022-35414
-
-Upstream-Status: Backport [https://github.com/qemu/qemu/commit/418ade7849ce7641c0f7333718caf5091a02fd4c]
-CVE: CVE-2022-35414
-Signed-off-by: Hitendra Prajapati <hprajapati@mvista.com>
----
- softmmu/physmem.c | 13 ++++++++++++-
- 1 file changed, 12 insertions(+), 1 deletion(-)
-
-diff --git a/softmmu/physmem.c b/softmmu/physmem.c
-index 4e1b27a20..ad8a90dec 100644
---- a/softmmu/physmem.c
-+++ b/softmmu/physmem.c
-@@ -669,7 +669,7 @@ void tcg_iommu_init_notifier_list(CPUState *cpu)
- 
- /* Called from RCU critical section */
- MemoryRegionSection *
--address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
-+address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr orig_addr,
-                                   hwaddr *xlat, hwaddr *plen,
-                                   MemTxAttrs attrs, int *prot)
- {
-@@ -678,6 +678,7 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
-     IOMMUMemoryRegionClass *imrc;
-     IOMMUTLBEntry iotlb;
-     int iommu_idx;
-+    hwaddr addr = orig_addr;
-     AddressSpaceDispatch *d =
-         qatomic_rcu_read(&cpu->cpu_ases[asidx].memory_dispatch);
- 
-@@ -722,6 +723,16 @@ address_space_translate_for_iotlb(CPUState *cpu, int asidx, hwaddr addr,
-     return section;
- 
- translate_fail:
-+    /*
-+     * We should be given a page-aligned address -- certainly
-+     * tlb_set_page_with_attrs() does so.  The page offset of xlat
-+     * is used to index sections[], and PHYS_SECTION_UNASSIGNED = 0.
-+     * The page portion of xlat will be logged by memory_region_access_valid()
-+     * when this memory access is rejected, so use the original untranslated
-+     * physical address.
-+     */
-+    assert((orig_addr & ~TARGET_PAGE_MASK) == 0);
-+    *xlat = orig_addr;
-     return &d->map.sections[PHYS_SECTION_UNASSIGNED];
- }
- 
--- 
-2.25.1
-
diff --git a/poky/meta/recipes-devtools/qemu/qemu/arm-cpreg-fix.patch b/poky/meta/recipes-devtools/qemu/qemu/arm-cpreg-fix.patch
new file mode 100644
index 0000000..071691f
--- /dev/null
+++ b/poky/meta/recipes-devtools/qemu/qemu/arm-cpreg-fix.patch
@@ -0,0 +1,27 @@
+target/arm: mark SP_EL1 with ARM_CP_EL3_NO_EL2_KEEP
+
+SP_EL1 must be kept when EL3 is present but EL2 is not. Therefore mark
+it with ARM_CP_EL3_NO_EL2_KEEP.
+
+Fixes: 696ba3771894 ("target/arm: Handle cpreg registration for missing EL")
+Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org>
+
+Upstream-Status: Submitted [https://lists.gnu.org/archive/html/qemu-devel/2022-09/msg04515.html]
+
+---
+ target/arm/helper.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+Index: qemu-7.1.0/target/arm/helper.c
+===================================================================
+--- qemu-7.1.0.orig/target/arm/helper.c
++++ qemu-7.1.0/target/arm/helper.c
+@@ -4971,7 +4971,7 @@ static const ARMCPRegInfo v8_cp_reginfo[
+       .fieldoffset = offsetof(CPUARMState, sp_el[0]) },
+     { .name = "SP_EL1", .state = ARM_CP_STATE_AA64,
+       .opc0 = 3, .opc1 = 4, .crn = 4, .crm = 1, .opc2 = 0,
+-      .access = PL2_RW, .type = ARM_CP_ALIAS,
++      .access = PL2_RW, .type = ARM_CP_ALIAS | ARM_CP_EL3_NO_EL2_KEEP,
+       .fieldoffset = offsetof(CPUARMState, sp_el[1]) },
+     { .name = "SPSel", .state = ARM_CP_STATE_AA64,
+       .opc0 = 3, .opc1 = 0, .crn = 4, .crm = 2, .opc2 = 0,
diff --git a/poky/meta/recipes-devtools/qemu/qemu/cross.patch b/poky/meta/recipes-devtools/qemu/qemu/cross.patch
index d1256a1..ca2ad36 100644
--- a/poky/meta/recipes-devtools/qemu/qemu/cross.patch
+++ b/poky/meta/recipes-devtools/qemu/qemu/cross.patch
@@ -14,19 +14,19 @@
  configure | 4 ----
  1 file changed, 4 deletions(-)
 
-diff --git a/configure b/configure
-index 7c08c1835..0613279f9 100755
---- a/configure
-+++ b/configure
-@@ -3118,7 +3118,6 @@ if test "$skip_meson" = no; then
-   fi
+Index: qemu-7.1.0/configure
+===================================================================
+--- qemu-7.1.0.orig/configure
++++ qemu-7.1.0/configure
+@@ -2710,7 +2710,6 @@ if test "$skip_meson" = no; then
    echo "strip = [$(meson_quote $strip)]" >> $cross
+   echo "widl = [$(meson_quote $widl)]" >> $cross
    echo "windres = [$(meson_quote $windres)]" >> $cross
 -  if test "$cross_compile" = "yes"; then
      cross_arg="--cross-file config-meson.cross"
      echo "[host_machine]" >> $cross
      echo "system = '$targetos'" >> $cross
-@@ -3136,9 +3135,6 @@ if test "$skip_meson" = no; then
+@@ -2728,9 +2727,6 @@ if test "$skip_meson" = no; then
      else
          echo "endian = 'little'" >> $cross
      fi
@@ -36,6 +36,3 @@
    mv $cross config-meson.cross
  
    rm -rf meson-private meson-info meson-logs
--- 
-2.30.2
-
diff --git a/poky/meta/recipes-devtools/qemu/qemu_7.0.0.bb b/poky/meta/recipes-devtools/qemu/qemu_7.1.0.bb
similarity index 100%
rename from poky/meta/recipes-devtools/qemu/qemu_7.0.0.bb
rename to poky/meta/recipes-devtools/qemu/qemu_7.1.0.bb
diff --git a/poky/meta/recipes-devtools/repo/repo_2.29.1.bb b/poky/meta/recipes-devtools/repo/repo_2.29.2.bb
similarity index 94%
rename from poky/meta/recipes-devtools/repo/repo_2.29.1.bb
rename to poky/meta/recipes-devtools/repo/repo_2.29.2.bb
index 740132c..42fd18a 100644
--- a/poky/meta/recipes-devtools/repo/repo_2.29.1.bb
+++ b/poky/meta/recipes-devtools/repo/repo_2.29.2.bb
@@ -12,7 +12,7 @@
 SRC_URI = "git://gerrit.googlesource.com/git-repo.git;protocol=https;branch=main \
            file://0001-python3-shebang.patch \
            "
-SRCREV = "4112c07688d0e0e568478e9f42be349bdd511d45"
+SRCREV = "891e8f72ce3551a19c377456574bbfbeac5c8b8e"
 
 MIRRORS += "git://gerrit.googlesource.com/git-repo.git git://github.com/GerritCodeReview/git-repo.git"
 
diff --git a/poky/meta/recipes-devtools/rpm/files/0001-Add-a-color-setting-for-mips64_n32-binaries.patch b/poky/meta/recipes-devtools/rpm/files/0001-Add-a-color-setting-for-mips64_n32-binaries.patch
index 331ea84..9f5dde0 100644
--- a/poky/meta/recipes-devtools/rpm/files/0001-Add-a-color-setting-for-mips64_n32-binaries.patch
+++ b/poky/meta/recipes-devtools/rpm/files/0001-Add-a-color-setting-for-mips64_n32-binaries.patch
@@ -11,11 +11,11 @@
  rpmrc.in      | 2 ++
  2 files changed, 6 insertions(+)
 
-diff --git a/build/rpmfc.c b/build/rpmfc.c
-index 10c380ee9..b7655aa93 100644
---- a/build/rpmfc.c
-+++ b/build/rpmfc.c
-@@ -639,6 +639,7 @@ exit:
+Index: git/build/rpmfc.c
+===================================================================
+--- git.orig/build/rpmfc.c
++++ git/build/rpmfc.c
+@@ -645,6 +645,7 @@ exit:
  static const struct rpmfcTokens_s rpmfcTokens[] = {
    { "directory",		RPMFC_INCLUDE },
  
@@ -23,7 +23,7 @@
    { "ELF 32-bit",		RPMFC_ELF32|RPMFC_INCLUDE },
    { "ELF 64-bit",		RPMFC_ELF64|RPMFC_INCLUDE },
  
-@@ -1149,6 +1150,9 @@ static uint32_t getElfColor(const char *fn)
+@@ -1150,6 +1151,9 @@ static uint32_t getElfColor(const char *
  		color = RPMFC_ELF32;
  		break;
  	    }
@@ -33,11 +33,11 @@
  	    elf_end(elf);
  	}
  	close(fd);
-diff --git a/rpmrc.in b/rpmrc.in
-index 5bd9ba3e5..f15bb8dad 100644
---- a/rpmrc.in
-+++ b/rpmrc.in
-@@ -137,6 +137,8 @@ archcolor: mipsr6el 1
+Index: git/rpmrc.in
+===================================================================
+--- git.orig/rpmrc.in
++++ git/rpmrc.in
+@@ -139,6 +139,8 @@ archcolor: mipsr6el 1
  archcolor: mips64r6 2
  archcolor: mips64r6el 2
  
diff --git a/poky/meta/recipes-devtools/rpm/files/0001-Do-not-add-an-unsatisfiable-dependency-when-building.patch b/poky/meta/recipes-devtools/rpm/files/0001-Do-not-add-an-unsatisfiable-dependency-when-building.patch
index 4029233..8440c35 100644
--- a/poky/meta/recipes-devtools/rpm/files/0001-Do-not-add-an-unsatisfiable-dependency-when-building.patch
+++ b/poky/meta/recipes-devtools/rpm/files/0001-Do-not-add-an-unsatisfiable-dependency-when-building.patch
@@ -14,11 +14,11 @@
  build/pack.c | 4 ----
  1 file changed, 4 deletions(-)
 
-diff --git a/build/pack.c b/build/pack.c
-index e6cec1816..810cd7351 100644
---- a/build/pack.c
-+++ b/build/pack.c
-@@ -724,10 +724,6 @@ static rpmRC packageBinary(rpmSpec spec, Package pkg, const char *cookie, int ch
+Index: git/build/pack.c
+===================================================================
+--- git.orig/build/pack.c
++++ git/build/pack.c
+@@ -709,10 +709,6 @@ static rpmRC packageBinary(rpmSpec spec,
  	headerPutBin(pkg->header, RPMTAG_SOURCEPKGID, spec->sourcePkgId,16);
      }
  
diff --git a/poky/meta/recipes-devtools/rpm/files/0001-Do-not-hardcode-lib-rpm-as-the-installation-path-for.patch b/poky/meta/recipes-devtools/rpm/files/0001-Do-not-hardcode-lib-rpm-as-the-installation-path-for.patch
index c6cf9d4..6f613d0 100644
--- a/poky/meta/recipes-devtools/rpm/files/0001-Do-not-hardcode-lib-rpm-as-the-installation-path-for.patch
+++ b/poky/meta/recipes-devtools/rpm/files/0001-Do-not-hardcode-lib-rpm-as-the-installation-path-for.patch
@@ -13,11 +13,11 @@
  rpm.am       | 4 ++--
  3 files changed, 4 insertions(+), 4 deletions(-)
 
-diff --git a/configure.ac b/configure.ac
-index 372875fc4..1b7add9ee 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -884,7 +884,7 @@ else
+Index: git/configure.ac
+===================================================================
+--- git.orig/configure.ac
++++ git/configure.ac
+@@ -942,7 +942,7 @@ else
      usrprefix=$prefix
  fi
  
@@ -26,10 +26,10 @@
  AC_SUBST(RPMCONFIGDIR)
  
  AC_SUBST(OBJDUMP)
-diff --git a/macros.in b/macros.in
-index d53ab5ed5..9d10441c8 100644
---- a/macros.in
-+++ b/macros.in
+Index: git/macros.in
+===================================================================
+--- git.orig/macros.in
++++ git/macros.in
 @@ -911,7 +911,7 @@ package or when debugging this package.\
  %_sharedstatedir	%{_prefix}/com
  %_localstatedir		%{_prefix}/var
@@ -39,10 +39,10 @@
  %_includedir		%{_prefix}/include
  %_infodir		%{_datadir}/info
  %_mandir		%{_datadir}/man
-diff --git a/rpm.am b/rpm.am
-index ebe4e40d1..e6920e258 100644
---- a/rpm.am
-+++ b/rpm.am
+Index: git/rpm.am
+===================================================================
+--- git.orig/rpm.am
++++ git/rpm.am
 @@ -1,10 +1,10 @@
  # Internal binaries
  ## HACK: It probably should be $(libexecdir)/rpm or $(libdir)/rpm
diff --git a/poky/meta/recipes-devtools/rpm/files/0001-Do-not-read-config-files-from-HOME.patch b/poky/meta/recipes-devtools/rpm/files/0001-Do-not-read-config-files-from-HOME.patch
index 96eb418..fda64ee 100644
--- a/poky/meta/recipes-devtools/rpm/files/0001-Do-not-read-config-files-from-HOME.patch
+++ b/poky/meta/recipes-devtools/rpm/files/0001-Do-not-read-config-files-from-HOME.patch
@@ -9,10 +9,10 @@
  lib/rpmrc.c | 6 ++----
  1 file changed, 2 insertions(+), 4 deletions(-)
 
-diff --git a/lib/rpmrc.c b/lib/rpmrc.c
-index 4ed991321..19fe80f98 100644
---- a/lib/rpmrc.c
-+++ b/lib/rpmrc.c
+Index: git/lib/rpmrc.c
+===================================================================
+--- git.orig/lib/rpmrc.c
++++ git/lib/rpmrc.c
 @@ -458,8 +458,7 @@ static void setDefaults(void)
      if (!defrcfiles) {
  	defrcfiles = rstrscat(NULL, confdir, "/rpmrc", ":",
@@ -33,6 +33,3 @@
      }
  #else
      macrofiles = MACROFILES;
--- 
-2.11.0
-
diff --git a/poky/meta/recipes-devtools/rpm/files/0001-Do-not-reset-the-PATH-environment-variable-before-ru.patch b/poky/meta/recipes-devtools/rpm/files/0001-Do-not-reset-the-PATH-environment-variable-before-ru.patch
index 41cdf6e..ae24b66 100644
--- a/poky/meta/recipes-devtools/rpm/files/0001-Do-not-reset-the-PATH-environment-variable-before-ru.patch
+++ b/poky/meta/recipes-devtools/rpm/files/0001-Do-not-reset-the-PATH-environment-variable-before-ru.patch
@@ -13,11 +13,11 @@
  lib/rpmscript.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
-diff --git a/lib/rpmscript.c b/lib/rpmscript.c
-index 6a31e0d..2b0e438 100644
---- a/lib/rpmscript.c
-+++ b/lib/rpmscript.c
-@@ -184,7 +184,7 @@ static void doScriptExec(ARGV_const_t argv, ARGV_const_t prefixes,
+Index: git/lib/rpmscript.c
+===================================================================
+--- git.orig/lib/rpmscript.c
++++ git/lib/rpmscript.c
+@@ -231,7 +231,7 @@ static void doScriptExec(ARGV_const_t ar
  	if (ipath && ipath[5] != '%')
  	    path = ipath;
  
diff --git a/poky/meta/recipes-devtools/rpm/files/0001-When-cross-installing-execute-package-scriptlets-wit.patch b/poky/meta/recipes-devtools/rpm/files/0001-When-cross-installing-execute-package-scriptlets-wit.patch
index 2a0069c..bd3314a 100644
--- a/poky/meta/recipes-devtools/rpm/files/0001-When-cross-installing-execute-package-scriptlets-wit.patch
+++ b/poky/meta/recipes-devtools/rpm/files/0001-When-cross-installing-execute-package-scriptlets-wit.patch
@@ -28,9 +28,11 @@
  lib/rpmscript.c | 11 ++++++++---
  1 file changed, 8 insertions(+), 3 deletions(-)
 
---- a/lib/rpmscript.c
-+++ b/lib/rpmscript.c
-@@ -17,7 +17,7 @@
+Index: git/lib/rpmscript.c
+===================================================================
+--- git.orig/lib/rpmscript.c
++++ git/lib/rpmscript.c
+@@ -18,7 +18,7 @@
  #include "rpmio/rpmio_internal.h"
  
  #include "lib/rpmplugins.h"     /* rpm plugins hooks */
@@ -39,7 +41,7 @@
  #include "debug.h"
  
  struct scriptNextFileFunc_s {
-@@ -391,8 +391,7 @@ exit:
+@@ -427,8 +427,7 @@ exit:
  	Fclose(out);	/* XXX dup'd STDOUT_FILENO */
  
      if (fn) {
@@ -49,18 +51,18 @@
  	free(fn);
      }
      free(mline);
-@@ -426,7 +425,13 @@ rpmRC rpmScriptRun(rpmScript script, int
+@@ -462,7 +461,13 @@ rpmRC rpmScriptRun(rpmScript script, int
  
      if (rc != RPMRC_FAIL) {
  	if (script_type & RPMSCRIPTLET_EXEC) {
--	    rc = runExtScript(plugins, prefixes, script->descr, lvl, scriptFd, &args, script->body, arg1, arg2, &script->nextFileFunc);
+-	    rc = runExtScript(plugins, prefixes, script->descr, lvl, scriptFd, &args, script->body, arg1, arg2, script->nextFileFunc);
 +	    if (getenv("RPM_NO_CHROOT_FOR_SCRIPTS") != NULL) {
 +		rpmChrootOut();
-+		rc = runExtScript(plugins, prefixes, script->descr, lvl, scriptFd, &args, script->body, arg1, arg2, &script->nextFileFunc);
++		rc = runExtScript(plugins, prefixes, script->descr, lvl, scriptFd, &args, script->body, arg1, arg2, script->nextFileFunc);
 +		rpmChrootIn();
 +	    } else {
-+		rc = runExtScript(plugins, prefixes, script->descr, lvl, scriptFd, &args, script->body, arg1, arg2, &script->nextFileFunc);
++		rc = runExtScript(plugins, prefixes, script->descr, lvl, scriptFd, &args, script->body, arg1, arg2, script->nextFileFunc);
 +	    }
  	} else {
- 	    rc = runLuaScript(plugins, prefixes, script->descr, lvl, scriptFd, &args, script->body, arg1, arg2, &script->nextFileFunc);
+ 	    rc = runLuaScript(plugins, prefixes, script->descr, lvl, scriptFd, &args, script->body, arg1, arg2, script->nextFileFunc);
  	}
diff --git a/poky/meta/recipes-devtools/rpm/files/0001-build-pack.c-do-not-insert-payloadflags-into-.rpm-me.patch b/poky/meta/recipes-devtools/rpm/files/0001-build-pack.c-do-not-insert-payloadflags-into-.rpm-me.patch
index 79b1682..64433ab 100644
--- a/poky/meta/recipes-devtools/rpm/files/0001-build-pack.c-do-not-insert-payloadflags-into-.rpm-me.patch
+++ b/poky/meta/recipes-devtools/rpm/files/0001-build-pack.c-do-not-insert-payloadflags-into-.rpm-me.patch
@@ -13,10 +13,10 @@
  build/pack.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
-diff --git a/build/pack.c b/build/pack.c
-index 932cb213e..b45d0726f 100644
---- a/build/pack.c
-+++ b/build/pack.c
+Index: git/build/pack.c
+===================================================================
+--- git.orig/build/pack.c
++++ git/build/pack.c
 @@ -328,7 +328,7 @@ static char *getIOFlags(Package pkg)
  	    headerPutString(pkg->header, RPMTAG_PAYLOADCOMPRESSOR, compr);
  	buf = xstrdup(rpmio_flags);
diff --git a/poky/meta/recipes-devtools/rpm/files/0001-configure.ac-add-linux-gnux32-variant-to-triplet-han.patch b/poky/meta/recipes-devtools/rpm/files/0001-configure.ac-add-linux-gnux32-variant-to-triplet-han.patch
index 2174a79..29b6686 100644
--- a/poky/meta/recipes-devtools/rpm/files/0001-configure.ac-add-linux-gnux32-variant-to-triplet-han.patch
+++ b/poky/meta/recipes-devtools/rpm/files/0001-configure.ac-add-linux-gnux32-variant-to-triplet-han.patch
@@ -11,13 +11,13 @@
  configure.ac | 4 ++++
  1 file changed, 4 insertions(+)
 
-diff --git a/configure.ac b/configure.ac
-index 372875fc49..7d6a3d274e 100644
---- a/configure.ac
-+++ b/configure.ac
-@@ -845,6 +845,10 @@ if echo "$host_os" | grep '.*-gnuabi64$' > /dev/null ; then
- 	host_os=`echo "${host_os}" | sed 's/-gnuabi64$//'`
- 	host_os_gnu=-gnuabi64
+Index: git/configure.ac
+===================================================================
+--- git.orig/configure.ac
++++ git/configure.ac
+@@ -903,6 +903,10 @@ if echo "$host_os" | grep '.*-gnux32$' >
+ 	host_os=`echo "${host_os}" | sed 's/-gnux32$//'`
+ 	host_os_gnu=-gnux32
  fi
 +if echo "$host_os" | grep '.*-gnux32$' > /dev/null ; then
 +	host_os=`echo "${host_os}" | sed 's/-gnux32$//'`
@@ -26,6 +26,3 @@
  if echo "$host_os" | grep '.*-gnu$' > /dev/null ; then
  	host_os=`echo "${host_os}" | sed 's/-gnu$//'`
  fi
--- 
-2.30.2
-
diff --git a/poky/meta/recipes-devtools/rpm/files/0001-docs-do-not-build-manpages-requires-pandoc.patch b/poky/meta/recipes-devtools/rpm/files/0001-docs-do-not-build-manpages-requires-pandoc.patch
index ced52d1..d7137f1 100644
--- a/poky/meta/recipes-devtools/rpm/files/0001-docs-do-not-build-manpages-requires-pandoc.patch
+++ b/poky/meta/recipes-devtools/rpm/files/0001-docs-do-not-build-manpages-requires-pandoc.patch
@@ -9,10 +9,10 @@
  docs/Makefile.am | 2 --
  1 file changed, 2 deletions(-)
 
-diff --git a/docs/Makefile.am b/docs/Makefile.am
-index 5a6bd203a..6257767fd 100644
---- a/docs/Makefile.am
-+++ b/docs/Makefile.am
+Index: git/docs/Makefile.am
+===================================================================
+--- git.orig/docs/Makefile.am
++++ git/docs/Makefile.am
 @@ -1,7 +1,5 @@
  ## Process this file with automake to produce Makefile.in
  
@@ -21,6 +21,3 @@
  EXTRA_DIST =
  
  EXTRA_DIST += \
--- 
-2.32.0
-
diff --git a/poky/meta/recipes-devtools/rpm/files/0001-lib-transaction.c-fix-file-conflicts-for-MIPS64-N32.patch b/poky/meta/recipes-devtools/rpm/files/0001-lib-transaction.c-fix-file-conflicts-for-MIPS64-N32.patch
index 6678c10..82e6567 100644
--- a/poky/meta/recipes-devtools/rpm/files/0001-lib-transaction.c-fix-file-conflicts-for-MIPS64-N32.patch
+++ b/poky/meta/recipes-devtools/rpm/files/0001-lib-transaction.c-fix-file-conflicts-for-MIPS64-N32.patch
@@ -31,11 +31,11 @@
  lib/transaction.c | 13 ++++++++++++-
  1 file changed, 12 insertions(+), 1 deletion(-)
 
-diff --git a/lib/transaction.c b/lib/transaction.c
-index 67b9db5..82386b8 100644
---- a/lib/transaction.c
-+++ b/lib/transaction.c
-@@ -391,7 +391,18 @@ static int handleColorConflict(rpmts ts,
+Index: git/lib/transaction.c
+===================================================================
+--- git.orig/lib/transaction.c
++++ git/lib/transaction.c
+@@ -402,7 +402,18 @@ static int handleColorConflict(rpmts ts,
  		    rpmfsSetAction(ofs, ofx, FA_CREATE);
  		rpmfsSetAction(fs, fx, FA_SKIPCOLOR);
  		rConflicts = 0;
@@ -55,6 +55,3 @@
  	}
      }
  
--- 
-2.7.4
-
diff --git a/poky/meta/recipes-devtools/rpm/files/0001-tools-Add-error.h-for-non-glibc-case.patch b/poky/meta/recipes-devtools/rpm/files/0001-tools-Add-error.h-for-non-glibc-case.patch
deleted file mode 100644
index 9783396..0000000
--- a/poky/meta/recipes-devtools/rpm/files/0001-tools-Add-error.h-for-non-glibc-case.patch
+++ /dev/null
@@ -1,71 +0,0 @@
-From 9b9d717f484ec913cdd3804e43489b3dc18bd77c Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Sat, 31 Oct 2020 22:14:05 -0700
-Subject: [PATCH] tools: Add error.h for non-glibc case
-
-error is glibc specific API, so this patch will mostly not accepted
-upstream given that elfutils has been closely tied to glibc
-
-Upstream-Status: Inappropriate [workaround for musl]
-
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
-
----
- tools/elfdeps.c |  6 +++++-
- tools/error.h   | 27 +++++++++++++++++++++++++++
- 2 files changed, 32 insertions(+), 1 deletion(-)
- create mode 100644 tools/error.h
-
-diff --git a/tools/elfdeps.c b/tools/elfdeps.c
-index d205935bb..3a8945b33 100644
---- a/tools/elfdeps.c
-+++ b/tools/elfdeps.c
-@@ -5,10 +5,14 @@
- #include <unistd.h>
- #include <stdlib.h>
- #include <fcntl.h>
--#include <error.h>
- #include <errno.h>
- #include <popt.h>
- #include <gelf.h>
-+#ifdef __GLIBC__
-+#include <error.h>
-+#else
-+#include "error.h"
-+#endif
- 
- #include <rpm/rpmstring.h>
- #include <rpm/argv.h>
-diff --git a/tools/error.h b/tools/error.h
-new file mode 100644
-index 000000000..ef06827a0
---- /dev/null
-+++ b/tools/error.h
-@@ -0,0 +1,27 @@
-+#ifndef _ERROR_H_
-+#define _ERROR_H_
-+
-+#include <stdarg.h>
-+#include <stdio.h>
-+#include <stdlib.h>
-+#include <string.h>
-+#include <errno.h>
-+
-+static unsigned int error_message_count = 0;
-+
-+static inline void error(int status, int errnum, const char* format, ...)
-+{
-+	va_list ap;
-+	fprintf(stderr, "%s: ", program_invocation_name);
-+	va_start(ap, format);
-+	vfprintf(stderr, format, ap);
-+	va_end(ap);
-+	if (errnum)
-+		fprintf(stderr, ": %s", strerror(errnum));
-+	fprintf(stderr, "\n");
-+	error_message_count++;
-+	if (status)
-+		exit(status);
-+}
-+
-+#endif	/* _ERROR_H_ */
diff --git a/poky/meta/recipes-devtools/rpm/files/0002-Add-support-for-prefixing-etc-from-RPM_ETCCONFIGDIR-.patch b/poky/meta/recipes-devtools/rpm/files/0002-Add-support-for-prefixing-etc-from-RPM_ETCCONFIGDIR-.patch
index b3dbc31..2fe96a8 100644
--- a/poky/meta/recipes-devtools/rpm/files/0002-Add-support-for-prefixing-etc-from-RPM_ETCCONFIGDIR-.patch
+++ b/poky/meta/recipes-devtools/rpm/files/0002-Add-support-for-prefixing-etc-from-RPM_ETCCONFIGDIR-.patch
@@ -13,11 +13,11 @@
  lib/rpmrc.c | 19 ++++++++++++++-----
  1 file changed, 14 insertions(+), 5 deletions(-)
 
-diff --git a/lib/rpmrc.c b/lib/rpmrc.c
-index 19fe80f98..6b27b3941 100644
---- a/lib/rpmrc.c
-+++ b/lib/rpmrc.c
-@@ -455,10 +455,14 @@ const char * lookupInDefaultTable(const char * name,
+Index: git/lib/rpmrc.c
+===================================================================
+--- git.orig/lib/rpmrc.c
++++ git/lib/rpmrc.c
+@@ -455,10 +455,14 @@ const char * lookupInDefaultTable(const
  static void setDefaults(void)
  {
      const char *confdir = rpmConfigDir();
@@ -46,7 +46,7 @@
      }
  #else
      macrofiles = MACROFILES;
-@@ -989,7 +993,11 @@ static void read_auxv(void)
+@@ -997,7 +1001,11 @@ static void read_auxv(void)
   */
  static void defaultMachine(rpmrcCtx ctx, const char ** arch, const char ** os)
  {
@@ -59,7 +59,7 @@
      static struct utsname un;
      char * chptr;
      canonEntry canon;
-@@ -1286,6 +1294,7 @@ static void defaultMachine(rpmrcCtx ctx, const char ** arch, const char ** os)
+@@ -1307,6 +1315,7 @@ static void defaultMachine(rpmrcCtx ctx,
  
      if (arch) *arch = un.machine;
      if (os) *os = un.sysname;
@@ -67,6 +67,3 @@
  }
  
  static
--- 
-2.11.0
-
diff --git a/poky/meta/recipes-devtools/rpm/files/0016-rpmscript.c-change-logging-level-around-scriptlets-t.patch b/poky/meta/recipes-devtools/rpm/files/0016-rpmscript.c-change-logging-level-around-scriptlets-t.patch
index 43e9859..9dbe712 100644
--- a/poky/meta/recipes-devtools/rpm/files/0016-rpmscript.c-change-logging-level-around-scriptlets-t.patch
+++ b/poky/meta/recipes-devtools/rpm/files/0016-rpmscript.c-change-logging-level-around-scriptlets-t.patch
@@ -13,11 +13,11 @@
  lib/rpmscript.c | 8 ++++----
  1 file changed, 4 insertions(+), 4 deletions(-)
 
-diff --git a/lib/rpmscript.c b/lib/rpmscript.c
-index 2b0e43862..e319673f1 100644
---- a/lib/rpmscript.c
-+++ b/lib/rpmscript.c
-@@ -226,7 +226,7 @@ static char * writeScript(const char *cmd, const char *script)
+Index: git/lib/rpmscript.c
+===================================================================
+--- git.orig/lib/rpmscript.c
++++ git/lib/rpmscript.c
+@@ -270,7 +270,7 @@ static char * writeScript(const char *cm
      if (Ferror(fd))
  	goto exit;
  
@@ -26,7 +26,7 @@
  	static const char set_x[] = "set -x\n";
  	/* Assume failures will be caught by the write below */
  	Fwrite(set_x, sizeof(set_x[0]), sizeof(set_x)-1, fd);
-@@ -258,7 +258,7 @@ static rpmRC runExtScript(rpmPlugins plugins, ARGV_const_t prefixes,
+@@ -302,7 +302,7 @@ static rpmRC runExtScript(rpmPlugins plu
      char *mline = NULL;
      rpmRC rc = RPMRC_FAIL;
  
@@ -35,7 +35,7 @@
  
      if (script) {
  	fn = writeScript(*argvp[0], script);
-@@ -310,7 +310,7 @@ static rpmRC runExtScript(rpmPlugins plugins, ARGV_const_t prefixes,
+@@ -354,7 +354,7 @@ static rpmRC runExtScript(rpmPlugins plu
  		sname, strerror(errno));
  	goto exit;
      } else if (pid == 0) {/* Child */
@@ -44,7 +44,7 @@
  	       sname, *argvp[0], (unsigned)getpid());
  
  	fclose(in);
-@@ -353,7 +353,7 @@ static rpmRC runExtScript(rpmPlugins plugins, ARGV_const_t prefixes,
+@@ -397,7 +397,7 @@ static rpmRC runExtScript(rpmPlugins plu
  	reaped = waitpid(pid, &status, 0);
      } while (reaped == -1 && errno == EINTR);
  
diff --git a/poky/meta/recipes-devtools/rpm/files/fifofix.patch b/poky/meta/recipes-devtools/rpm/files/fifofix.patch
new file mode 100644
index 0000000..71703d7
--- /dev/null
+++ b/poky/meta/recipes-devtools/rpm/files/fifofix.patch
@@ -0,0 +1,22 @@
+Calling openat() on a fifo causes a pseudo hang for us (e.g. the fifo in psplash).
+Avoid calling openat for fifos.
+
+Introduced upstream with:
+
+https://github.com/rpm-software-management/rpm/commit/96ec957e281220f8e137a2d5eb23b83a6377d556
+
+Upstream-Status: Submitted [https://github.com/rpm-software-management/rpm/issues/2195]
+
+Index: git/lib/fsm.c
+===================================================================
+--- git.orig/lib/fsm.c
++++ git/lib/fsm.c
+@@ -1010,7 +1010,7 @@ int rpmPackageFilesInstall(rpmts ts, rpm
+                     rc = RPMERR_UNKNOWN_FILETYPE;
+             }
+ 
+-	    if (!rc && fd == -1 && !S_ISLNK(fp->sb.st_mode)) {
++	    if (!rc && fd == -1 && !S_ISLNK(fp->sb.st_mode) && !S_ISFIFO(fp->sb.st_mode)) {
+ 		/* Only follow safe symlinks, and never on temporary files */
+ 		fd = fsmOpenat(di.dirfd, fp->fpath,
+ 				fp->suffix ? AT_SYMLINK_NOFOLLOW : 0, 0);
diff --git a/poky/meta/recipes-devtools/rpm/rpm_4.17.1.bb b/poky/meta/recipes-devtools/rpm/rpm_4.18.0.bb
similarity index 97%
rename from poky/meta/recipes-devtools/rpm/rpm_4.17.1.bb
rename to poky/meta/recipes-devtools/rpm/rpm_4.18.0.bb
index 9b6446f..5f3986d 100644
--- a/poky/meta/recipes-devtools/rpm/rpm_4.17.1.bb
+++ b/poky/meta/recipes-devtools/rpm/rpm_4.18.0.bb
@@ -24,7 +24,7 @@
 LICENSE = "GPL-2.0-only"
 LIC_FILES_CHKSUM = "file://COPYING;md5=c4eec0c20c6034b9407a09945b48a43f"
 
-SRC_URI = "git://github.com/rpm-software-management/rpm;branch=rpm-4.17.x;protocol=https \
+SRC_URI = "git://github.com/rpm-software-management/rpm;branch=rpm-4.18.x;protocol=https \
            file://environment.d-rpm.sh \
            file://0001-Do-not-add-an-unsatisfiable-dependency-when-building.patch \
            file://0001-Do-not-read-config-files-from-HOME.patch \
@@ -36,14 +36,14 @@
            file://0001-perl-disable-auto-reqs.patch \
            file://0016-rpmscript.c-change-logging-level-around-scriptlets-t.patch \
            file://0001-lib-transaction.c-fix-file-conflicts-for-MIPS64-N32.patch \
-           file://0001-tools-Add-error.h-for-non-glibc-case.patch \
            file://0001-docs-do-not-build-manpages-requires-pandoc.patch \
            file://0001-build-pack.c-do-not-insert-payloadflags-into-.rpm-me.patch \
            file://0001-configure.ac-add-linux-gnux32-variant-to-triplet-han.patch \
+           file://fifofix.patch \
            "
 
 PE = "1"
-SRCREV = "5bef402da334595ed9302b8bca1acdf5e88bfe11"
+SRCREV = "ea0d77c52e176e2876fdb1d07ad41e9e2635a93e"
 
 S = "${WORKDIR}/git"
 
@@ -80,6 +80,7 @@
 PACKAGECONFIG[inhibit] = "--enable-inhibit-plugin,--disable-inhibit-plugin,dbus"
 PACKAGECONFIG[rpm2archive] = "--with-archive,--without-archive,libarchive"
 PACKAGECONFIG[sqlite] = "--enable-sqlite=yes,--enable-sqlite=no,sqlite3"
+PACKAGECONFIG[readline] = "--with-readline,--without-readline,readline"
 PACKAGECONFIG[ndb] = "--enable-ndb,--disable-ndb"
 PACKAGECONFIG[bdb-ro] = "--enable-bdb-ro,--disable-bdb-ro"
 PACKAGECONFIG[zstd] = "--enable-zstd=yes,--enable-zstd=no,zstd"
diff --git a/poky/meta/recipes-devtools/rust/files/target-rust-ccld.c b/poky/meta/recipes-devtools/rust/files/target-rust-ccld.c
new file mode 100644
index 0000000..d3d491f
--- /dev/null
+++ b/poky/meta/recipes-devtools/rust/files/target-rust-ccld.c
@@ -0,0 +1,19 @@
+/*
+*
+* Copyright (C) 2022      Wind River Systems
+*
+* SPDX-License-Identifier: MIT
+*
+*/
+
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+int main (int argc, char *argv[])
+{
+    unsetenv("LD_LIBRARY_PATH"); /* drop the variable before handing over to the wrapper */
+    execvp("target-rust-ccld-wrapper", argv); /* PATH lookup; returns only on failure */
+
+    return 127; /* exec failed: report an error instead of claiming success */
+}
diff --git a/poky/meta/recipes-devtools/rust/rust-cross-canadian.inc b/poky/meta/recipes-devtools/rust/rust-cross-canadian.inc
index 7bf75a4..17f9339 100644
--- a/poky/meta/recipes-devtools/rust/rust-cross-canadian.inc
+++ b/poky/meta/recipes-devtools/rust/rust-cross-canadian.inc
@@ -7,13 +7,18 @@
 
 MODIFYTOS = "0"
 
+DEPENDS += "virtual/${SDK_PREFIX}gcc-crosssdk virtual/nativesdk-libc virtual/nativesdk-${SDK_PREFIX}compilerlibs"
+
+SRC_URI += "file://target-rust-ccld.c"
+LIC_FILES_CHKSUM = "file://target-rust-ccld.c;md5=af4e0e29f81a34cffe05aa07c89e93e9;endline=7"
+S = "${WORKDIR}"
+
 # Need to use our SDK's sh here, see #14878
 create_sdk_wrapper () {
         file="$1"
         shift
-
         cat <<- EOF > "${file}"
-		#!${base_prefix}/bin/sh
+		#!/bin/sh
 		\$$1 \$@
 		EOF
 
@@ -32,8 +37,17 @@
     install -m 0644 "${RUST_TARGETS_DIR}/${RUST_TARGET_SYS}.json" "${RUSTLIB_DIR}"
 
     # Uses SDK's CC as linker so linked binaries works out of box.
+    # We have a problem as rust sets LD_LIBRARY_PATH and this will break running host
+    # binaries (even /bin/sh) in the SDK as they detect a newer glibc from the SDK
+    # in those paths and we hit symbol errors. We saw particular problems with symbol
+    # mismatch on ubuntu1804 during development. To avoid this we have an SDK built
+    # binary which unsets LD_LIBRARY_PATH, which can then call the wrapper script
+    # where the context is easier to do the env manipulations needed
     install -d ${SYS_BINDIR}
-    create_sdk_wrapper "${SYS_BINDIR}/target-rust-ccld" "CC"
+    outfile="${SYS_BINDIR}/target-rust-ccld"
+    ${CC} ${WORKDIR}/target-rust-ccld.c -o $outfile
+    chmod +x "$outfile"
+    create_sdk_wrapper "${SYS_BINDIR}/target-rust-ccld-wrapper" "CC"
 
     ENV_SETUP_DIR=${D}${base_prefix}/environment-setup.d
     mkdir "${ENV_SETUP_DIR}"
diff --git a/poky/meta/recipes-devtools/rust/rust.inc b/poky/meta/recipes-devtools/rust/rust.inc
index 284347d..9563010 100644
--- a/poky/meta/recipes-devtools/rust/rust.inc
+++ b/poky/meta/recipes-devtools/rust/rust.inc
@@ -81,6 +81,7 @@
 
     config.set(host_section, "cxx", e(d.expand("${RUST_TARGET_CXX}")))
     config.set(host_section, "cc", e(d.expand("${RUST_TARGET_CC}")))
+    config.set(host_section, "linker", e(d.expand("${RUST_TARGET_CCLD}")))
     if "musl" in host_section:
         config.set(host_section, "musl-root", e(d.expand("${STAGING_DIR_HOST}${exec_prefix}")))
 
@@ -94,6 +95,7 @@
 
         config.set(build_section, "cxx", e(d.expand("${RUST_BUILD_CXX}")))
         config.set(build_section, "cc", e(d.expand("${RUST_BUILD_CC}")))
+        config.set(build_section, "linker", e(d.expand("${RUST_BUILD_CCLD}")))
 
     target_section = "target.{}".format(d.getVar('RUST_TARGET_SYS', True))
     if target_section != host_section and target_section != build_section:
@@ -103,10 +105,13 @@
 
         config.set(target_section, "cxx", e(d.expand("${RUST_TARGET_CXX}")))
         config.set(target_section, "cc", e(d.expand("${RUST_TARGET_CC}")))
+        config.set(target_section, "linker", e(d.expand("${RUST_TARGET_CCLD}")))
 
     # [llvm]
     config.add_section("llvm")
     config.set("llvm", "static-libstdcpp", e(False))
+    if "llvm" in (d.getVar('TC_CXX_RUNTIME') or ""):
+        config.set("llvm", "use-libcxx", e(True))
 
     # [rust]
     config.add_section("rust")
@@ -116,6 +121,10 @@
     # Whether or not to optimize the compiler and standard library
     config.set("rust", "optimize", e(True))
 
+    # Emits extraneous output from tests to ensure that failures of the test
+    # harness are debuggable just from logfiles
+    config.set("rust", "verbose-tests", e(True))
+
     # [build]
     config.add_section("build")
     config.set("build", "submodules", e(False))
diff --git a/poky/meta/recipes-devtools/rust/rust_1.63.0.bb b/poky/meta/recipes-devtools/rust/rust_1.63.0.bb
index 401d510..1f9dbd3 100644
--- a/poky/meta/recipes-devtools/rust/rust_1.63.0.bb
+++ b/poky/meta/recipes-devtools/rust/rust_1.63.0.bb
@@ -56,13 +56,15 @@
     rm ${D}${libdir}/rustlib/manifest*
 }
 
+EXTRA_TOOLS ?= "cargo-clippy clippy-driver rustfmt"
+EXTRA_TOOLS:remove:riscv32 = "rustfmt"
 rust_do_install:class-target() {
     export PSEUDO_UNLOAD=1
     rust_runx install
     unset PSEUDO_UNLOAD
 
     install -d ${D}${bindir}
-    for i in cargo-clippy clippy-driver rustfmt; do
+    for i in ${EXTRA_TOOLS}; do
         cp build/${RUST_BUILD_SYS}/stage2-tools/${RUST_HOST_SYS}/release/$i ${D}${bindir}
         chrpath -r "\$ORIGIN/../lib" ${D}${bindir}/$i
     done
diff --git a/poky/meta/recipes-devtools/squashfs-tools/squashfs-tools_git.bb b/poky/meta/recipes-devtools/squashfs-tools/squashfs-tools_git.bb
index 0a7e0f2..4e009d2 100644
--- a/poky/meta/recipes-devtools/squashfs-tools/squashfs-tools_git.bb
+++ b/poky/meta/recipes-devtools/squashfs-tools/squashfs-tools_git.bb
@@ -18,7 +18,7 @@
 
 EXTRA_OEMAKE = "${PACKAGECONFIG_CONFARGS}"
 
-PACKAGECONFIG ??= "gzip xz lz4 lzma xattr zstd reproducible"
+PACKAGECONFIG ??= "gzip xz lzo lz4 lzma xattr zstd reproducible"
 PACKAGECONFIG[gzip] = "GZIP_SUPPORT=1,GZIP_SUPPORT=0,zlib"
 PACKAGECONFIG[xz] = "XZ_SUPPORT=1,XZ_SUPPORT=0,xz"
 PACKAGECONFIG[lzo] = "LZO_SUPPORT=1,LZO_SUPPORT=0,lzo"
diff --git a/poky/meta/recipes-devtools/vala/vala_0.56.2.bb b/poky/meta/recipes-devtools/vala/vala_0.56.2.bb
deleted file mode 100644
index 08c8ccc..0000000
--- a/poky/meta/recipes-devtools/vala/vala_0.56.2.bb
+++ /dev/null
@@ -1,3 +0,0 @@
-require ${BPN}.inc
-
-SRC_URI[sha256sum] = "66c9619bb17859fd1ac3aba0a57970613e38fd2a1ee30541174260c9fb90124c"
diff --git a/poky/meta/recipes-devtools/vala/vala_0.56.3.bb b/poky/meta/recipes-devtools/vala/vala_0.56.3.bb
new file mode 100644
index 0000000..83f61e5
--- /dev/null
+++ b/poky/meta/recipes-devtools/vala/vala_0.56.3.bb
@@ -0,0 +1,3 @@
+require ${BPN}.inc
+
+SRC_URI[sha256sum] = "e1066221bf7b89cb1fa7327a3888645cb33b604de3bf45aa81132fd040b699bf"
diff --git a/poky/meta/recipes-devtools/xmlto/xmlto/0001-Skip-validating-xmlto-output.patch b/poky/meta/recipes-devtools/xmlto/xmlto/0001-Skip-validating-xmlto-output.patch
deleted file mode 100644
index c6857a9..0000000
--- a/poky/meta/recipes-devtools/xmlto/xmlto/0001-Skip-validating-xmlto-output.patch
+++ /dev/null
@@ -1,29 +0,0 @@
-From 3deb7a0eded04ab08a9cb2d88526cb1c7b440061 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Sun, 14 Aug 2022 00:23:29 -0700
-Subject: [PATCH] Skip validating xmlto output
-
-Avoids network access
-
-Upstream-Status: Submitted [https://pagure.io/xmlto/pull-request/11]
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- Makefile.am | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/Makefile.am b/Makefile.am
-index 50fa279..6a2da62 100644
---- a/Makefile.am
-+++ b/Makefile.am
-@@ -68,7 +68,7 @@ EXTRA_DIST = xmlto.spec \
- 	doc/xmlif.xml \
- 	xmlto.mak
- 
--GEN_MANPAGE = FORMAT_DIR=$(top_srcdir)/format $(BASH) ./xmlto -o $(@D) man $<
-+GEN_MANPAGE = FORMAT_DIR=$(top_srcdir)/format $(BASH) ./xmlto --skip-validation -o $(@D) man $<
- man/man1/xmlto.1: doc/xmlto.xml ; $(GEN_MANPAGE)
- man/man1/xmlif.1: doc/xmlif.xml ; $(GEN_MANPAGE)
- 
--- 
-2.37.2
-
diff --git a/poky/meta/recipes-devtools/xmlto/xmlto/configure.in-drop-the-test-of-xmllint-and-xsltproc.patch b/poky/meta/recipes-devtools/xmlto/xmlto/configure.in-drop-the-test-of-xmllint-and-xsltproc.patch
deleted file mode 100644
index 7cc3cbe..0000000
--- a/poky/meta/recipes-devtools/xmlto/xmlto/configure.in-drop-the-test-of-xmllint-and-xsltproc.patch
+++ /dev/null
@@ -1,30 +0,0 @@
-configure.in: drop the test of xmllint and xsltproc
-
-The test is unnecessary, the xmllint and xsltproc were explicitly
-added to RDEPENDS.
-
-Upstream-Status: Inappropriate
-Signed-off-by: Hongxu Jia <hongxu.jia@windriver.com>
----
- configure.in | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/configure.ac b/configure.ac
---- a/configure.ac
-+++ b/configure.ac
-@@ -42,10 +42,10 @@ AC_ARG_VAR([LOCALE], [Name and path of the `locale' program.])
- AC_PATH_PROG([LOCALE], [locale], [locale])
- 
- AC_ARG_VAR([XMLLINT], [Name and path of the `xmllint' program.])
--AC_PATH_PROG([XMLLINT], [xmllint], [xmllint])
-+dnl AC_PATH_PROG([XMLLINT], [xmllint], [xmllint])
- 
- AC_ARG_VAR([XSLTPROC], [Name and path of the `xsltproc' program.])
--AC_PATH_PROG([XSLTPROC], [xsltproc], [xsltproc])
-+dnl AC_PATH_PROG([XSLTPROC], [xsltproc], [xsltproc])
- 
- dnl
- dnl toolchains
--- 
-1.8.1.2
-
diff --git a/poky/meta/recipes-devtools/xmlto/xmlto_0.0.28.bb b/poky/meta/recipes-devtools/xmlto/xmlto_0.0.28.bb
index 373eca2..5e3fac7 100644
--- a/poky/meta/recipes-devtools/xmlto/xmlto_0.0.28.bb
+++ b/poky/meta/recipes-devtools/xmlto/xmlto_0.0.28.bb
@@ -8,10 +8,7 @@
 
 SRCREV = "6fa6a0e07644f20abf2596f78a60112713e11cbe"
 UPSTREAM_CHECK_COMMITS = "1"
-SRC_URI = "git://pagure.io/xmlto.git;protocol=https;branch=master \
-           file://configure.in-drop-the-test-of-xmllint-and-xsltproc.patch \
-           file://0001-Skip-validating-xmlto-output.patch \
-"
+SRC_URI = "git://pagure.io/xmlto.git;protocol=https;branch=master"
 S = "${WORKDIR}/git"
 
 PV .= "+0.0.29+git${SRCPV}"
@@ -34,11 +31,11 @@
                   libxslt-bin \
                   coreutils \
 "
-CACHED_CONFIGUREVARS += "ac_cv_path_TAIL=tail ac_cv_path_GREP=grep"
+CACHED_CONFIGUREVARS += "ac_cv_path_TAIL=tail ac_cv_path_GREP=grep ac_cv_path_XMLLINT=xmllint ac_cv_path_XSLTPROC=xsltproc"
 
 BBCLASSEXTEND = "native"
 
-EXTRA_OECONF:append = " BASH=/bin/bash GCP=/bin/cp XMLLINT=xmllint XSLTPROC=xsltproc"
+EXTRA_OECONF:append = " BASH=/bin/bash GCP=/bin/cp"
 
 do_configure:prepend() {
     (cd ${S} && flex -o xmlif/xmlif.c xmlif/xmlif.l)
diff --git a/poky/meta/recipes-extended/cracklib/cracklib/0001-rules-Drop-using-register-keyword.patch b/poky/meta/recipes-extended/cracklib/cracklib/0001-rules-Drop-using-register-keyword.patch
deleted file mode 100644
index a844665..0000000
--- a/poky/meta/recipes-extended/cracklib/cracklib/0001-rules-Drop-using-register-keyword.patch
+++ /dev/null
@@ -1,278 +0,0 @@
-From fe49471cfa7fe0618615c065f4c0ad04e888bf92 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Sun, 7 Aug 2022 12:24:39 -0700
-Subject: [PATCH 1/2] rules: Drop using register keyword
-
-This is a deprecated keyword
-
-Upstream-Status: Submitted [https://github.com/cracklib/cracklib/pull/48]
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- src/lib/rules.c | 94 ++++++++++++++++++++++++-------------------------
- 1 file changed, 47 insertions(+), 47 deletions(-)
-
-diff --git a/lib/rules.c b/lib/rules.c
-index 3a2aa46..6e7a12a 100644
---- a/lib/rules.c
-+++ b/lib/rules.c
-@@ -67,8 +67,8 @@ Suffix(myword, suffix)
-     char *myword;
-     char *suffix;
- {
--    register int i;
--    register int j;
-+    int i;
-+    int j;
-     i = strlen(myword);
-     j = strlen(suffix);
- 
-@@ -83,10 +83,10 @@ Suffix(myword, suffix)
- 
- char *
- Reverse(str)			/* return a pointer to a reversal */
--    register char *str;
-+    char *str;
- {
--    register int i;
--    register int j;
-+    int i;
-+    int j;
-     static char area[STRINGSIZE];
-     j = i = strlen(str);
-     while (*str)
-@@ -99,9 +99,9 @@ Reverse(str)			/* return a pointer to a reversal */
- 
- char *
- Uppercase(str)			/* return a pointer to an uppercase */
--    register char *str;
-+    char *str;
- {
--    register char *ptr;
-+    char *ptr;
-     static char area[STRINGSIZE];
-     ptr = area;
-     while (*str)
-@@ -116,9 +116,9 @@ Uppercase(str)			/* return a pointer to an uppercase */
- 
- char *
- Lowercase(str)			/* return a pointer to an lowercase */
--    register char *str;
-+    char *str;
- {
--    register char *ptr;
-+    char *ptr;
-     static char area[STRINGSIZE];
-     ptr = area;
-     while (*str)
-@@ -133,9 +133,9 @@ Lowercase(str)			/* return a pointer to an lowercase */
- 
- char *
- Capitalise(str)			/* return a pointer to an capitalised */
--    register char *str;
-+    char *str;
- {
--    register char *ptr;
-+    char *ptr;
-     static char area[STRINGSIZE];
-     ptr = area;
- 
-@@ -152,9 +152,9 @@ Capitalise(str)			/* return a pointer to an capitalised */
- 
- char *
- Pluralise(string)		/* returns a pointer to a plural */
--    register char *string;
-+    char *string;
- {
--    register int length;
-+    int length;
-     static char area[STRINGSIZE];
-     length = strlen(string);
-     strcpy(area, string);
-@@ -193,11 +193,11 @@ Pluralise(string)		/* returns a pointer to a plural */
- 
- char *
- Substitute(string, old, new)	/* returns pointer to a swapped about copy */
--    register char *string;
--    register char old;
--    register char new;
-+    char *string;
-+    char old;
-+    char new;
- {
--    register char *ptr;
-+    char *ptr;
-     static char area[STRINGSIZE];
-     ptr = area;
-     while (*string)
-@@ -211,11 +211,11 @@ Substitute(string, old, new)	/* returns pointer to a swapped about copy */
- 
- char *
- Purge(string, target)		/* returns pointer to a purged copy */
--    register char *string;
--    register char target;
-+    char *string;
-+    char target;
- {
--    register char *ptr;
--    static char area[STRINGSIZE];
-+    char *ptr;
-+    char area[STRINGSIZE];
-     ptr = area;
-     while (*string)
-     {
-@@ -238,11 +238,11 @@ Purge(string, target)		/* returns pointer to a purged copy */
- 
- int
- MatchClass(class, input)
--    register char class;
--    register char input;
-+    char class;
-+    char input;
- {
--    register char c;
--    register int retval;
-+    char c;
-+    int retval;
-     retval = 0;
- 
-     switch (class)
-@@ -357,8 +357,8 @@ MatchClass(class, input)
- 
- char *
- PolyStrchr(string, class)
--    register char *string;
--    register char class;
-+    char *string;
-+    char class;
- {
-     while (*string)
-     {
-@@ -373,11 +373,11 @@ PolyStrchr(string, class)
- 
- char *
- PolySubst(string, class, new)	/* returns pointer to a swapped about copy */
--    register char *string;
--    register char class;
--    register char new;
-+    char *string;
-+    char class;
-+    char new;
- {
--    register char *ptr;
-+    char *ptr;
-     static char area[STRINGSIZE];
-     ptr = area;
-     while (*string)
-@@ -391,10 +391,10 @@ PolySubst(string, class, new)	/* returns pointer to a swapped about copy */
- 
- char *
- PolyPurge(string, class)	/* returns pointer to a purged copy */
--    register char *string;
--    register char class;
-+    char *string;
-+    char class;
- {
--    register char *ptr;
-+    char *ptr;
-     static char area[STRINGSIZE];
-     ptr = area;
-     while (*string)
-@@ -433,7 +433,7 @@ Mangle(input, control)		/* returns a pointer to a controlled Mangle */
-     char *control;
- {
-     int limit;
--    register char *ptr;
-+    char *ptr;
-     static char area[STRINGSIZE * 2] = {0};
-     char area2[STRINGSIZE * 2] = {0};
-     strcpy(area, input);
-@@ -523,7 +523,7 @@ Mangle(input, control)		/* returns a pointer to a controlled Mangle */
- 		return NULL;
- 	    } else
- 	    {
--		register char *string;
-+		char *string;
- 		string = area;
- 		while (*(string++));
- 		string[-1] = *(++ptr);
-@@ -537,7 +537,7 @@ Mangle(input, control)		/* returns a pointer to a controlled Mangle */
- 		return NULL;
- 	    } else
- 	    {
--		register int i;
-+		int i;
- 		int start;
- 		int length;
- 		start = Char2Int(*(++ptr));
-@@ -563,7 +563,7 @@ Mangle(input, control)		/* returns a pointer to a controlled Mangle */
- 		return NULL;
- 	    } else
- 	    {
--		register int i;
-+		int i;
- 		i = Char2Int(*(++ptr));
- 		if (i < 0)
- 		{
-@@ -587,9 +587,9 @@ Mangle(input, control)		/* returns a pointer to a controlled Mangle */
- 		return NULL;
- 	    } else
- 	    {
--		register int i;
--		register char *p1;
--		register char *p2;
-+		int i;
-+		char *p1;
-+		char *p2;
- 		i = Char2Int(*(++ptr));
- 		if (i < 0)
- 		{
-@@ -696,7 +696,7 @@ Mangle(input, control)		/* returns a pointer to a controlled Mangle */
- 		return NULL;
- 	    } else
- 	    {
--		register int i;
-+		int i;
- 		if ((i = Char2Int(ptr[1])) < 0)
- 		{
- 		    Debug(1, "Mangle: '=' weird argument in '%s'\n", control);
-@@ -723,7 +723,7 @@ Mangle(input, control)		/* returns a pointer to a controlled Mangle */
- 	case RULE_DFIRST:
- 	    if (area[0])
- 	    {
--		register int i;
-+		int i;
- 		for (i = 1; area[i]; i++)
- 		{
- 		    area[i - 1] = area[i];
-@@ -735,7 +735,7 @@ Mangle(input, control)		/* returns a pointer to a controlled Mangle */
- 	case RULE_DLAST:
- 	    if (area[0])
- 	    {
--		register int i;
-+		int i;
- 		for (i = 1; area[i]; i++);
- 		area[i - 1] = '\0';
- 	    }
-@@ -771,7 +771,7 @@ Mangle(input, control)		/* returns a pointer to a controlled Mangle */
- 		return NULL;
- 	    } else
- 	    {
--		register int i;
-+		int i;
- 
- 		for (i = 0; area[i]; i++);
- 
-@@ -815,8 +815,8 @@ Mangle(input, control)		/* returns a pointer to a controlled Mangle */
- 
- int
- PMatch(control, string)
--register char *control;
--register char *string;
-+char *control;
-+char *string;
- {
-     while (*string && *control)
-     {
--- 
-2.37.1
-
diff --git a/poky/meta/recipes-extended/cracklib/cracklib/0002-rules-Correct-parameter-types-to-Debug-calls.patch b/poky/meta/recipes-extended/cracklib/cracklib/0002-rules-Correct-parameter-types-to-Debug-calls.patch
deleted file mode 100644
index a8692b0..0000000
--- a/poky/meta/recipes-extended/cracklib/cracklib/0002-rules-Correct-parameter-types-to-Debug-calls.patch
+++ /dev/null
@@ -1,40 +0,0 @@
-From 793921a8ee4ae7f20e1fd2bbec5196bc83176b01 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Sun, 7 Aug 2022 12:25:24 -0700
-Subject: [PATCH 2/2] rules: Correct parameter types to Debug() calls
-
-Fixes
-src/lib/rules.c:346:45: error: incompatible integer to pointer conversion passing 'char' to parameter of type 'char *'; take the address with & [-Wint-conversion]
-src/lib/rules.c:804:53: error: incompatible integer to pointer conversion passing 'char' to parameter of type 'char *'; remove * [-Wint-conversion]                                           Debug(1, "Mangle: unknown command %c in %s\n", *ptr, control);
-                                                           ^~~~
-Upstream-Status: Submitted [https://github.com/cracklib/cracklib/pull/48]
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- src/lib/rules.c | 4 ++--
- 1 file changed, 2 insertions(+), 2 deletions(-)
-
-diff --git a/lib/rules.c b/lib/rules.c
-index 6e7a12a..4a34f91 100644
---- a/lib/rules.c
-+++ b/lib/rules.c
-@@ -343,7 +343,7 @@ MatchClass(class, input)
- 	break;
- 
-     default:
--	Debug(1, "MatchClass: unknown class %c\n", class);
-+	Debug(1, "MatchClass: unknown class %c\n", &class);
- 	return (0);
- 	break;
-     }
-@@ -801,7 +801,7 @@ Mangle(input, control)		/* returns a pointer to a controlled Mangle */
- 	    }
- 
- 	default:
--	    Debug(1, "Mangle: unknown command %c in %s\n", *ptr, control);
-+	    Debug(1, "Mangle: unknown command %c in %s\n", ptr, control);
- 	    return NULL;
- 	    break;
- 	}
--- 
-2.37.1
-
diff --git a/poky/meta/recipes-extended/cracklib/cracklib_2.9.7.bb b/poky/meta/recipes-extended/cracklib/cracklib_2.9.8.bb
similarity index 83%
rename from poky/meta/recipes-extended/cracklib/cracklib_2.9.7.bb
rename to poky/meta/recipes-extended/cracklib/cracklib_2.9.8.bb
index ffed88e..786940a 100644
--- a/poky/meta/recipes-extended/cracklib/cracklib_2.9.7.bb
+++ b/poky/meta/recipes-extended/cracklib/cracklib_2.9.8.bb
@@ -12,11 +12,9 @@
 SRC_URI = "git://github.com/cracklib/cracklib;protocol=https;branch=master \
            file://0001-packlib.c-support-dictionary-byte-order-dependent.patch \
            file://0002-craklib-fix-testnum-and-teststr-failed.patch \
-           file://0001-rules-Drop-using-register-keyword.patch \
-           file://0002-rules-Correct-parameter-types-to-Debug-calls.patch \
            "
 
-SRCREV = "f83934cf3cced0c9600c7d81332f4169f122a2cf"
+SRCREV = "d9e8f9f47718539aeba80f90f4e072549926dc9c"
 S = "${WORKDIR}/git/src"
 
 inherit autotools gettext
diff --git a/poky/meta/recipes-extended/cronie/cronie_1.6.1.bb b/poky/meta/recipes-extended/cronie/cronie_1.6.1.bb
index 6d150dd..0781875 100644
--- a/poky/meta/recipes-extended/cronie/cronie_1.6.1.bb
+++ b/poky/meta/recipes-extended/cronie/cronie_1.6.1.bb
@@ -14,9 +14,9 @@
 
 SECTION = "utils"
 
-UPSTREAM_CHECK_URI = "https://github.com/cronie-crond/${BPN}/releases/"
+GITHUB_BASE_URI = "https://github.com/cronie-crond/${BPN}/releases/"
 
-SRC_URI = "https://github.com/cronie-crond/cronie/releases/download/cronie-${PV}/cronie-${PV}.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/cronie-${PV}/cronie-${PV}.tar.gz \
            file://crond.init \
            file://crontab \
            file://crond.service \
@@ -27,7 +27,8 @@
 
 SRC_URI[sha256sum] = "2cd0f0dd1680e6b9c39bf1e3a5e7ad6df76aa940de1ee90a453633aa59984e62"
 
-inherit autotools update-rc.d useradd systemd
+inherit autotools update-rc.d useradd systemd github-releases
+UPSTREAM_CHECK_REGEX = "releases/tag/cronie-(?P<pver>\d+(\.\d+)+)"
 
 PACKAGECONFIG ?= "${@bb.utils.filter('DISTRO_FEATURES', 'pam', d)}"
 
diff --git a/poky/meta/recipes-extended/cups/cups.inc b/poky/meta/recipes-extended/cups/cups.inc
index 4592980..9c920bb 100644
--- a/poky/meta/recipes-extended/cups/cups.inc
+++ b/poky/meta/recipes-extended/cups/cups.inc
@@ -8,7 +8,7 @@
 LICENSE = "Apache-2.0"
 DEPENDS = "libpng jpeg dbus zlib libusb1"
 
-SRC_URI = "https://github.com/OpenPrinting/cups/releases/download/v${PV}/cups-${PV}-source.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/cups-${PV}-source.tar.gz \
            file://0001-use-echo-only-in-init.patch \
            file://0002-don-t-try-to-run-generated-binaries.patch \
            file://libexecdir.patch \
@@ -17,8 +17,7 @@
            file://cups-volatiles.conf \
            "
 
-UPSTREAM_CHECK_URI = "https://github.com/OpenPrinting/cups/releases"
-UPSTREAM_CHECK_REGEX = "cups-(?P<pver>(?!.+\d(b|rc)\d.+).+)-source.tar"
+GITHUB_BASE_URI = "https://github.com/OpenPrinting/cups/releases"
 
 # Issue only applies to MacOS
 CVE_CHECK_IGNORE += "CVE-2008-1033"
@@ -33,7 +32,7 @@
 
 CLEANBROKEN = "1"
 
-inherit autotools-brokensep binconfig useradd systemd pkgconfig multilib_script
+inherit autotools-brokensep binconfig useradd systemd pkgconfig multilib_script github-releases
 
 USERADD_PACKAGES = "${PN}"
 GROUPADD_PARAM:${PN} = "--system lpadmin"
diff --git a/poky/meta/recipes-extended/less/less_600.bb b/poky/meta/recipes-extended/less/less_608.bb
similarity index 90%
rename from poky/meta/recipes-extended/less/less_600.bb
rename to poky/meta/recipes-extended/less/less_608.bb
index f51083e..f411a8f 100644
--- a/poky/meta/recipes-extended/less/less_600.bb
+++ b/poky/meta/recipes-extended/less/less_608.bb
@@ -28,10 +28,9 @@
 SRC_URI = "http://www.greenwoodsoftware.com/${BPN}/${BPN}-${PV}.tar.gz \
 	  "
 
-SRC_URI[sha256sum] = "6633d6aa2b3cc717afb2c205778c7c42c4620f63b1d682f3d12c98af0be74d20"
+SRC_URI[sha256sum] = "a69abe2e0a126777e021d3b73aa3222e1b261f10e64624d41ec079685a6ac209"
 
 UPSTREAM_CHECK_URI = "http://www.greenwoodsoftware.com/less/download.html"
-UPSTREAM_VERSION_UNKNOWN = "1"
 
 inherit autotools update-alternatives
 
diff --git a/poky/meta/recipes-extended/logrotate/logrotate_3.20.1.bb b/poky/meta/recipes-extended/logrotate/logrotate_3.20.1.bb
index 3597753..034c37b 100644
--- a/poky/meta/recipes-extended/logrotate/logrotate_3.20.1.bb
+++ b/poky/meta/recipes-extended/logrotate/logrotate_3.20.1.bb
@@ -10,10 +10,7 @@
 
 LIC_FILES_CHKSUM = "file://COPYING;md5=b234ee4d69f5fce4486a80fdaf4a4263"
 
-UPSTREAM_CHECK_URI = "https://github.com/${BPN}/${BPN}/releases"
-UPSTREAM_CHECK_REGEX = "logrotate-(?P<pver>\d+(\.\d+)+).tar"
-
-SRC_URI = "https://github.com/${BPN}/${BPN}/releases/download/${PV}/${BP}.tar.xz"
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/${BP}.tar.xz"
 
 SRC_URI[sha256sum] = "742f6d6e18eceffa49a4bacd933686d3e42931cfccfb694d7f6369b704e5d094"
 
@@ -47,7 +44,7 @@
 # INSTALL=install and BASEDIR=/usr.
 OS_NAME = "Linux"
 
-inherit autotools systemd
+inherit autotools systemd github-releases
 
 SYSTEMD_SERVICE:${PN} = "\
     ${BPN}.service \
diff --git a/poky/meta/recipes-extended/ltp/ltp_20220527.bb b/poky/meta/recipes-extended/ltp/ltp_20220527.bb
index 00ff906..b657a6a 100644
--- a/poky/meta/recipes-extended/ltp/ltp_20220527.bb
+++ b/poky/meta/recipes-extended/ltp/ltp_20220527.bb
@@ -150,13 +150,11 @@
 	# sync with upstream
 	# https://github.com/linux-test-project/ltp/blob/master/ci/alpine.sh#L33
 	rm -rfv \
-		testcases/kernel/syscalls/confstr/confstr01.c \
 		testcases/kernel/syscalls/fmtmsg/fmtmsg01.c \
 		testcases/kernel/syscalls/getcontext/getcontext01.c \
 		testcases/kernel/syscalls/rt_tgsigqueueinfo/rt_tgsigqueueinfo01.c \
 		testcases/kernel/syscalls/timer_create/timer_create01.c \
-		testcases/kernel/syscalls/timer_create/timer_create03.c \
-		utils/benchmark/ebizzy-0.3
+		testcases/kernel/syscalls/timer_create/timer_create03.c
 }
 do_patch[postfuncs] += "remove_broken_musl_sources"
 
diff --git a/poky/meta/recipes-extended/pam/libpam_1.3.1.bb b/poky/meta/recipes-extended/pam/libpam_1.3.1.bb
index 9a7cfec..bede441 100644
--- a/poky/meta/recipes-extended/pam/libpam_1.3.1.bb
+++ b/poky/meta/recipes-extended/pam/libpam_1.3.1.bb
@@ -12,7 +12,7 @@
                     file://libpamc/License;md5=a4da476a14c093fdc73be3c3c9ba8fb3 \
                     "
 
-SRC_URI = "https://github.com/linux-pam/linux-pam/releases/download/v${PV}/Linux-PAM-${PV}.tar.xz \
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/Linux-PAM-${PV}.tar.xz \
            file://99_pam \
            file://pam.d/common-account \
            file://pam.d/common-auth \
@@ -42,7 +42,7 @@
 
 S = "${WORKDIR}/Linux-PAM-${PV}"
 
-inherit autotools gettext pkgconfig
+inherit autotools gettext pkgconfig systemd ptest github-releases
 
 PACKAGECONFIG ??= ""
 PACKAGECONFIG[audit] = "--enable-audit,--disable-audit,audit,"
@@ -191,6 +191,6 @@
 CONFFILES:${PN}-runtime += "${sysconfdir}/pam.d/common-account"
 CONFFILES:${PN}-runtime += "${sysconfdir}/security/limits.conf"
 
-UPSTREAM_CHECK_URI = "https://github.com/linux-pam/linux-pam/releases"
+GITHUB_BASE_URI = "https://github.com/linux-pam/linux-pam/releases"
 
 CVE_PRODUCT = "linux-pam"
diff --git a/poky/meta/recipes-example/rust-hello-world/rust-hello-world/0001-enable-LTO.patch b/poky/meta/recipes-extended/rust-example/rust-hello-world/0001-enable-LTO.patch
similarity index 100%
rename from poky/meta/recipes-example/rust-hello-world/rust-hello-world/0001-enable-LTO.patch
rename to poky/meta/recipes-extended/rust-example/rust-hello-world/0001-enable-LTO.patch
diff --git a/poky/meta/recipes-example/rust-hello-world/rust-hello-world_git.bb b/poky/meta/recipes-extended/rust-example/rust-hello-world_git.bb
similarity index 100%
rename from poky/meta/recipes-example/rust-hello-world/rust-hello-world_git.bb
rename to poky/meta/recipes-extended/rust-example/rust-hello-world_git.bb
diff --git a/poky/meta/recipes-extended/shadow/files/securetty b/poky/meta/recipes-extended/shadow/files/securetty
index 2be341a..820728f 100644
--- a/poky/meta/recipes-extended/shadow/files/securetty
+++ b/poky/meta/recipes-extended/shadow/files/securetty
@@ -7,6 +7,7 @@
 ttyS1
 ttyS2
 ttyS3
+ttyS4
 
 # ARM AMBA SoCs
 ttyAM0
diff --git a/poky/meta/recipes-extended/shadow/shadow.inc b/poky/meta/recipes-extended/shadow/shadow.inc
index 414bf46..a87e235 100644
--- a/poky/meta/recipes-extended/shadow/shadow.inc
+++ b/poky/meta/recipes-extended/shadow/shadow.inc
@@ -10,8 +10,8 @@
 
 DEPENDS = "virtual/crypt"
 
-UPSTREAM_CHECK_URI = "https://github.com/shadow-maint/shadow/releases"
-SRC_URI = "https://github.com/shadow-maint/shadow/releases/download/${PV}/${BP}.tar.gz \
+GITHUB_BASE_URI = "https://github.com/shadow-maint/shadow/releases"
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/${BP}.tar.gz \
            file://0001-shadow-use-relaxed-usernames.patch \
            ${@bb.utils.contains('PACKAGECONFIG', 'pam', '${PAM_SRC_URI}', '', d)} \
            file://useradd \
@@ -42,7 +42,7 @@
                file://pam.d/passwd \
                file://pam.d/su"
 
-inherit autotools gettext
+inherit autotools gettext github-releases
 
 export CONFIG_SHELL="/bin/sh"
 
diff --git a/poky/meta/recipes-extended/timezone/timezone.inc b/poky/meta/recipes-extended/timezone/timezone.inc
index 2b956cf..d3c78e9 100644
--- a/poky/meta/recipes-extended/timezone/timezone.inc
+++ b/poky/meta/recipes-extended/timezone/timezone.inc
@@ -6,7 +6,7 @@
 LICENSE = "PD & BSD-3-Clause"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=c679c9d6b02bc2757b3eaf8f53c43fba"
 
-PV = "2022b"
+PV = "2022d"
 
 SRC_URI =" http://www.iana.org/time-zones/repository/releases/tzcode${PV}.tar.gz;name=tzcode \
            http://www.iana.org/time-zones/repository/releases/tzdata${PV}.tar.gz;name=tzdata \
@@ -14,6 +14,6 @@
 
 UPSTREAM_CHECK_URI = "http://www.iana.org/time-zones"
 
-SRC_URI[tzcode.sha256sum] = "bab20d943e59a3218435f48d868a4e552f18d6d7f3dd128660c5660c80b8a05f"
-SRC_URI[tzdata.sha256sum] = "f590eaf04a395245426c2be4fae71c143aea5cebc11088b7a0a5704461df397d"
+SRC_URI[tzcode.sha256sum] = "d644ba0f938899374ea8cb554e35fb4afa0f7bd7b716c61777cd00500b8759e0"
+SRC_URI[tzdata.sha256sum] = "6ecdbee27fa43dcfa49f3d4fd8bb1dfef54c90da1abcd82c9abcf2dc4f321de0"
 
diff --git a/poky/meta/recipes-extended/watchdog/watchdog_5.16.bb b/poky/meta/recipes-extended/watchdog/watchdog_5.16.bb
index 26fcc10..6031dca 100644
--- a/poky/meta/recipes-extended/watchdog/watchdog_5.16.bb
+++ b/poky/meta/recipes-extended/watchdog/watchdog_5.16.bb
@@ -28,10 +28,6 @@
 
 inherit autotools update-rc.d systemd pkgconfig
 
-DEPENDS += "libtirpc"
-CFLAGS += "-I${STAGING_INCDIR}/tirpc"
-LDFLAGS += "-ltirpc"
-
 EXTRA_OECONF += " --disable-nfs "
 CACHED_CONFIGUREVARS += "ac_cv_path_PATH_SENDMAIL=${sbindir}/sendmail"
 
diff --git a/poky/meta/recipes-gnome/gobject-introspection/gobject-introspection_1.72.0.bb b/poky/meta/recipes-gnome/gobject-introspection/gobject-introspection_1.72.0.bb
index 9a47e90..d935cbd 100644
--- a/poky/meta/recipes-gnome/gobject-introspection/gobject-introspection_1.72.0.bb
+++ b/poky/meta/recipes-gnome/gobject-introspection/gobject-introspection_1.72.0.bb
@@ -103,7 +103,7 @@
         # for a different architecture
         cat > ${B}/g-ir-scanner-lddwrapper << EOF
 #!/bin/sh
-$OBJDUMP -p "\$@"
+\$OBJDUMP -p "\$@"
 EOF
         chmod +x ${B}/g-ir-scanner-lddwrapper
 
diff --git a/poky/meta/recipes-graphics/cairo/cairo_1.16.0.bb b/poky/meta/recipes-graphics/cairo/cairo_1.16.0.bb
index 67081bb..ffb813d 100644
--- a/poky/meta/recipes-graphics/cairo/cairo_1.16.0.bb
+++ b/poky/meta/recipes-graphics/cairo/cairo_1.16.0.bb
@@ -17,9 +17,13 @@
 LICENSE:${PN}-gobject = "MPL-1.1 | LGPL-2.1-only"
 LICENSE:${PN}-script-interpreter = "MPL-1.1 | LGPL-2.1-only"
 LICENSE:${PN}-perf-utils = "GPL-3.0-or-later"
+# Adapt the licenses for cairo-dbg and cairo-src depending on whether
+# cairo-trace is being built.
+LICENSE:${PN}-dbg = "(MPL-1.1 | LGPL-2.1-only)${@bb.utils.contains('PACKAGECONFIG', 'trace', ' & GPL-3.0-or-later', '', d)}"
+LICENSE:${PN}-src = "(MPL-1.1 | LGPL-2.1-only)${@bb.utils.contains('PACKAGECONFIG', 'trace', ' & GPL-3.0-or-later', '', d)}"
 
 LIC_FILES_CHKSUM = "file://COPYING;md5=e73e999e0c72b5ac9012424fa157ad77 \
-                    file://util/cairo-trace/COPYING-GPL-3;md5=d32239bcb673463ab874e80d47fae504"
+                    ${@bb.utils.contains('PACKAGECONFIG', 'trace', 'file://util/cairo-trace/COPYING-GPL-3;md5=d32239bcb673463ab874e80d47fae504', '', d)}"
 
 
 DEPENDS = "fontconfig glib-2.0 libpng pixman zlib"
diff --git a/poky/meta/recipes-graphics/drm/libdrm_2.4.112.bb b/poky/meta/recipes-graphics/drm/libdrm_2.4.113.bb
similarity index 62%
rename from poky/meta/recipes-graphics/drm/libdrm_2.4.112.bb
rename to poky/meta/recipes-graphics/drm/libdrm_2.4.113.bb
index 4883824..959ef68 100644
--- a/poky/meta/recipes-graphics/drm/libdrm_2.4.112.bb
+++ b/poky/meta/recipes-graphics/drm/libdrm_2.4.113.bb
@@ -13,28 +13,29 @@
 SRC_URI = "http://dri.freedesktop.org/libdrm/${BP}.tar.xz \
           "
 
-SRC_URI[sha256sum] = "00b07710bd09b35cd8d80eaf4f4497fe27f4becf467a9830f1f5e8324f8420ff"
+SRC_URI[sha256sum] = "7fd7eb2967f63beb4606f22d50e277d993480d05ef75dd88a9bd8e677323e5e1"
 
 inherit meson pkgconfig manpages
 
-PACKAGECONFIG ??= "intel radeon amdgpu nouveau vmwgfx omap freedreno vc4 etnaviv install-test-programs"
-PACKAGECONFIG[intel] = "-Dintel=true,-Dintel=false,libpciaccess"
-PACKAGECONFIG[radeon] = "-Dradeon=true,-Dradeon=false"
-PACKAGECONFIG[amdgpu] = "-Damdgpu=true,-Damdgpu=false"
-PACKAGECONFIG[nouveau] = "-Dnouveau=true,-Dnouveau=false"
-PACKAGECONFIG[vmwgfx] = "-Dvmwgfx=true,-Dvmwgfx=false"
-PACKAGECONFIG[omap] = "-Domap=true,-Domap=false"
-PACKAGECONFIG[exynos] = "-Dexynos=true,-Dexynos=false"
-PACKAGECONFIG[freedreno] = "-Dfreedreno=true,-Dfreedreno=false"
-PACKAGECONFIG[tegra] = "-Dtegra=true,-Dtegra=false"
-PACKAGECONFIG[vc4] = "-Dvc4=true,-Dvc4=false"
-PACKAGECONFIG[etnaviv] = "-Detnaviv=true,-Detnaviv=false"
+PACKAGECONFIG ??= "intel radeon amdgpu nouveau vmwgfx omap freedreno vc4 etnaviv tests install-test-programs"
+PACKAGECONFIG[intel] = "-Dintel=enabled,-Dintel=disabled,libpciaccess"
+PACKAGECONFIG[radeon] = "-Dradeon=enabled,-Dradeon=disabled"
+PACKAGECONFIG[amdgpu] = "-Damdgpu=enabled,-Damdgpu=disabled"
+PACKAGECONFIG[nouveau] = "-Dnouveau=enabled,-Dnouveau=disabled"
+PACKAGECONFIG[vmwgfx] = "-Dvmwgfx=enabled,-Dvmwgfx=disabled"
+PACKAGECONFIG[omap] = "-Domap=enabled,-Domap=disabled"
+PACKAGECONFIG[exynos] = "-Dexynos=enabled,-Dexynos=disabled"
+PACKAGECONFIG[freedreno] = "-Dfreedreno=enabled,-Dfreedreno=disabled"
+PACKAGECONFIG[tegra] = "-Dtegra=enabled,-Dtegra=disabled"
+PACKAGECONFIG[vc4] = "-Dvc4=enabled,-Dvc4=disabled"
+PACKAGECONFIG[etnaviv] = "-Detnaviv=enabled,-Detnaviv=disabled"
 PACKAGECONFIG[freedreno-kgsl] = "-Dfreedreno-kgsl=true,-Dfreedreno-kgsl=false"
-PACKAGECONFIG[valgrind] = "-Dvalgrind=true,-Dvalgrind=false,valgrind"
+PACKAGECONFIG[valgrind] = "-Dvalgrind=enabled,-Dvalgrind=disabled,valgrind"
 PACKAGECONFIG[install-test-programs] = "-Dinstall-test-programs=true,-Dinstall-test-programs=false"
-PACKAGECONFIG[cairo-tests] = "-Dcairo-tests=true,-Dcairo-tests=false"
+PACKAGECONFIG[cairo-tests] = "-Dcairo-tests=enabled,-Dcairo-tests=disabled"
+PACKAGECONFIG[tests] = "-Dtests=true,-Dtests=false"
 PACKAGECONFIG[udev] = "-Dudev=true,-Dudev=false,udev"
-PACKAGECONFIG[manpages] = "-Dman-pages=true,-Dman-pages=false,libxslt-native xmlto-native python3-docutils-native"
+PACKAGECONFIG[manpages] = "-Dman-pages=enabled,-Dman-pages=disabled,libxslt-native xmlto-native python3-docutils-native"
 
 ALLOW_EMPTY:${PN}-drivers = "1"
 PACKAGES =+ "${PN}-tests ${PN}-drivers ${PN}-radeon ${PN}-nouveau ${PN}-omap \
diff --git a/poky/meta/recipes-graphics/harfbuzz/harfbuzz_5.1.0.bb b/poky/meta/recipes-graphics/harfbuzz/harfbuzz_5.1.0.bb
index 4c2d774..4905e8e 100644
--- a/poky/meta/recipes-graphics/harfbuzz/harfbuzz_5.1.0.bb
+++ b/poky/meta/recipes-graphics/harfbuzz/harfbuzz_5.1.0.bb
@@ -8,15 +8,12 @@
                     file://src/hb-ucd.cc;beginline=1;endline=15;md5=29d4dcb6410429195df67efe3382d8bc \
                     "
 
-UPSTREAM_CHECK_URI = "https://github.com/${BPN}/${BPN}/releases"
-UPSTREAM_CHECK_REGEX = "harfbuzz-(?P<pver>\d+(\.\d+)+).tar"
-
-SRC_URI = "https://github.com/${BPN}/${BPN}/releases/download/${PV}/${BPN}-${PV}.tar.xz \
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/${BPN}-${PV}.tar.xz \
            file://0001-fix-signedness-of-char-in-tests.patch \
            "
 SRC_URI[sha256sum] = "2edb95db668781aaa8d60959d21be2ff80085f31b12053cdd660d9a50ce84f05"
 
-inherit meson pkgconfig lib_package gtk-doc gobject-introspection
+inherit meson pkgconfig lib_package gtk-doc gobject-introspection github-releases
 
 GIR_MESON_ENABLE_FLAG = 'enabled'
 GIR_MESON_DISABLE_FLAG = 'disabled'
diff --git a/poky/meta/recipes-graphics/kmscube/kmscube/0001-drm-common.c-do-not-use-invalid-modifier.patch b/poky/meta/recipes-graphics/kmscube/kmscube/0001-drm-common.c-do-not-use-invalid-modifier.patch
deleted file mode 100644
index 58ff3ba..0000000
--- a/poky/meta/recipes-graphics/kmscube/kmscube/0001-drm-common.c-do-not-use-invalid-modifier.patch
+++ /dev/null
@@ -1,27 +0,0 @@
-From bdde833c254092a47df6c7109a9751653c82aaae Mon Sep 17 00:00:00 2001
-From: Alexander Kanavin <alex@linutronix.de>
-Date: Mon, 8 Aug 2022 20:22:39 +0200
-Subject: [PATCH] drm-common.c: do not use invalid modifier
-
-Prior to kernel 5.19 this was a soft failure, but 5.19
-adds checks that result in a hard syscall fail.
-
-Upstream-Status: Submitted [https://gitlab.freedesktop.org/mesa/kmscube/-/merge_requests/33]
-Signed-off-by: Alexander Kanavin <alex@linutronix.de>
----
- drm-common.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/drm-common.c b/drm-common.c
-index 5c9cca2..964e1c3 100644
---- a/drm-common.c
-+++ b/drm-common.c
-@@ -92,7 +92,7 @@ struct drm_fb * drm_fb_get_from_bo(struct gbm_bo *bo)
- 			modifiers[i] = modifiers[0];
- 		}
- 
--		if (modifiers[0]) {
-+		if (modifiers[0] && modifiers[0] != DRM_FORMAT_MOD_INVALID) {
- 			flags = DRM_MODE_FB_MODIFIERS;
- 			printf("Using modifier %" PRIx64 "\n", modifiers[0]);
- 		}
diff --git a/poky/meta/recipes-graphics/kmscube/kmscube/0001-texturator-Use-correct-GL-extension-header.patch b/poky/meta/recipes-graphics/kmscube/kmscube/0001-texturator-Use-correct-GL-extension-header.patch
deleted file mode 100644
index 5965782..0000000
--- a/poky/meta/recipes-graphics/kmscube/kmscube/0001-texturator-Use-correct-GL-extension-header.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From 2b74e0e32235f6ab2e3e42d53dea985a7ba6227f Mon Sep 17 00:00:00 2001
-From: Damian Hobson-Garcia <dhobsong@igel.co.jp>
-Date: Wed, 16 Dec 2020 11:08:25 +0900
-Subject: [PATCH] texturator: Use correct GL extension header
-
-gl2ext.h is the extenstion header for OpenGL ES 2.0 and all later
-versions according to the Khronos documentation [1].  gl3ext.h is either
-an empty stub, or may not even exist on some platforms.
-
-[1]: https://www.khronos.org/registry/OpenGL/index_es.php#headers
-
-Upstream-Status: Submitted [https://gitlab.freedesktop.org/mesa/kmscube/-/merge_requests/26]
-Signed-off-by: Quentin Schulz <quentin.schulz@theobroma-systems.com>
----
- texturator.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/texturator.c b/texturator.c
-index d9335d7..6d97856 100644
---- a/texturator.c
-+++ b/texturator.c
-@@ -30,7 +30,7 @@
- #include <math.h>
- 
- #include <GLES3/gl3.h>
--#include <GLES3/gl3ext.h>
-+#include <GLES2/gl2ext.h>
- 
- #ifdef HAVE_LIBPNG
- #include <png.h>
--- 
-2.33.1
-
diff --git a/poky/meta/recipes-graphics/kmscube/kmscube_git.bb b/poky/meta/recipes-graphics/kmscube/kmscube_git.bb
index f7ee6e4..6ed97df 100644
--- a/poky/meta/recipes-graphics/kmscube/kmscube_git.bb
+++ b/poky/meta/recipes-graphics/kmscube/kmscube_git.bb
@@ -10,11 +10,8 @@
 
 LIC_FILES_CHKSUM = "file://kmscube.c;beginline=1;endline=23;md5=8b309d4ee67b7315ff7381270dd631fb"
 
-SRCREV = "9f63f359fab1b5d8e862508e4e51c9dfe339ccb0"
-SRC_URI = "git://gitlab.freedesktop.org/mesa/kmscube;branch=master;protocol=https \
-           file://0001-texturator-Use-correct-GL-extension-header.patch \
-           file://0001-drm-common.c-do-not-use-invalid-modifier.patch \
-           "
+SRCREV = "3bf6ee1a02334386d87cfe356c3bfb0b24e1fed8"
+SRC_URI = "git://gitlab.freedesktop.org/mesa/kmscube;branch=master;protocol=https"
 UPSTREAM_CHECK_COMMITS = "1"
 
 S = "${WORKDIR}/git"
diff --git a/poky/meta/recipes-graphics/libepoxy/libepoxy_1.5.9.bb b/poky/meta/recipes-graphics/libepoxy/libepoxy_1.5.9.bb
index 487fc00..1210f73 100644
--- a/poky/meta/recipes-graphics/libepoxy/libepoxy_1.5.9.bb
+++ b/poky/meta/recipes-graphics/libepoxy/libepoxy_1.5.9.bb
@@ -9,13 +9,13 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://COPYING;md5=58ef4c80d401e07bd9ee8b6b58cf464b"
 
-SRC_URI = "https://github.com/anholt/${BPN}/releases/download/${PV}/${BP}.tar.xz \
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/${BP}.tar.xz \
            file://0001-dispatch_common.h-define-also-EGL_NO_X11.patch \
            "
 SRC_URI[sha256sum] = "d168a19a6edfdd9977fef1308ccf516079856a4275cf876de688fb7927e365e4"
-UPSTREAM_CHECK_URI = "https://github.com/anholt/libepoxy/releases"
+GITHUB_BASE_URI = "https://github.com/anholt/libepoxy/releases"
 
-inherit meson pkgconfig features_check
+inherit meson pkgconfig features_check github-releases
 
 REQUIRED_DISTRO_FEATURES = "opengl"
 
diff --git a/poky/meta/recipes-graphics/libsdl2/libsdl2/0001-video-restore-ability-to-disable-fb-accel-via-hint.patch b/poky/meta/recipes-graphics/libsdl2/libsdl2/0001-video-restore-ability-to-disable-fb-accel-via-hint.patch
deleted file mode 100644
index fc74d30..0000000
--- a/poky/meta/recipes-graphics/libsdl2/libsdl2/0001-video-restore-ability-to-disable-fb-accel-via-hint.patch
+++ /dev/null
@@ -1,36 +0,0 @@
-From 14ad91658fd296e34bb9e833281e72c871bfb189 Mon Sep 17 00:00:00 2001
-From: Alexander Kanavin <alex@linutronix.de>
-Date: Tue, 3 May 2022 12:31:50 +0200
-Subject: [PATCH] video: restore ability to disable fb accel via hint
-
-Somewhere in code refactoring between .20 and .22 this check
-was lost, and so the hint had no effect anymore.
-
-Upstream-Status: Submitted [https://github.com/libsdl-org/SDL/pull/5611]
-Signed-off-by: Alexander Kanavin <alex@linutronix.de>
----
- src/video/SDL_video.c | 8 ++++++++
- 1 file changed, 8 insertions(+)
-
-diff --git a/src/video/SDL_video.c b/src/video/SDL_video.c
-index 93c803e..cbe7616 100644
---- a/src/video/SDL_video.c
-+++ b/src/video/SDL_video.c
-@@ -2503,6 +2503,14 @@ SDL_CreateWindowFramebuffer(SDL_Window * window)
-     if (!_this->checked_texture_framebuffer) {
-         SDL_bool attempt_texture_framebuffer = SDL_TRUE;
- 
-+        /* See if the user or application wants to specifically disable the framebuffer */
-+        const char *hint = SDL_GetHint(SDL_HINT_FRAMEBUFFER_ACCELERATION);
-+        if (hint) {
-+            if (*hint == '0' || SDL_strcasecmp(hint, "false") == 0) {
-+                attempt_texture_framebuffer = SDL_FALSE;
-+            }
-+        }
-+
-         if (_this->is_dummy) {  /* dummy driver never has GPU support, of course. */
-             attempt_texture_framebuffer = SDL_FALSE;
-         }
--- 
-2.30.2
-
diff --git a/poky/meta/recipes-graphics/libsdl2/libsdl2_2.0.22.bb b/poky/meta/recipes-graphics/libsdl2/libsdl2_2.24.0.bb
similarity index 92%
rename from poky/meta/recipes-graphics/libsdl2/libsdl2_2.0.22.bb
rename to poky/meta/recipes-graphics/libsdl2/libsdl2_2.24.0.bb
index ff3e162..d5cbf73 100644
--- a/poky/meta/recipes-graphics/libsdl2/libsdl2_2.0.22.bb
+++ b/poky/meta/recipes-graphics/libsdl2/libsdl2_2.24.0.bb
@@ -22,13 +22,12 @@
 PROVIDES = "virtual/libsdl2"
 
 SRC_URI = "http://www.libsdl.org/release/SDL2-${PV}.tar.gz \
-           file://0001-video-restore-ability-to-disable-fb-accel-via-hint.patch \
            "
 SRC_URI:append:class-native = " file://0001-Disable-libunwind-in-native-OE-builds-by-not-looking.patch"
 
 S = "${WORKDIR}/SDL2-${PV}"
 
-SRC_URI[sha256sum] = "fe7cbf3127882e3fc7259a75a0cb585620272c51745d3852ab9dd87960697f2e"
+SRC_URI[sha256sum] = "91e4c34b1768f92d399b078e171448c6af18cafda743987ed2064a28954d6d97"
 
 inherit cmake lib_package binconfig-disabled pkgconfig upstream-version-is-even
 
@@ -43,9 +42,7 @@
                  -DSDL_PTHREADS=ON \
                  -DSDL_RPATH=OFF \
                  -DSDL_SNDIO=OFF \
-                 -DSDL_X11_XVM=OFF \
                  -DSDL_X11_XCURSOR=OFF \
-                 -DSDL_X11_XINERAMA=OFF \
                  -DSDL_X11_XDBE=OFF \
                  -DSDL_X11_XFIXES=OFF \
                  -DSDL_X11_XINPUT=OFF \
@@ -62,7 +59,7 @@
 PACKAGECONFIG:class-nativesdk = "${@bb.utils.filter('DISTRO_FEATURES', 'x11', d)} ${PACKAGECONFIG_GL}"
 PACKAGECONFIG ??= " \
     ${PACKAGECONFIG_GL} \
-    ${@bb.utils.filter('DISTRO_FEATURES', 'alsa directfb pulseaudio x11', d)} \
+    ${@bb.utils.filter('DISTRO_FEATURES', 'alsa directfb pulseaudio x11 vulkan', d)} \
     ${@bb.utils.contains('DISTRO_FEATURES', 'wayland', 'wayland gles2', '', d)} \
     ${@bb.utils.contains("TUNE_FEATURES", "neon","arm-neon","",d)} \
 "
@@ -77,9 +74,12 @@
 PACKAGECONFIG[libusb] = ",,libusb1"
 PACKAGECONFIG[opengl]     = "-DSDL_OPENGL=ON,-DSDL_OPENGL=OFF,virtual/egl"
 PACKAGECONFIG[pulseaudio] = "-DSDL_PULSEAUDIO=ON,-DSDL_PULSEAUDIO=OFF,pulseaudio"
+PACKAGECONFIG[vulkan]    = "-DSDL_VULKAN=ON,-DSDL_VULKAN=OFF"
 PACKAGECONFIG[wayland]    = "-DSDL_WAYLAND=ON,-DSDL_WAYLAND=OFF,wayland-native wayland wayland-protocols libxkbcommon"
 PACKAGECONFIG[x11]        = "-DSDL_X11=ON,-DSDL_X11=OFF,virtual/libx11 libxext libxrandr libxrender"
 
 CFLAGS:append:class-native = " -DNO_SHARED_MEMORY"
 
+FILES:${PN} += "${datadir}/licenses/SDL2/LICENSE.txt"
+
 BBCLASSEXTEND = "native nativesdk"
diff --git a/poky/meta/recipes-graphics/libva/libva.inc b/poky/meta/recipes-graphics/libva/libva.inc
index 956cd50..735f2fc 100644
--- a/poky/meta/recipes-graphics/libva/libva.inc
+++ b/poky/meta/recipes-graphics/libva/libva.inc
@@ -16,17 +16,17 @@
 SECTION = "x11"
 LICENSE = "MIT"
 
-SRC_URI = "https://github.com/intel/libva/releases/download/${PV}/libva-${PV}.tar.bz2"
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/libva-${PV}.tar.bz2"
 LIC_FILES_CHKSUM = "file://COPYING;md5=2e48940f94acb0af582e5ef03537800f"
 SRC_URI[sha256sum] = "258ed409458d3e7a000e1908bc852619615ee6a933359c745fcd93eb3e461eca"
 
 S = "${WORKDIR}/libva-${PV}"
 
-UPSTREAM_CHECK_URI = "https://github.com/intel/libva/releases"
+GITHUB_BASE_URI = "https://github.com/intel/libva/releases"
 
 DEPENDS = "libdrm"
 
-inherit meson pkgconfig
+inherit meson pkgconfig github-releases
 
 PACKAGECONFIG[x11] = "-Dwith_x11=yes,-Dwith_x11=no,virtual/libx11 libxext libxfixes"
 PACKAGECONFIG[glx] = "-Dwith_glx=yes,-Dwith_glx=no,virtual/mesa"
diff --git a/poky/meta/recipes-graphics/mesa/files/0001-Revert-egl-wayland-deprecate-drm_handle_format-and-d.patch b/poky/meta/recipes-graphics/mesa/files/0001-Revert-egl-wayland-deprecate-drm_handle_format-and-d.patch
deleted file mode 100644
index dac2de4..0000000
--- a/poky/meta/recipes-graphics/mesa/files/0001-Revert-egl-wayland-deprecate-drm_handle_format-and-d.patch
+++ /dev/null
@@ -1,158 +0,0 @@
-From 7796c2c56c960ac55e49246f0349ac52539ada55 Mon Sep 17 00:00:00 2001
-From: Leandro Ribeiro <leandro.ribeiro@collabora.com>
-Date: Sun, 10 Apr 2022 22:54:36 -0300
-Subject: [PATCH] Revert "egl/wayland: deprecate drm_handle_format() and
- drm_handle_capabilities()"
-
-Commit af1ee8e010441f8f2ed8c77065b159652a4ac9fe dropped support to
-wl_drm, as we thought that most compositors from active projects were
-already supporting zwp_linux_dmabuf_v1.
-
-But that's not true, so revert this commit in order to give these
-projects a longer transition period.
-
-Note that we didn't add back the support to GEM name API, and that was
-on purpose.
-
-Signed-off-by: Leandro Ribeiro <leandro.ribeiro@collabora.com>
-Reviewed-by: Simon Ser <contact@emersion.fr>
-Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15822>
-
-Signed-off-by: Martin Jansa <Martin.Jansa@gmail.com>
-Upstream-Status: Backport [https://gitlab.freedesktop.org/mesa/mesa/-/commit/c60fea8c228ae3f32e20d6b65c473d9f04871d20]
----
- src/egl/drivers/dri2/egl_dri2.h         |  1 +
- src/egl/drivers/dri2/platform_wayland.c | 59 +++++++++++++++++++------
- 2 files changed, 47 insertions(+), 13 deletions(-)
-
-diff --git a/src/egl/drivers/dri2/egl_dri2.h b/src/egl/drivers/dri2/egl_dri2.h
-index c466ff83c53..eecb32a53fd 100644
---- a/src/egl/drivers/dri2/egl_dri2.h
-+++ b/src/egl/drivers/dri2/egl_dri2.h
-@@ -283,6 +283,7 @@ struct dri2_egl_display
-    struct zwp_linux_dmabuf_feedback_v1 *wl_dmabuf_feedback;
-    struct dmabuf_feedback_format_table format_table;
-    bool authenticated;
-+   uint32_t capabilities;
-    char *device_name;
- #endif
- 
-diff --git a/src/egl/drivers/dri2/platform_wayland.c b/src/egl/drivers/dri2/platform_wayland.c
-index 5ff83cce08a..843434376a7 100644
---- a/src/egl/drivers/dri2/platform_wayland.c
-+++ b/src/egl/drivers/dri2/platform_wayland.c
-@@ -1343,7 +1343,7 @@ create_wl_buffer(struct dri2_egl_display *dri2_dpy,
-                  struct dri2_egl_surface *dri2_surf,
-                  __DRIimage *image)
- {
--   struct wl_buffer *ret;
-+   struct wl_buffer *ret = NULL;
-    EGLBoolean query;
-    int width, height, fourcc, num_planes;
-    uint64_t modifier = DRM_FORMAT_MOD_INVALID;
-@@ -1447,11 +1447,28 @@ create_wl_buffer(struct dri2_egl_display *dri2_dpy,
-       ret = zwp_linux_buffer_params_v1_create_immed(params, width, height,
-                                                     fourcc, 0);
-       zwp_linux_buffer_params_v1_destroy(params);
-+   } else {
-+      struct wl_drm *wl_drm =
-+         dri2_surf ? dri2_surf->wl_drm_wrapper : dri2_dpy->wl_drm;
-+      int fd = -1, stride;
-+
-+      if (num_planes > 1)
-+         return NULL;
-+
-+      query = dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_FD, &fd);
-+      query &= dri2_dpy->image->queryImage(image, __DRI_IMAGE_ATTRIB_STRIDE, &stride);
-+      if (!query) {
-+         if (fd >= 0)
-+            close(fd);
-+         return NULL;
-+      }
- 
--      return ret;
-+      ret = wl_drm_create_prime_buffer(wl_drm, fd, width, height, fourcc, 0,
-+                                       stride, 0, 0, 0, 0);
-+      close(fd);
-    }
- 
--   return NULL;
-+   return ret;
- }
- 
- static EGLBoolean
-@@ -1698,16 +1715,21 @@ drm_handle_device(void *data, struct wl_drm *drm, const char *device)
- static void
- drm_handle_format(void *data, struct wl_drm *drm, uint32_t format)
- {
--   /* deprecated, as compositors already support the dma-buf protocol extension
--    * and so we can rely on dmabuf_handle_modifier() to receive formats and
--    * modifiers */
-+   struct dri2_egl_display *dri2_dpy = data;
-+   int visual_idx = dri2_wl_visual_idx_from_fourcc(format);
-+
-+   if (visual_idx == -1)
-+      return;
-+
-+   BITSET_SET(dri2_dpy->formats.formats_bitmap, visual_idx);
- }
- 
- static void
- drm_handle_capabilities(void *data, struct wl_drm *drm, uint32_t value)
- {
--   /* deprecated, as compositors already support the dma-buf protocol extension
--    * and so we can rely on it to create wl_buffer's */
-+   struct dri2_egl_display *dri2_dpy = data;
-+
-+   dri2_dpy->capabilities = value;
- }
- 
- static void
-@@ -2075,13 +2097,12 @@ dri2_initialize_wayland_drm(_EGLDisplay *disp)
-    wl_registry_add_listener(dri2_dpy->wl_registry,
-                             &registry_listener_drm, dri2_dpy);
- 
--   /* The compositor must expose the dma-buf interface. */
--   if (roundtrip(dri2_dpy) < 0 || dri2_dpy->wl_dmabuf == NULL)
-+   if (roundtrip(dri2_dpy) < 0)
-       goto cleanup;
- 
-    /* Get default dma-buf feedback */
--   if (zwp_linux_dmabuf_v1_get_version(dri2_dpy->wl_dmabuf) >=
--       ZWP_LINUX_DMABUF_V1_GET_DEFAULT_FEEDBACK_SINCE_VERSION) {
-+   if (dri2_dpy->wl_dmabuf && zwp_linux_dmabuf_v1_get_version(dri2_dpy->wl_dmabuf) >=
-+                              ZWP_LINUX_DMABUF_V1_GET_DEFAULT_FEEDBACK_SINCE_VERSION) {
-       dmabuf_feedback_format_table_init(&dri2_dpy->format_table);
-       dri2_dpy->wl_dmabuf_feedback =
-          zwp_linux_dmabuf_v1_get_default_feedback(dri2_dpy->wl_dmabuf);
-@@ -2089,7 +2110,6 @@ dri2_initialize_wayland_drm(_EGLDisplay *disp)
-                                                 &dmabuf_feedback_listener, dri2_dpy);
-    }
- 
--   /* Receive events from the interfaces */
-    if (roundtrip(dri2_dpy) < 0)
-       goto cleanup;
- 
-@@ -2176,6 +2196,19 @@ dri2_initialize_wayland_drm(_EGLDisplay *disp)
- 
-    dri2_wl_setup_swap_interval(disp);
- 
-+   if (dri2_dpy->wl_drm) {
-+      /* To use Prime, we must have _DRI_IMAGE v7 at least. createImageFromFds
-+       * support indicates that Prime export/import is supported by the driver.
-+       * We deprecated the support to GEM names API, so we bail out if the
-+       * driver does not suport Prime. */
-+      if (!(dri2_dpy->capabilities & WL_DRM_CAPABILITY_PRIME) ||
-+          (dri2_dpy->image->base.version < 7) ||
-+          (dri2_dpy->image->createImageFromFds == NULL)) {
-+         _eglLog(_EGL_WARNING, "wayland-egl: display does not support prime");
-+         goto cleanup;
-+      }
-+   }
-+
-    if (dri2_dpy->is_different_gpu &&
-        (dri2_dpy->image->base.version < 9 ||
-         dri2_dpy->image->blitImage == NULL)) {
--- 
-2.35.1
-
diff --git a/poky/meta/recipes-graphics/mesa/files/0001-futex.h-Define-__NR_futex-if-it-does-not-exist.patch b/poky/meta/recipes-graphics/mesa/files/0001-futex.h-Define-__NR_futex-if-it-does-not-exist.patch
index 3b0bfa3..e7d9219 100644
--- a/poky/meta/recipes-graphics/mesa/files/0001-futex.h-Define-__NR_futex-if-it-does-not-exist.patch
+++ b/poky/meta/recipes-graphics/mesa/files/0001-futex.h-Define-__NR_futex-if-it-does-not-exist.patch
@@ -1,4 +1,4 @@
-From 253b042d2bf10e9abfa9cc508e0782aefd834145 Mon Sep 17 00:00:00 2001
+From 859bfc342d1db9b61c43f30d9aa27cea35ca7599 Mon Sep 17 00:00:00 2001
 From: Khem Raj <raj.khem@gmail.com>
 Date: Fri, 16 Oct 2020 11:03:47 -0700
 Subject: [PATCH] futex.h: Define __NR_futex if it does not exist
@@ -18,12 +18,12 @@
  1 file changed, 4 insertions(+)
 
 diff --git a/src/util/futex.h b/src/util/futex.h
-index 43097f4..941b0ec 100644
+index 2a2a00c..92fc123 100644
 --- a/src/util/futex.h
 +++ b/src/util/futex.h
-@@ -34,6 +34,10 @@
- #include <sys/syscall.h>
- #include <sys/time.h>
+@@ -38,6 +38,10 @@
+ #define SYS_futex SYS_futex_time64
+ #endif
  
 +#if !defined(SYS_futex) && defined(SYS_futex_time64)
 +# define SYS_futex SYS_futex_time64
diff --git a/poky/meta/recipes-graphics/mesa/files/0001-meson-misdetects-64bit-atomics-on-mips-clang.patch b/poky/meta/recipes-graphics/mesa/files/0001-meson-misdetects-64bit-atomics-on-mips-clang.patch
index b08e4d8..9cd9bee 100644
--- a/poky/meta/recipes-graphics/mesa/files/0001-meson-misdetects-64bit-atomics-on-mips-clang.patch
+++ b/poky/meta/recipes-graphics/mesa/files/0001-meson-misdetects-64bit-atomics-on-mips-clang.patch
@@ -1,4 +1,4 @@
-From d34bdbd80e5a1f309d2ba280cdc66ff0ee0e5c43 Mon Sep 17 00:00:00 2001
+From 3ef37c63f03ad6f2af407de350486fdd25e9132a Mon Sep 17 00:00:00 2001
 From: Khem Raj <raj.khem@gmail.com>
 Date: Mon, 13 Jan 2020 15:23:47 -0800
 Subject: [PATCH] meson misdetects 64bit atomics on mips/clang
diff --git a/poky/meta/recipes-graphics/mesa/files/0001-meson.build-check-for-all-linux-host_os-combinations.patch b/poky/meta/recipes-graphics/mesa/files/0001-meson.build-check-for-all-linux-host_os-combinations.patch
index aea23d0..ec263ce 100644
--- a/poky/meta/recipes-graphics/mesa/files/0001-meson.build-check-for-all-linux-host_os-combinations.patch
+++ b/poky/meta/recipes-graphics/mesa/files/0001-meson.build-check-for-all-linux-host_os-combinations.patch
@@ -1,4 +1,4 @@
-From f9c597a2c517eb85c23cbeeb2e95c55794c74cda Mon Sep 17 00:00:00 2001
+From d092a9000da62dfccca5b58fca56b94eb9989c48 Mon Sep 17 00:00:00 2001
 From: Alistair Francis <alistair@alistair23.me>
 Date: Thu, 14 Nov 2019 13:04:49 -0800
 Subject: [PATCH] meson.build: check for all linux host_os combinations
@@ -20,10 +20,10 @@
  1 file changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/meson.build b/meson.build
-index bca6b1f..70d06c0 100644
+index 250b528..b5c98f8 100644
 --- a/meson.build
 +++ b/meson.build
-@@ -172,7 +172,7 @@ with_any_opengl = with_opengl or with_gles1 or with_gles2
+@@ -173,7 +173,7 @@ with_any_opengl = with_opengl or with_gles1 or with_gles2
  # Only build shared_glapi if at least one OpenGL API is enabled
  with_shared_glapi = with_shared_glapi and with_any_opengl
  
@@ -32,11 +32,11 @@
  
  dri_drivers = get_option('dri-drivers')
  if dri_drivers.length() != 0
-@@ -1074,7 +1074,7 @@ if cc.compiles('__uint128_t foo(void) { return 0; }',
+@@ -1091,7 +1091,7 @@ if cc.has_function('reallocarray')
  endif
  
  # TODO: this is very incomplete
--if ['linux', 'cygwin', 'gnu', 'freebsd', 'gnu/kfreebsd', 'haiku'].contains(host_machine.system())
+-if ['linux', 'cygwin', 'gnu', 'freebsd', 'gnu/kfreebsd', 'haiku', 'android'].contains(host_machine.system())
 +if ['linux', 'cygwin', 'gnu', 'freebsd', 'gnu/kfreebsd', 'haiku'].contains(host_machine.system()) or host_machine.system().startswith('linux')
    pre_args += '-D_GNU_SOURCE'
  elif host_machine.system() == 'sunos'
diff --git a/poky/meta/recipes-graphics/mesa/files/0001-nir-nir_opt_move-fix-ALWAYS_INLINE-compiler-error.patch b/poky/meta/recipes-graphics/mesa/files/0001-nir-nir_opt_move-fix-ALWAYS_INLINE-compiler-error.patch
index 48fc1e3..7989843 100644
--- a/poky/meta/recipes-graphics/mesa/files/0001-nir-nir_opt_move-fix-ALWAYS_INLINE-compiler-error.patch
+++ b/poky/meta/recipes-graphics/mesa/files/0001-nir-nir_opt_move-fix-ALWAYS_INLINE-compiler-error.patch
@@ -1,3 +1,11 @@
+From da6e47f1717f34c73de388c56ffaf4861a07fdc5 Mon Sep 17 00:00:00 2001
+From: t bettler <thomas.bettler@gmail.com>
+Date: Sat, 9 Jul 2022 09:28:51 +0000
+Subject: [PATCH] nir/nir_opt_move: fix ALWAYS_INLINE compiler error
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
 Backport merge request to fix mesa compile error when debug build
 enabled.
 
@@ -5,10 +13,6 @@
 
 Signed-off-by: Kai Kang <kai.kang@windriver.com>
 
-From c69c6e7a35205557de73734ad4a1f411c8f99926 Mon Sep 17 00:00:00 2001
-From: t bettler <thomas.bettler@gmail.com>
-Date: Sat, 9 Jul 2022 09:28:51 +0000
-Subject: [PATCH] nir/nir_opt_move: fix ALWAYS_INLINE compiler error
 MIME-Version: 1.0
 Content-Type: text/plain; charset=UTF-8
 Content-Transfer-Encoding: 8bit
@@ -17,13 +21,14 @@
 
 Closes: https://gitlab.freedesktop.org/mesa/mesa/-/issues/6825
 Fixes: f1d20ec67c3f186886b97de94f74484650f8fda1 ("nir/nir_opt_move: handle non-SSA defs ")
+
 ---
  src/compiler/nir/nir_inline_helpers.h | 10 ++++++++--
  src/compiler/nir/nir_opt_move.c       |  2 +-
  2 files changed, 9 insertions(+), 3 deletions(-)
 
 diff --git a/src/compiler/nir/nir_inline_helpers.h b/src/compiler/nir/nir_inline_helpers.h
-index 125dd8a537c..ec33f0509f7 100644
+index 125dd8a..ec33f05 100644
 --- a/src/compiler/nir/nir_inline_helpers.h
 +++ b/src/compiler/nir/nir_inline_helpers.h
 @@ -73,8 +73,8 @@ nir_foreach_dest(nir_instr *instr, nir_foreach_dest_cb cb, void *state)
@@ -48,7 +53,7 @@
 +   return _nir_foreach_src(instr, cb, state);
 +}
 diff --git a/src/compiler/nir/nir_opt_move.c b/src/compiler/nir/nir_opt_move.c
-index 81bcde5c436..051c3cc6295 100644
+index 81bcde5..051c3cc 100644
 --- a/src/compiler/nir/nir_opt_move.c
 +++ b/src/compiler/nir/nir_opt_move.c
 @@ -60,7 +60,7 @@ src_is_ssa(nir_src *src, void *state)
@@ -60,6 +65,3 @@
  }
  
  static bool
--- 
-2.34.1
-
diff --git a/poky/meta/recipes-graphics/mesa/files/0001-swrast_kms-use-swkmsDRI2Extension-instead-of-driDRI2.patch b/poky/meta/recipes-graphics/mesa/files/0001-swrast_kms-use-swkmsDRI2Extension-instead-of-driDRI2.patch
deleted file mode 100644
index db25e16..0000000
--- a/poky/meta/recipes-graphics/mesa/files/0001-swrast_kms-use-swkmsDRI2Extension-instead-of-driDRI2.patch
+++ /dev/null
@@ -1,113 +0,0 @@
-From feb4ec510b1328fdd9aa77305d3273d1f9c7e124 Mon Sep 17 00:00:00 2001
-From: Alexander Kanavin <alex@linutronix.de>
-Date: Thu, 9 Jun 2022 11:42:05 +0200
-Subject: [PATCH] swrast_kms: use swkmsDRI2Extension instead of
- driDRI2Extension
-
-This set of changes:
-https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15649
-
-caused a regression in Xorg when using swrast_kms:
-   (EE) AIGLX error: Calling driver entry point failed
-
-This commit changes the swrast_kms driver to use a dedicated screen init function
-(which I believe was overlooked); I also took the opportunity to rename the
-associated plumbling to have swrast-specific names.
-
-Upstream-Status: Submitted [https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/16942]
-Signed-off-by: Alexander Kanavin <alex@linutronix.de>
----
- src/gallium/frontends/dri/dri2.c       | 16 ++++++++--------
- src/gallium/frontends/dri/dri_screen.h |  4 ++--
- src/gallium/frontends/dri/dri_util.c   |  2 +-
- src/gallium/targets/dri/target.c       |  2 +-
- 4 files changed, 12 insertions(+), 12 deletions(-)
-
-diff --git a/src/gallium/frontends/dri/dri2.c b/src/gallium/frontends/dri/dri2.c
-index 63ac9d5..de33789 100644
---- a/src/gallium/frontends/dri/dri2.c
-+++ b/src/gallium/frontends/dri/dri2.c
-@@ -2508,7 +2508,7 @@ release_pipe:
-  * Returns the struct gl_config supported by this driver.
-  */
- static const __DRIconfig **
--dri_kms_init_screen(__DRIscreen * sPriv)
-+dri_swrast_kms_init_screen(__DRIscreen * sPriv)
- {
- #if defined(GALLIUM_SOFTPIPE)
-    const __DRIconfig **configs;
-@@ -2613,8 +2613,8 @@ static const struct __DRIDriverVtableExtensionRec galliumdrm_vtable = {
-  * hook. The latter is used to explicitly initialise the kms_swrast driver
-  * rather than selecting the approapriate driver as suggested by the loader.
-  */
--const struct __DriverAPIRec dri_kms_driver_api = {
--   .InitScreen = dri_kms_init_screen,
-+const struct __DriverAPIRec dri_swrast_kms_driver_api = {
-+   .InitScreen = dri_swrast_kms_init_screen,
-    .DestroyScreen = dri_destroy_screen,
-    .CreateBuffer = dri2_create_buffer,
-    .DestroyBuffer = dri_destroy_buffer,
-@@ -2633,17 +2633,17 @@ const __DRIextension *galliumdrm_driver_extensions[] = {
-     NULL
- };
- 
--static const struct __DRIDriverVtableExtensionRec dri_kms_vtable = {
-+static const struct __DRIDriverVtableExtensionRec dri_swrast_kms_vtable = {
-    .base = { __DRI_DRIVER_VTABLE, 1 },
--   .vtable = &dri_kms_driver_api,
-+   .vtable = &dri_swrast_kms_driver_api,
- };
- 
--const __DRIextension *dri_kms_driver_extensions[] = {
-+const __DRIextension *dri_swrast_kms_driver_extensions[] = {
-     &driCoreExtension.base,
-     &driImageDriverExtension.base,
--    &driDRI2Extension.base,
-+    &swkmsDRI2Extension.base,
-     &gallium_config_options.base,
--    &dri_kms_vtable.base,
-+    &dri_swrast_kms_vtable.base,
-     NULL
- };
- 
-diff --git a/src/gallium/frontends/dri/dri_screen.h b/src/gallium/frontends/dri/dri_screen.h
-index 0ee2feb..0bb8817 100644
---- a/src/gallium/frontends/dri/dri_screen.h
-+++ b/src/gallium/frontends/dri/dri_screen.h
-@@ -168,8 +168,8 @@ dri_destroy_screen_helper(struct dri_screen * screen);
- void
- dri_destroy_screen(__DRIscreen * sPriv);
- 
--extern const struct __DriverAPIRec dri_kms_driver_api;
--extern const __DRIextension *dri_kms_driver_extensions[];
-+extern const struct __DriverAPIRec dri_swrast_kms_driver_api;
-+extern const __DRIextension *dri_swrast_kms_driver_extensions[];
- extern const struct __DriverAPIRec galliumdrm_driver_api;
- extern const __DRIextension *galliumdrm_driver_extensions[];
- extern const struct __DriverAPIRec galliumsw_driver_api;
-diff --git a/src/gallium/frontends/dri/dri_util.c b/src/gallium/frontends/dri/dri_util.c
-index 8d60526..03614e1 100644
---- a/src/gallium/frontends/dri/dri_util.c
-+++ b/src/gallium/frontends/dri/dri_util.c
-@@ -187,7 +187,7 @@ swkmsCreateNewScreen(int scrn, int fd,
- 		     const __DRIconfig ***driver_configs, void *data)
- {
-    return driCreateNewScreen2(scrn, fd, extensions,
--                              dri_kms_driver_extensions,
-+                              dri_swrast_kms_driver_extensions,
-                               driver_configs, data);
- }
- 
-diff --git a/src/gallium/targets/dri/target.c b/src/gallium/targets/dri/target.c
-index 9b78351..e02b03b 100644
---- a/src/gallium/targets/dri/target.c
-+++ b/src/gallium/targets/dri/target.c
-@@ -25,7 +25,7 @@ const __DRIextension **__driDriverGetExtensions_kms_swrast(void);
- 
- PUBLIC const __DRIextension **__driDriverGetExtensions_kms_swrast(void)
- {
--   return dri_kms_driver_extensions;
-+   return dri_swrast_kms_driver_extensions;
- }
- 
- #endif
diff --git a/poky/meta/recipes-graphics/mesa/files/0001-util-format-Check-for-NEON-before-using-it.patch b/poky/meta/recipes-graphics/mesa/files/0001-util-format-Check-for-NEON-before-using-it.patch
index 5c6165c..d22ff3c 100644
--- a/poky/meta/recipes-graphics/mesa/files/0001-util-format-Check-for-NEON-before-using-it.patch
+++ b/poky/meta/recipes-graphics/mesa/files/0001-util-format-Check-for-NEON-before-using-it.patch
@@ -1,4 +1,4 @@
-From fdb2face4eeac3c20eedcca7520f4e7014225fb4 Mon Sep 17 00:00:00 2001
+From f17e836ef9b1bbc6056790596420b699e48128c2 Mon Sep 17 00:00:00 2001
 From: Khem Raj <raj.khem@gmail.com>
 Date: Thu, 2 Dec 2021 19:57:42 -0800
 Subject: [PATCH] util/format: Check for NEON before using it
@@ -20,10 +20,10 @@
  2 files changed, 2 insertions(+), 2 deletions(-)
 
 diff --git a/src/util/format/u_format.c b/src/util/format/u_format.c
-index 36c5e52..f0a0097 100644
+index c071250..0880984 100644
 --- a/src/util/format/u_format.c
 +++ b/src/util/format/u_format.c
-@@ -1138,7 +1138,7 @@ static void
+@@ -1184,7 +1184,7 @@ static void
  util_format_unpack_table_init(void)
  {
     for (enum pipe_format format = PIPE_FORMAT_NONE; format < PIPE_FORMAT_COUNT; format++) {
diff --git a/poky/meta/recipes-graphics/mesa/files/0002-meson.build-make-TLS-ELF-optional.patch b/poky/meta/recipes-graphics/mesa/files/0002-meson.build-make-TLS-ELF-optional.patch
deleted file mode 100644
index af11bae..0000000
--- a/poky/meta/recipes-graphics/mesa/files/0002-meson.build-make-TLS-ELF-optional.patch
+++ /dev/null
@@ -1,61 +0,0 @@
-From bf41fa026ae3d378e62fd83d03a6f5933b52ca04 Mon Sep 17 00:00:00 2001
-From: Alistair Francis <alistair@alistair23.me>
-Date: Thu, 14 Nov 2019 13:08:31 -0800
-Subject: [PATCH] meson.build: make TLS ELF optional
-
-USE_ELF_TLS has replaced GLX_USE_TLS so this patch is the original "make
-TLS GLX optional again" patch updated to the latest mesa.
-
-For details, see:
-https://gitlab.freedesktop.org/mesa/mesa/-/issues/966
-
-This prevents runtime segfault on musl:
-
-Traceback (most recent call last):
-  File "/home/pokybuild/yocto-worker/musl-qemux86/build/meta/lib/oeqa/core/decorator/__init__.py", line 36, in wrapped_f
-    return func(*args, **kwargs)
-  File "/home/pokybuild/yocto-worker/musl-qemux86/build/meta/lib/oeqa/runtime/cases/parselogs.py", line 378, in test_parselogs
-    self.assertEqual(errcount, 0, msg=self.msg)
-AssertionError: 1 != 0 : Log: /home/pokybuild/yocto-worker/musl-qemux86/build/build/tmp/work/qemux86-poky-linux-musl/core-image-sato-sdk/1.0-r0/target_logs/Xorg.0.log
-
-Upstream-Status: Inappropriate [configuration]
----
- meson.build       | 7 +++++--
- meson_options.txt | 6 ++++++
- 2 files changed, 11 insertions(+), 2 deletions(-)
-
-diff --git a/meson.build b/meson.build
-index 70d06c0..1441611 100644
---- a/meson.build
-+++ b/meson.build
-@@ -490,8 +490,11 @@ foreach platform : _platforms
-   pre_args += '-DHAVE_@0@_PLATFORM'.format(platform.to_upper())
- endforeach
- 
--use_elf_tls = true
--pre_args += '-DUSE_ELF_TLS'
-+use_elf_tls = false
-+if get_option('elf-tls')
-+  use_elf_tls = true
-+  pre_args += '-DUSE_ELF_TLS'
-+endif
- 
- if with_platform_android and get_option('platform-sdk-version') >= 29
-   # By default the NDK compiler, at least, emits emutls references instead of
-diff --git a/meson_options.txt b/meson_options.txt
-index 1f6ef38..99cc5cb 100644
---- a/meson_options.txt
-+++ b/meson_options.txt
-@@ -440,6 +440,12 @@ option(
-   value : true,
-   description : 'Enable direct rendering in GLX and EGL for DRI',
- )
-+option(
-+  'elf-tls',
-+  type : 'boolean',
-+  value : true,
-+  description : 'Enable TLS support in ELF',
-+)
- option('egl-lib-suffix',
-   type : 'string',
-   value : '',
diff --git a/poky/meta/recipes-graphics/mesa/mesa-gl_22.1.6.bb b/poky/meta/recipes-graphics/mesa/mesa-gl_22.2.0.bb
similarity index 100%
rename from poky/meta/recipes-graphics/mesa/mesa-gl_22.1.6.bb
rename to poky/meta/recipes-graphics/mesa/mesa-gl_22.2.0.bb
diff --git a/poky/meta/recipes-graphics/mesa/mesa.inc b/poky/meta/recipes-graphics/mesa/mesa.inc
index ea7ed4f..8a74e0a 100644
--- a/poky/meta/recipes-graphics/mesa/mesa.inc
+++ b/poky/meta/recipes-graphics/mesa/mesa.inc
@@ -10,22 +10,19 @@
 BUGTRACKER = "https://bugs.freedesktop.org"
 SECTION = "x11"
 LICENSE = "MIT"
-LIC_FILES_CHKSUM = "file://docs/license.rst;md5=9a383ee9f65a4e939d6630e9b067ff58"
+LIC_FILES_CHKSUM = "file://docs/license.rst;md5=63779ec98d78d823a9dc533a0735ef10"
 
 PE = "2"
 
 SRC_URI = "https://mesa.freedesktop.org/archive/mesa-${PV}.tar.xz \
            file://0001-meson.build-check-for-all-linux-host_os-combinations.patch \
-           file://0002-meson.build-make-TLS-ELF-optional.patch \
            file://0001-meson-misdetects-64bit-atomics-on-mips-clang.patch \
            file://0001-futex.h-Define-__NR_futex-if-it-does-not-exist.patch \
            file://0001-util-format-Check-for-NEON-before-using-it.patch \
-           file://0001-Revert-egl-wayland-deprecate-drm_handle_format-and-d.patch \
-           file://0001-swrast_kms-use-swkmsDRI2Extension-instead-of-driDRI2.patch \
            file://0001-nir-nir_opt_move-fix-ALWAYS_INLINE-compiler-error.patch \
            "
 
-SRC_URI[sha256sum] = "22ced061eb9adab8ea35368246c1995c09723f3f71653cd5050c5cec376e671a"
+SRC_URI[sha256sum] = "b1f9c8fd08f2cae3adf83355bef4d2398e8025f44947332880f2d0066bdafa8c"
 
 UPSTREAM_CHECK_GITTAGREGEX = "mesa-(?P<pver>\d+(\.\d+)+)"
 
@@ -95,7 +92,7 @@
 	${@bb.utils.contains('DISTRO_FEATURES', 'opengl', 'opengl egl gles gbm virgl', '', d)} \
 	${@bb.utils.contains('DISTRO_FEATURES', 'x11 opengl', 'dri3', '', d)} \
 	${@bb.utils.contains('DISTRO_FEATURES', 'x11 vulkan', 'dri3', '', d)} \
-	${@bb.utils.contains('TCLIBC', 'glibc', 'elf-tls', '', d)} \
+	${@bb.utils.contains('DISTRO_FEATURES', 'vulkan', 'zink', '', d)} \
 "
 
 # "gbm" requires "opengl"
@@ -104,7 +101,6 @@
 X11_DEPS = "xorgproto virtual/libx11 libxext libxxf86vm libxdamage libxfixes xrandr"
 # "x11" requires "opengl"
 PACKAGECONFIG[x11] = ",-Dglx=disabled,${X11_DEPS}"
-PACKAGECONFIG[elf-tls] = "-Delf-tls=true, -Delf-tls=false"
 PACKAGECONFIG[xvmc] = "-Dgallium-xvmc=enabled,-Dgallium-xvmc=disabled,libxvmc"
 PACKAGECONFIG[wayland] = ",,wayland-native wayland libdrm wayland-protocols"
 
@@ -117,7 +113,7 @@
 VULKAN_DRIVERS:append:x86-64:class-target = ",intel"
 VULKAN_DRIVERS:append ="${@bb.utils.contains('PACKAGECONFIG', 'freedreno', ',freedreno', '', d)}"
 VULKAN_DRIVERS:append ="${@bb.utils.contains('PACKAGECONFIG', 'broadcom', ',broadcom', '', d)}"
-PACKAGECONFIG[vulkan] = "-Dvulkan-drivers=${@strip_comma('${VULKAN_DRIVERS}')}, -Dvulkan-drivers='',"
+PACKAGECONFIG[vulkan] = "-Dvulkan-drivers=${@strip_comma('${VULKAN_DRIVERS}')}, -Dvulkan-drivers='',glslang-native vulkan-loader vulkan-headers"
 
 PACKAGECONFIG[opengl] = "-Dopengl=true, -Dopengl=false"
 
@@ -137,6 +133,7 @@
 PACKAGECONFIG[kmsro] = ""
 PACKAGECONFIG[vc4] = ""
 PACKAGECONFIG[v3d] = ""
+PACKAGECONFIG[zink] = ""
 
 GALLIUMDRIVERS = "swrast"
 # gallium swrast was found to crash Xorg on startup in x32 qemu
@@ -149,6 +146,7 @@
 GALLIUMDRIVERS:append ="${@bb.utils.contains('PACKAGECONFIG', 'kmsro', ',kmsro', '', d)}"
 GALLIUMDRIVERS:append ="${@bb.utils.contains('PACKAGECONFIG', 'vc4', ',vc4', '', d)}"
 GALLIUMDRIVERS:append ="${@bb.utils.contains('PACKAGECONFIG', 'v3d', ',v3d', '', d)}"
+GALLIUMDRIVERS:append ="${@bb.utils.contains('PACKAGECONFIG', 'zink', ',zink', '', d)}"
 
 # radeonsi requires LLVM
 GALLIUMDRIVERS_RADEONSI = "${@bb.utils.contains('PACKAGECONFIG', 'r600', ',radeonsi', '', d)}"
@@ -177,6 +175,8 @@
 PACKAGECONFIG[panfrost] = ""
 GALLIUMDRIVERS:append ="${@bb.utils.contains('PACKAGECONFIG', 'panfrost', ',panfrost', '', d)}"
 
+PACKAGECONFIG[vulkan-beta] = "-Dvulkan-beta=true,-Dvulkan-beta=false"
+
 PACKAGECONFIG[osmesa] = "-Dosmesa=true,-Dosmesa=false"
 
 PACKAGECONFIG[unwind] = "-Dlibunwind=enabled,-Dlibunwind=disabled,libunwind"
diff --git a/poky/meta/recipes-graphics/mesa/mesa_22.1.6.bb b/poky/meta/recipes-graphics/mesa/mesa_22.2.0.bb
similarity index 100%
rename from poky/meta/recipes-graphics/mesa/mesa_22.1.6.bb
rename to poky/meta/recipes-graphics/mesa/mesa_22.2.0.bb
diff --git a/poky/meta/recipes-graphics/piglit/piglit/0004-CMakeLists.txt-add-missing-endian.h-check.patch b/poky/meta/recipes-graphics/piglit/piglit/0004-CMakeLists.txt-add-missing-endian.h-check.patch
deleted file mode 100644
index 68f783e..0000000
--- a/poky/meta/recipes-graphics/piglit/piglit/0004-CMakeLists.txt-add-missing-endian.h-check.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-From a8ea135125b284a8a9c965d8091b3d1d9a31849c Mon Sep 17 00:00:00 2001
-From: Alexander Kanavin <alex@linutronix.de>
-Date: Tue, 5 Apr 2022 16:51:34 +0200
-Subject: [PATCH] CMakeLists.txt: add missing endian.h check
-
-This is needed to actually have HAVE_ENDIAN_H defined properly in config.h.
-
-Upstream-Status: Submitted [https://gitlab.freedesktop.org/mesa/piglit/-/merge_requests/648]
-Signed-off-by: Alexander Kanavin <alex@linutronix.de>
----
- CMakeLists.txt | 1 +
- 1 file changed, 1 insertion(+)
-
-diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 9fb0508da..158fac4bc 100644
---- a/CMakeLists.txt
-+++ b/CMakeLists.txt
-@@ -481,6 +481,7 @@ check_include_file(sys/stat.h  HAVE_SYS_STAT_H)
- check_include_file(unistd.h    HAVE_UNISTD_H)
- check_include_file(fcntl.h     HAVE_FCNTL_H)
- check_include_file(linux/sync_file.h HAVE_LINUX_SYNC_FILE_H)
-+check_include_file(endian.h    HAVE_ENDIAN_H)
- 
- if(DEFINED PIGLIT_INSTALL_VERSION)
- 	set(PIGLIT_INSTALL_VERSION_SUFFIX
diff --git a/poky/meta/recipes-graphics/piglit/piglit_git.bb b/poky/meta/recipes-graphics/piglit/piglit_git.bb
index 29360a2..f758208 100644
--- a/poky/meta/recipes-graphics/piglit/piglit_git.bb
+++ b/poky/meta/recipes-graphics/piglit/piglit_git.bb
@@ -10,11 +10,11 @@
            file://0001-cmake-install-bash-completions-in-the-right-place.patch \
            file://0002-cmake-use-proper-WAYLAND_INCLUDE_DIRS-variable.patch \
            file://0003-tests-util-piglit-shader.c-do-not-hardcode-build-pat.patch \
-           file://0004-CMakeLists.txt-add-missing-endian.h-check.patch \
-           file://0005-cmake-Don-t-enable-GLX-if-tests-are-disabled.patch"
+           file://0005-cmake-Don-t-enable-GLX-if-tests-are-disabled.patch \
+           "
 UPSTREAM_CHECK_COMMITS = "1"
 
-SRCREV = "6403e90dc7da02d486906cddab8d02c2552a8d46"
+SRCREV = "265896c86f90cb72e8f218ba6a3617fca8b9a1e3"
 # (when PV goes above 1.0 remove the trailing r)
 PV = "1.0+gitr${SRCPV}"
 
diff --git a/poky/meta/recipes-graphics/ttf-fonts/liberation-fonts_2.1.5.bb b/poky/meta/recipes-graphics/ttf-fonts/liberation-fonts_2.1.5.bb
index 32247b0..b7fdc0d 100644
--- a/poky/meta/recipes-graphics/ttf-fonts/liberation-fonts_2.1.5.bb
+++ b/poky/meta/recipes-graphics/ttf-fonts/liberation-fonts_2.1.5.bb
@@ -13,11 +13,11 @@
 SRC_URI = "https://github.com/liberationfonts/liberation-fonts/files/7261482/liberation-fonts-ttf-${PV}.tar.gz \
            file://30-liberation-aliases.conf"
 SRC_URI[sha256sum] = "7191c669bf38899f73a2094ed00f7b800553364f90e2637010a69c0e268f25d0"
-UPSTREAM_CHECK_URI = "https://github.com/liberationfonts/liberation-fonts/releases"
+GITHUB_BASE_URI = "https://github.com/liberationfonts/liberation-fonts/releases"
 
 S = "${WORKDIR}/liberation-fonts-ttf-${PV}"
 
-inherit allarch fontcache
+inherit allarch fontcache github-releases
 
 do_install () {
 	install -d ${D}${datadir}/fonts/ttf/
diff --git a/poky/meta/recipes-graphics/virglrenderer/virglrenderer/0001-meson.build-use-python3-directly-for-python.patch b/poky/meta/recipes-graphics/virglrenderer/virglrenderer/0001-meson.build-use-python3-directly-for-python.patch
index 0fd1d51..8230ba5 100644
--- a/poky/meta/recipes-graphics/virglrenderer/virglrenderer/0001-meson.build-use-python3-directly-for-python.patch
+++ b/poky/meta/recipes-graphics/virglrenderer/virglrenderer/0001-meson.build-use-python3-directly-for-python.patch
@@ -1,4 +1,4 @@
-From 63788c63ed39a3ce9994f4315d8997e1a9300d4d Mon Sep 17 00:00:00 2001
+From c853c9e5c44f1b23a15a7ba629ee02f7d8ec23a0 Mon Sep 17 00:00:00 2001
 From: Alexander Kanavin <alex.kanavin@gmail.com>
 Date: Mon, 6 Jan 2020 12:44:42 +0100
 Subject: [PATCH] meson.build: use 'python3' directly for python
@@ -8,17 +8,18 @@
 
 Upstream-Status: Inappropriate [oe-core specific]
 Signed-off-by: Alexander Kanavin <alex.kanavin@gmail.com>
+
 ---
  meson.build | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)
 
 diff --git a/meson.build b/meson.build
-index 682d7c8..19d2eae 100644
+index 13d95bb..b241eb2 100644
 --- a/meson.build
 +++ b/meson.build
-@@ -60,7 +60,7 @@ foreach w : warnings
-    endif
- endforeach
+@@ -64,7 +64,7 @@ flags = [
+ 
+ add_project_arguments(cc.get_supported_arguments(flags), language : 'c')
  
 -prog_python = import('python').find_installation('python3')
 +prog_python = 'python3'
diff --git a/poky/meta/recipes-graphics/virglrenderer/virglrenderer/cve-2022-0135.patch b/poky/meta/recipes-graphics/virglrenderer/virglrenderer/cve-2022-0135.patch
deleted file mode 100644
index ae42dc8..0000000
--- a/poky/meta/recipes-graphics/virglrenderer/virglrenderer/cve-2022-0135.patch
+++ /dev/null
@@ -1,117 +0,0 @@
-From 63aee871365f9c9e7fa9125672302a0fb250d34d Mon Sep 17 00:00:00 2001
-From: Gert Wollny <gert.wollny@collabora.com>
-Date: Tue, 30 Nov 2021 09:16:24 +0100
-Subject: [PATCH 2/2] vrend: propperly check whether the shader image range is
- correct
-
-Also add a test to check the integer underflow.
-
-Closes: #251
-Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
-Reviewed-by: Chia-I Wu <olvaffe@gmail.com>
-
-cherry-pick from anongit.freedesktop.org/virglrenderer
-commit 2aed5d4...
-
-CVE: CVE-2022-0135
-Upstream-Status: Backport
-Signed-off-by: Joe Slater <joe.slater@windriver.com>
-
----
- src/vrend_decode.c          |  3 +-
- tests/test_fuzzer_formats.c | 57 +++++++++++++++++++++++++++++++++++++
- 2 files changed, 59 insertions(+), 1 deletion(-)
-
-diff --git a/src/vrend_decode.c b/src/vrend_decode.c
-index 91f5f24..6771b10 100644
---- a/src/vrend_decode.c
-+++ b/src/vrend_decode.c
-@@ -1249,8 +1249,9 @@ static int vrend_decode_set_shader_images(struct vrend_context *ctx, const uint3
-    if (num_images < 1) {
-       return 0;
-    }
-+
-    if (start_slot > PIPE_MAX_SHADER_IMAGES ||
--       start_slot > PIPE_MAX_SHADER_IMAGES - num_images)
-+       start_slot + num_images > PIPE_MAX_SHADER_IMAGES)
-       return EINVAL;
- 
-    for (uint32_t i = 0; i < num_images; i++) {
-diff --git a/tests/test_fuzzer_formats.c b/tests/test_fuzzer_formats.c
-index 154a2e5..e32caf0 100644
---- a/tests/test_fuzzer_formats.c
-+++ b/tests/test_fuzzer_formats.c
-@@ -958,6 +958,61 @@ static void test_vrend_set_signle_abo_heap_overflow() {
-     virgl_renderer_submit_cmd((void *) cmd, ctx_id, 0xde);
- }
- 
-+static void test_vrend_set_shader_images_overflow()
-+{
-+    uint32_t num_shaders = PIPE_MAX_SHADER_IMAGES + 1;
-+    uint32_t size = num_shaders * VIRGL_SET_SHADER_IMAGE_ELEMENT_SIZE + 3;
-+    uint32_t cmd[size];
-+    int i = 0;
-+    cmd[i++] = ((size - 1)<< 16) | 0 << 8 | VIRGL_CCMD_SET_SHADER_IMAGES;
-+    cmd[i++] = PIPE_SHADER_FRAGMENT;
-+    memset(&cmd[i], 0, size - i);
-+
-+    virgl_renderer_submit_cmd((void *) cmd, ctx_id, size);
-+}
-+
-+/* Test adapted from yaojun8558363@gmail.com:
-+ * https://gitlab.freedesktop.org/virgl/virglrenderer/-/issues/250
-+*/
-+static void test_vrend_3d_resource_overflow() {
-+
-+    struct virgl_renderer_resource_create_args resource;
-+    resource.handle = 0x4c474572;
-+    resource.target = PIPE_TEXTURE_2D_ARRAY;
-+    resource.format = VIRGL_FORMAT_Z24X8_UNORM;
-+    resource.nr_samples = 2;
-+    resource.last_level = 0;
-+    resource.array_size = 3;
-+    resource.bind = VIRGL_BIND_SAMPLER_VIEW;
-+    resource.depth = 1;
-+    resource.width = 8;
-+    resource.height = 4;
-+    resource.flags = 0;
-+
-+    virgl_renderer_resource_create(&resource, NULL, 0);
-+    virgl_renderer_ctx_attach_resource(ctx_id, resource.handle);
-+
-+    uint32_t size = 0x400;
-+    uint32_t cmd[size];
-+    int i = 0;
-+    cmd[i++] = (size - 1) << 16 | 0 << 8 | VIRGL_CCMD_RESOURCE_INLINE_WRITE;
-+    cmd[i++] = resource.handle;
-+    cmd[i++] = 0; // level
-+    cmd[i++] = 0; // usage
-+    cmd[i++] = 0; // stride
-+    cmd[i++] = 0; // layer_stride
-+    cmd[i++] = 0; // x
-+    cmd[i++] = 0; // y
-+    cmd[i++] = 0; // z
-+    cmd[i++] = 8; // w
-+    cmd[i++] = 4; // h
-+    cmd[i++] = 3; // d
-+    memset(&cmd[i], 0, size - i);
-+
-+    virgl_renderer_submit_cmd((void *) cmd, ctx_id, size);
-+}
-+
-+
- int main()
- {
-    initialize_environment();
-@@ -980,6 +1035,8 @@ int main()
-    test_cs_nullpointer_deference();
-    test_vrend_set_signle_abo_heap_overflow();
- 
-+   test_vrend_set_shader_images_overflow();
-+   test_vrend_3d_resource_overflow();
- 
-    virgl_renderer_context_destroy(ctx_id);
-    virgl_renderer_cleanup(&cookie);
--- 
-2.25.1
-
diff --git a/poky/meta/recipes-graphics/virglrenderer/virglrenderer/cve-2022-0175.patch b/poky/meta/recipes-graphics/virglrenderer/virglrenderer/cve-2022-0175.patch
deleted file mode 100644
index 7fbab75..0000000
--- a/poky/meta/recipes-graphics/virglrenderer/virglrenderer/cve-2022-0175.patch
+++ /dev/null
@@ -1,107 +0,0 @@
-From 5ca7aca001092c557f0b6fc1ba3db7dcdab860b7 Mon Sep 17 00:00:00 2001
-From: Gert Wollny <gert.wollny@collabora.com>
-Date: Tue, 30 Nov 2021 09:29:42 +0100
-Subject: [PATCH 1/2] vrend: clear memory when allocating a host-backed memory
- resource
-
-Closes: #249
-Signed-off-by: Gert Wollny <gert.wollny@collabora.com>
-Reviewed-by: Chia-I Wu <olvaffe@gmail.com>
-
-cherry-pick from anongit.freedesktop.org/virglrenderer
-commit b05bb61...
-
-CVE: CVE-2022-0175
-Upstream-Status: Backport
-Signed-off-by: Joe Slater <joe.slater@windriver.com>
-
----
- src/vrend_renderer.c        |  2 +-
- tests/test_virgl_transfer.c | 51 +++++++++++++++++++++++++++++++++++++
- 2 files changed, 52 insertions(+), 1 deletion(-)
-
-diff --git a/src/vrend_renderer.c b/src/vrend_renderer.c
-index b8b2a36..2650cf2 100644
---- a/src/vrend_renderer.c
-+++ b/src/vrend_renderer.c
-@@ -6788,7 +6788,7 @@ vrend_resource_alloc_buffer(struct vrend_resource *gr, uint32_t flags)
-    if (bind == VIRGL_BIND_CUSTOM) {
-       /* use iovec directly when attached */
-       gr->storage_bits |= VREND_STORAGE_HOST_SYSTEM_MEMORY;
--      gr->ptr = malloc(size);
-+      gr->ptr = calloc(1, size);
-       if (!gr->ptr)
-          return -ENOMEM;
-    } else if (bind == VIRGL_BIND_STAGING) {
-diff --git a/tests/test_virgl_transfer.c b/tests/test_virgl_transfer.c
-index bf7f438..3c53c3d 100644
---- a/tests/test_virgl_transfer.c
-+++ b/tests/test_virgl_transfer.c
-@@ -952,6 +952,56 @@ START_TEST(virgl_test_transfer_near_res_bounds_with_stride_succeeds)
- }
- END_TEST
- 
-+START_TEST(test_vrend_host_backed_memory_no_data_leak)
-+{
-+   struct iovec iovs[1];
-+   int niovs = 1;
-+
-+   struct virgl_context ctx = {0};
-+
-+   int ret = testvirgl_init_ctx_cmdbuf(&ctx);
-+
-+   struct virgl_renderer_resource_create_args res;
-+   res.handle = 0x400;
-+   res.target = PIPE_BUFFER;
-+   res.format = VIRGL_FORMAT_R8_UNORM;
-+   res.nr_samples = 0;
-+   res.last_level = 0;
-+   res.array_size = 1;
-+   res.bind = VIRGL_BIND_CUSTOM;
-+   res.depth = 1;
-+   res.width = 32;
-+   res.height = 1;
-+   res.flags = 0;
-+
-+   uint32_t size = 32;
-+   uint8_t* data = calloc(1, size);
-+   memset(data, 1, 32);
-+   iovs[0].iov_base = data;
-+   iovs[0].iov_len = size;
-+
-+   struct pipe_box box = {0,0,0, size, 1,1};
-+
-+   virgl_renderer_resource_create(&res, NULL, 0);
-+   virgl_renderer_ctx_attach_resource(ctx.ctx_id, res.handle);
-+
-+   ret = virgl_renderer_transfer_read_iov(res.handle, ctx.ctx_id, 0, 0, 0,
-+                                          (struct virgl_box *)&box, 0, iovs, niovs);
-+
-+   ck_assert_int_eq(ret, 0);
-+
-+   for (int i = 0; i < 32; ++i)
-+      ck_assert_int_eq(data[i], 0);
-+
-+   virgl_renderer_ctx_detach_resource(1, res.handle);
-+
-+   virgl_renderer_resource_unref(res.handle);
-+   free(data);
-+
-+}
-+END_TEST
-+
-+
- static Suite *virgl_init_suite(void)
- {
-   Suite *s;
-@@ -981,6 +1031,7 @@ static Suite *virgl_init_suite(void)
-   tcase_add_test(tc_core, virgl_test_transfer_buffer_bad_strides);
-   tcase_add_test(tc_core, virgl_test_transfer_2d_array_bad_layer_stride);
-   tcase_add_test(tc_core, virgl_test_transfer_2d_bad_level);
-+  tcase_add_test(tc_core, test_vrend_host_backed_memory_no_data_leak);
- 
-   tcase_add_loop_test(tc_core, virgl_test_transfer_res_read_valid, 0, PIPE_MAX_TEXTURE_TYPES);
-   tcase_add_loop_test(tc_core, virgl_test_transfer_res_write_valid, 0, PIPE_MAX_TEXTURE_TYPES);
--- 
-2.25.1
-
diff --git a/poky/meta/recipes-graphics/virglrenderer/virglrenderer_0.10.3.bb b/poky/meta/recipes-graphics/virglrenderer/virglrenderer_0.10.3.bb
new file mode 100644
index 0000000..3480eb9
--- /dev/null
+++ b/poky/meta/recipes-graphics/virglrenderer/virglrenderer_0.10.3.bb
@@ -0,0 +1,33 @@
+SUMMARY = "VirGL virtual OpenGL renderer"
+DESCRIPTION = "Virgil is a research project to investigate the possibility of \
+creating a virtual 3D GPU for use inside qemu virtual machines, that allows \
+the guest operating system to use the capabilities of the host GPU to \
+accelerate 3D rendering."
+HOMEPAGE = "https://virgil3d.github.io/"
+
+LICENSE = "MIT"
+LIC_FILES_CHKSUM = "file://COPYING;md5=c81c08eeefd9418fca8f88309a76db10"
+
+DEPENDS = "libdrm libepoxy virtual/egl virtual/libgbm"
+SRCREV = "0922041ec6730122e0fec11404e6859e2efc4bc0"
+SRC_URI = "git://gitlab.freedesktop.org/virgl/virglrenderer.git;branch=master;protocol=https \
+           file://0001-meson.build-use-python3-directly-for-python.patch \
+           "
+
+S = "${WORKDIR}/git"
+
+inherit meson pkgconfig features_check
+
+PACKAGECONFIG ?= "${@bb.utils.contains('DISTRO_FEATURES', 'vulkan', 'venus-experimental', '', d)}"
+
+PACKAGECONFIG[venus-experimental] = "-Dvenus-experimental=true,-Dvenus-experimental=false,vulkan-loader vulkan-headers"
+PACKAGECONFIG[va] = "-Dvideo=true,-Dvideo=false,libva"
+PACKAGECONFIG[render-server] = "-Drender-server=true,-Drender-server=false"
+PACKAGECONFIG[drm-msm-experimental] = "-Ddrm-msm-experimental=true,-Ddrm-msm-experimental=false"
+PACKAGECONFIG[minigbm_allocation] = "-Dminigbm_allocation=true,-Dminigbm_allocation=false"
+PACKAGECONFIG[venus-validate] = "-Dvenus-validate=true,-Dvenus-validate=false"
+PACKAGECONFIG[tests] = "-Dtests=true,-Dtests=false,libcheck"
+
+BBCLASSEXTEND = "native nativesdk"
+
+REQUIRED_DISTRO_FEATURES = "opengl"
diff --git a/poky/meta/recipes-graphics/virglrenderer/virglrenderer_0.9.1.bb b/poky/meta/recipes-graphics/virglrenderer/virglrenderer_0.9.1.bb
deleted file mode 100644
index ad3688e..0000000
--- a/poky/meta/recipes-graphics/virglrenderer/virglrenderer_0.9.1.bb
+++ /dev/null
@@ -1,25 +0,0 @@
-SUMMARY = "VirGL virtual OpenGL renderer"
-DESCRIPTION = "Virgil is a research project to investigate the possibility of \
-creating a virtual 3D GPU for use inside qemu virtual machines, that allows \
-the guest operating system to use the capabilities of the host GPU to \
-accelerate 3D rendering."
-HOMEPAGE = "https://virgil3d.github.io/"
-
-LICENSE = "MIT"
-LIC_FILES_CHKSUM = "file://COPYING;md5=c81c08eeefd9418fca8f88309a76db10"
-
-DEPENDS = "libdrm virtual/egl virtual/libgbm libepoxy"
-SRCREV = "363915595e05fb252e70d6514be2f0c0b5ca312b"
-SRC_URI = "git://anongit.freedesktop.org/git/virglrenderer;branch=branch-0.9.1 \
-           file://0001-meson.build-use-python3-directly-for-python.patch \
-           file://cve-2022-0135.patch \
-           file://cve-2022-0175.patch \
-           "
-
-S = "${WORKDIR}/git"
-
-inherit meson pkgconfig features_check
-
-BBCLASSEXTEND = "native nativesdk"
-
-REQUIRED_DISTRO_FEATURES = "opengl"
diff --git a/poky/meta/recipes-graphics/xorg-lib/libfontenc_1.1.4.bb b/poky/meta/recipes-graphics/xorg-lib/libfontenc_1.1.6.bb
similarity index 71%
rename from poky/meta/recipes-graphics/xorg-lib/libfontenc_1.1.4.bb
rename to poky/meta/recipes-graphics/xorg-lib/libfontenc_1.1.6.bb
index 1fc74b1..8f9602d 100644
--- a/poky/meta/recipes-graphics/xorg-lib/libfontenc_1.1.4.bb
+++ b/poky/meta/recipes-graphics/xorg-lib/libfontenc_1.1.6.bb
@@ -11,7 +11,8 @@
 DEPENDS += "zlib xorgproto font-util"
 PE = "1"
 
-SRC_URI[md5sum] = "6447db6a689fb530c218f0f8328c3abc"
-SRC_URI[sha256sum] = "2cfcce810ddd48f2e5dc658d28c1808e86dcf303eaff16728b9aa3dbc0092079"
+XORG_EXT = "tar.xz"
+
+SRC_URI[sha256sum] = "ea8606ed5255dda8f570b7d1a74d59ee8d198675b2f114d07807431e6ba1d111"
 
 BBCLASSEXTEND = "native"
diff --git a/poky/meta/recipes-graphics/xorg-lib/libxau_1.0.9.bb b/poky/meta/recipes-graphics/xorg-lib/libxau_1.0.10.bb
similarity index 76%
rename from poky/meta/recipes-graphics/xorg-lib/libxau_1.0.9.bb
rename to poky/meta/recipes-graphics/xorg-lib/libxau_1.0.10.bb
index 3fbc3a5..a62c9f0 100644
--- a/poky/meta/recipes-graphics/xorg-lib/libxau_1.0.9.bb
+++ b/poky/meta/recipes-graphics/xorg-lib/libxau_1.0.10.bb
@@ -17,8 +17,8 @@
 PE = "1"
 
 XORG_PN = "libXau"
+XORG_EXT = "tar.xz"
 
 BBCLASSEXTEND = "native nativesdk"
 
-SRC_URI[md5sum] = "c5f16288f2da9f071b29111d68797480"
-SRC_URI[sha256sum] = "ccf8cbf0dbf676faa2ea0a6d64bcc3b6746064722b606c8c52917ed00dcb73ec"
+SRC_URI[sha256sum] = "8be6f292334d2f87e5b919c001e149a9fdc27005d6b3e053862ac6ebbf1a0c0a"
diff --git a/poky/meta/recipes-graphics/xorg-lib/libxfont2_2.0.5.bb b/poky/meta/recipes-graphics/xorg-lib/libxfont2_2.0.6.bb
similarity index 85%
rename from poky/meta/recipes-graphics/xorg-lib/libxfont2_2.0.5.bb
rename to poky/meta/recipes-graphics/xorg-lib/libxfont2_2.0.6.bb
index 10e44c3..59d63e7 100644
--- a/poky/meta/recipes-graphics/xorg-lib/libxfont2_2.0.5.bb
+++ b/poky/meta/recipes-graphics/xorg-lib/libxfont2_2.0.6.bb
@@ -12,10 +12,11 @@
 DEPENDS += "freetype xtrans xorgproto libfontenc zlib"
 
 XORG_PN = "libXfont2"
+XORG_EXT = "tar.xz"
 
 BBCLASSEXTEND = "native"
 
-SRC_URI[sha256sum] = "aa7c6f211cf7215c0ab4819ed893dc98034363d7b930b844bb43603c2e10b53e"
+SRC_URI[sha256sum] = "74ca20017eb0fb3f56d8d5e60685f560fc85e5ff3d84c61c4cb891e40c27aef4"
 
 PACKAGECONFIG ??= "${@bb.utils.filter('DISTRO_FEATURES', 'ipv6', d)}"
 PACKAGECONFIG[ipv6] = "--enable-ipv6,--disable-ipv6,"
diff --git a/poky/meta/recipes-graphics/xorg-lib/xorg-lib-common.inc b/poky/meta/recipes-graphics/xorg-lib/xorg-lib-common.inc
index 60bc8c7..68137c4 100644
--- a/poky/meta/recipes-graphics/xorg-lib/xorg-lib-common.inc
+++ b/poky/meta/recipes-graphics/xorg-lib/xorg-lib-common.inc
@@ -6,8 +6,9 @@
 DEPENDS = "util-macros"
 
 XORG_PN = "${BPN}"
+XORG_EXT ?= "tar.bz2"
 
-SRC_URI = "${XORG_MIRROR}/individual/lib/${XORG_PN}-${PV}.tar.bz2"
+SRC_URI = "${XORG_MIRROR}/individual/lib/${XORG_PN}-${PV}.${XORG_EXT}"
 
 S = "${WORKDIR}/${XORG_PN}-${PV}"
 
diff --git a/poky/meta/recipes-kernel/kern-tools/kern-tools-native_git.bb b/poky/meta/recipes-kernel/kern-tools/kern-tools-native_git.bb
index 11613ab..dea7b65 100644
--- a/poky/meta/recipes-kernel/kern-tools/kern-tools-native_git.bb
+++ b/poky/meta/recipes-kernel/kern-tools/kern-tools-native_git.bb
@@ -11,7 +11,7 @@
 
 DEPENDS = "git-native"
 
-SRCREV = "f70b1d52f4706a263ae22e2c61039ccd875e97b6"
+SRCREV = "ba600ef61a85966596126a6e8d936971905e8749"
 PV = "0.3+git${SRCPV}"
 
 inherit native
diff --git a/poky/meta/recipes-kernel/kmod/depmodwrapper-cross_1.0.bb b/poky/meta/recipes-kernel/kmod/depmodwrapper-cross_1.0.bb
index 303026a..6c0739d 100644
--- a/poky/meta/recipes-kernel/kmod/depmodwrapper-cross_1.0.bb
+++ b/poky/meta/recipes-kernel/kmod/depmodwrapper-cross_1.0.bb
@@ -21,13 +21,17 @@
 #!/bin/sh
 # Expected to be called as: depmodwrapper -a KERNEL_VERSION
 if [ "\$1" != "-a" -o "\$2" != "-b" ]; then
-    echo "Usage: depmodwrapper -a -b rootfs KERNEL_VERSION" >&2
+    echo "Usage: depmodwrapper -a -b rootfs KERNEL_VERSION [KERNEL_PACKAGE_NAME]" >&2
     exit 1
 fi
 
+kernelpkgname="kernel"
+# If no KERNEL_PACKAGE_NAME, assume "kernel".
+[ -z "\$5" ] || kernelpkgname="\$5"
+
 kernelabi=""
-if [ -r "${PKGDATA_DIR}/kernel-depmod/kernel-abiversion" ]; then
-    kernelabi=\$(cat "${PKGDATA_DIR}/kernel-depmod/kernel-abiversion")
+if [ -r "${PKGDATA_DIR}/\${kernelpkgname}-depmod/\${kernelpkgname}-abiversion" ]; then
+    kernelabi=\$(cat "${PKGDATA_DIR}/\${kernelpkgname}-depmod/\${kernelpkgname}-abiversion")
 fi
 
 if [ ! -e "\$3${nonarch_base_libdir}/depmod.d/exclude.conf" ]; then
@@ -35,11 +39,11 @@
     echo "exclude .debug" > "\$3${nonarch_base_libdir}/depmod.d/exclude.conf"
 fi
 
-if [ ! -r ${PKGDATA_DIR}/kernel-depmod/System.map-\$4 ] || [ "\$kernelabi" != "\$4" ]; then
-    echo "Unable to read: ${PKGDATA_DIR}/kernel-depmod/System.map-\$4" >&2
+if [ ! -r ${PKGDATA_DIR}/\${kernelpkgname}-depmod/System.map-\$4 ] || [ "\$kernelabi" != "\$4" ]; then
+    echo "Unable to read: ${PKGDATA_DIR}/\${kernelpkgname}-depmod/System.map-\$4" >&2
     exec env depmod -C "\$3${nonarch_base_libdir}/depmod.d" "\$1" "\$2" "\$3" "\$4"
 else
-    exec env depmod -C "\$3${nonarch_base_libdir}/depmod.d" "\$1" "\$2" "\$3" -F "${PKGDATA_DIR}/kernel-depmod/System.map-\$4" "\$4"
+    exec env depmod -C "\$3${nonarch_base_libdir}/depmod.d" "\$1" "\$2" "\$3" -F "${PKGDATA_DIR}/\${kernelpkgname}-depmod/System.map-\$4" "\$4"
 fi
 EOF
 	chmod +x ${D}${bindir_crossscripts}/depmodwrapper
diff --git a/poky/meta/recipes-kernel/linux-firmware/linux-firmware_20220708.bb b/poky/meta/recipes-kernel/linux-firmware/linux-firmware_20220913.bb
similarity index 96%
rename from poky/meta/recipes-kernel/linux-firmware/linux-firmware_20220708.bb
rename to poky/meta/recipes-kernel/linux-firmware/linux-firmware_20220913.bb
index 91c32e4..45c9d0e 100644
--- a/poky/meta/recipes-kernel/linux-firmware/linux-firmware_20220708.bb
+++ b/poky/meta/recipes-kernel/linux-firmware/linux-firmware_20220913.bb
@@ -132,7 +132,7 @@
                     "
 # WHENCE checksum is defined separately to ease overriding it if
 # class-devupstream is selected.
-WHENCE_CHKSUM  = "def08711eb23ba967fb7e1f8cff66178"
+WHENCE_CHKSUM  = "98ecc3d3223df7ebdc23b0ec56aafb20"
 
 # These are not common licenses, set NO_GENERIC_LICENSE for them
 # so that the license files will be copied from fetched source
@@ -209,7 +209,7 @@
 # Pin this to the 20220509 release, override this in local.conf
 SRCREV:class-devupstream ?= "b19cbdca78ab2adfd210c91be15a22568e8b8cae"
 
-SRC_URI[sha256sum] = "0abec827a035c82bdcabdf82aa37ded247bc682ef05861bd409ea6f477bab81d"
+SRC_URI[sha256sum] = "26fd00f2d8e96c4af6f44269a6b893eb857253044f75ad28ef6706a2250cd8e9"
 
 inherit allarch
 
@@ -311,6 +311,11 @@
              ${PN}-qcom-adreno-a2xx ${PN}-qcom-adreno-a3xx ${PN}-qcom-adreno-a4xx ${PN}-qcom-adreno-a530 \
              ${PN}-qcom-adreno-a630 ${PN}-qcom-adreno-a650 ${PN}-qcom-adreno-a660 \
              ${PN}-qcom-apq8096-audio ${PN}-qcom-apq8096-modem \
+             ${PN}-qcom-sc8280xp-lenovo-x13s-compat \
+             ${PN}-qcom-sc8280xp-lenovo-x13s-audio \
+             ${PN}-qcom-sc8280xp-lenovo-x13s-adreno \
+             ${PN}-qcom-sc8280xp-lenovo-x13s-compute \
+             ${PN}-qcom-sc8280xp-lenovo-x13s-sensors \
              ${PN}-qcom-sdm845-audio ${PN}-qcom-sdm845-compute ${PN}-qcom-sdm845-modem \
              ${PN}-qcom-sm8250-audio ${PN}-qcom-sm8250-compute \
              ${PN}-amlogic-vdec-license ${PN}-amlogic-vdec \
@@ -976,6 +981,11 @@
 FILES:${PN}-qcom-adreno-a660 = "${nonarch_base_libdir}/firmware/qcom/a660*.*"
 FILES:${PN}-qcom-apq8096-audio = "${nonarch_base_libdir}/firmware/qcom/apq8096/adsp*.*"
 FILES:${PN}-qcom-apq8096-modem = "${nonarch_base_libdir}/firmware/qcom/apq8096/mba.mbn ${nonarch_base_libdir}/firmware/qcom/apq8096/modem*.* ${nonarch_base_libdir}/firmware/qcom/apq8096/wlanmdsp.mbn"
+FILES:${PN}-qcom-sc8280xp-lenovo-x13s-compat = "${nonarch_base_libdir}/firmware/qcom/LENOVO/21BX"
+FILES:${PN}-qcom-sc8280xp-lenovo-x13s-audio = "${nonarch_base_libdir}/firmware/qcom/sc8280xp/LENOVO/21BX/*adsp*.* ${nonarch_base_libdir}/firmware/qcom/sc8280xp/LENOVO/21BX/battmgr.jsn"
+FILES:${PN}-qcom-sc8280xp-lenovo-x13s-adreno = "${nonarch_base_libdir}/firmware/qcom/sc8280xp/LENOVO/21BX/qcdxkmsuc8280.mbn"
+FILES:${PN}-qcom-sc8280xp-lenovo-x13s-compute = "${nonarch_base_libdir}/firmware/qcom/sc8280xp/LENOVO/21BX/*cdsp*.*"
+FILES:${PN}-qcom-sc8280xp-lenovo-x13s-sensors = "${nonarch_base_libdir}/firmware/qcom/sc8280xp/LENOVO/21BX/*slpi*.*"
 FILES:${PN}-qcom-sdm845-audio = "${nonarch_base_libdir}/firmware/qcom/sdm845/adsp*.*"
 FILES:${PN}-qcom-sdm845-compute = "${nonarch_base_libdir}/firmware/qcom/sdm845/cdsp*.*"
 FILES:${PN}-qcom-sdm845-modem = "${nonarch_base_libdir}/firmware/qcom/sdm845/mba.mbn ${nonarch_base_libdir}/firmware/qcom/sdm845/modem*.* ${nonarch_base_libdir}/firmware/qcom/sdm845/wlanmdsp.mbn"
@@ -996,12 +1006,21 @@
 RDEPENDS:${PN}-qcom-adreno-a660 = "${PN}-qcom-license"
 RDEPENDS:${PN}-qcom-apq8096-audio = "${PN}-qcom-license"
 RDEPENDS:${PN}-qcom-apq8096-modem = "${PN}-qcom-license"
+RDEPENDS:${PN}-qcom-sc8280xp-lenovo-x13s-audio = "${PN}-qcom-license"
+RDEPENDS:${PN}-qcom-sc8280xp-lenovo-x13s-adreno = "${PN}-qcom-license"
+RDEPENDS:${PN}-qcom-sc8280xp-lenovo-x13s-compute = "${PN}-qcom-license"
+RDEPENDS:${PN}-qcom-sc8280xp-lenovo-x13s-sensors = "${PN}-qcom-license"
 RDEPENDS:${PN}-qcom-sdm845-audio = "${PN}-qcom-license"
 RDEPENDS:${PN}-qcom-sdm845-compute = "${PN}-qcom-license"
 RDEPENDS:${PN}-qcom-sdm845-modem = "${PN}-qcom-license"
 RDEPENDS:${PN}-qcom-sm8250-audio = "${PN}-qcom-license"
 RDEPENDS:${PN}-qcom-sm8250-compute = "${PN}-qcom-license"
 
+RRECOMMENDS:${PN}-qcom-sc8280xp-lenovo-x13s-audio = "${PN}-qcom-sc8280xp-lenovo-x13s-compat"
+RRECOMMENDS:${PN}-qcom-sc8280xp-lenovo-x13s-adreno = "${PN}-qcom-sc8280xp-lenovo-x13s-compat"
+RRECOMMENDS:${PN}-qcom-sc8280xp-lenovo-x13s-compute = "${PN}-qcom-sc8280xp-lenovo-x13s-compat"
+RRECOMMENDS:${PN}-qcom-sc8280xp-lenovo-x13s-sensors = "${PN}-qcom-sc8280xp-lenovo-x13s-compat"
+
 FILES:${PN}-liquidio = "${nonarch_base_libdir}/firmware/liquidio"
 
 # For Amlogic VDEC
@@ -1081,3 +1100,6 @@
 # Firmware files are generally not ran on the CPU, so they can be
 # allarch despite being architecture specific
 INSANE_SKIP = "arch"
+
+# Don't warn about already stripped files
+INSANE_SKIP:${PN} = "already-stripped"
diff --git a/poky/meta/recipes-kernel/linux-libc-headers/linux-libc-headers_5.19.bb b/poky/meta/recipes-kernel/linux-libc-headers/linux-libc-headers_5.19.bb
index 528e1d3..3557526 100644
--- a/poky/meta/recipes-kernel/linux-libc-headers/linux-libc-headers_5.19.bb
+++ b/poky/meta/recipes-kernel/linux-libc-headers/linux-libc-headers_5.19.bb
@@ -7,7 +7,7 @@
     file://0001-include-linux-stddef.h-in-swab.h-uapi-header.patch \
    "
 
-SRC_URI:append = "\
+SRC_URI += "\
     file://0001-scripts-Use-fixed-input-and-output-files-instead-of-.patch \
     file://0001-kbuild-install_headers.sh-Strip-_UAPI-from-if-define.patch \
 "
diff --git a/poky/meta/recipes-kernel/linux/linux-yocto-rt_5.15.bb b/poky/meta/recipes-kernel/linux/linux-yocto-rt_5.15.bb
index 9e37494..6f8648e 100644
--- a/poky/meta/recipes-kernel/linux/linux-yocto-rt_5.15.bb
+++ b/poky/meta/recipes-kernel/linux/linux-yocto-rt_5.15.bb
@@ -11,13 +11,13 @@
         raise bb.parse.SkipRecipe("Set PREFERRED_PROVIDER_virtual/kernel to linux-yocto-rt to enable it")
 }
 
-SRCREV_machine ?= "cb561ee4438e5961e5c471eee8094737ca873135"
-SRCREV_meta ?= "59c8898d450152a0875af340e6f0e72d05aafdfa"
+SRCREV_machine ?= "dba1b7d90813231782bdeda1bd169c93b35c94e0"
+SRCREV_meta ?= "1128d7bcdcde490d4f35cc00c97f5410bb240d99"
 
 SRC_URI = "git://git.yoctoproject.org/linux-yocto.git;branch=${KBRANCH};name=machine \
            git://git.yoctoproject.org/yocto-kernel-cache;type=kmeta;name=meta;branch=yocto-5.15;destsuffix=${KMETA}"
 
-LINUX_VERSION ?= "5.15.62"
+LINUX_VERSION ?= "5.15.68"
 
 LIC_FILES_CHKSUM = "file://COPYING;md5=6bc538ed5bd9a7fc9398086aedcd7e46"
 
diff --git a/poky/meta/recipes-kernel/linux/linux-yocto-rt_5.19.bb b/poky/meta/recipes-kernel/linux/linux-yocto-rt_5.19.bb
index c12bec3..b3e9fba 100644
--- a/poky/meta/recipes-kernel/linux/linux-yocto-rt_5.19.bb
+++ b/poky/meta/recipes-kernel/linux/linux-yocto-rt_5.19.bb
@@ -11,13 +11,13 @@
         raise bb.parse.SkipRecipe("Set PREFERRED_PROVIDER_virtual/kernel to linux-yocto-rt to enable it")
 }
 
-SRCREV_machine ?= "df2290e83a50563688e5ea0be34e091f1c623069"
-SRCREV_meta ?= "5eb0fa93f8490a962ff0c36c14d8def271d75128"
+SRCREV_machine ?= "bc8af638c00f28a46e77e34056079087638f6e65"
+SRCREV_meta ?= "350b544d077955b599b54ab364f6227d96a90455"
 
 SRC_URI = "git://git.yoctoproject.org/linux-yocto.git;branch=${KBRANCH};name=machine \
            git://git.yoctoproject.org/yocto-kernel-cache;type=kmeta;name=meta;branch=yocto-5.19;destsuffix=${KMETA}"
 
-LINUX_VERSION ?= "5.19.3"
+LINUX_VERSION ?= "5.19.9"
 
 LIC_FILES_CHKSUM = "file://COPYING;md5=6bc538ed5bd9a7fc9398086aedcd7e46"
 
diff --git a/poky/meta/recipes-kernel/linux/linux-yocto-tiny_5.15.bb b/poky/meta/recipes-kernel/linux/linux-yocto-tiny_5.15.bb
index 2de32ff..4f2bb48 100644
--- a/poky/meta/recipes-kernel/linux/linux-yocto-tiny_5.15.bb
+++ b/poky/meta/recipes-kernel/linux/linux-yocto-tiny_5.15.bb
@@ -5,7 +5,7 @@
 
 require recipes-kernel/linux/linux-yocto.inc
 
-LINUX_VERSION ?= "5.15.62"
+LINUX_VERSION ?= "5.15.68"
 LIC_FILES_CHKSUM = "file://COPYING;md5=6bc538ed5bd9a7fc9398086aedcd7e46"
 
 DEPENDS += "${@bb.utils.contains('ARCH', 'x86', 'elfutils-native', '', d)}"
@@ -14,8 +14,8 @@
 KMETA = "kernel-meta"
 KCONF_BSP_AUDIT_LEVEL = "2"
 
-SRCREV_machine ?= "b708cb8412758a382516bdc46f26a0b43c50fb82"
-SRCREV_meta ?= "59c8898d450152a0875af340e6f0e72d05aafdfa"
+SRCREV_machine ?= "33e7eea5c4545a973cf01a849c2b45fa0cd1fa13"
+SRCREV_meta ?= "1128d7bcdcde490d4f35cc00c97f5410bb240d99"
 
 PV = "${LINUX_VERSION}+git${SRCPV}"
 
diff --git a/poky/meta/recipes-kernel/linux/linux-yocto-tiny_5.19.bb b/poky/meta/recipes-kernel/linux/linux-yocto-tiny_5.19.bb
index 339f7f6..466b706 100644
--- a/poky/meta/recipes-kernel/linux/linux-yocto-tiny_5.19.bb
+++ b/poky/meta/recipes-kernel/linux/linux-yocto-tiny_5.19.bb
@@ -5,7 +5,7 @@
 
 require recipes-kernel/linux/linux-yocto.inc
 
-LINUX_VERSION ?= "5.19.3"
+LINUX_VERSION ?= "5.19.9"
 LIC_FILES_CHKSUM = "file://COPYING;md5=6bc538ed5bd9a7fc9398086aedcd7e46"
 
 DEPENDS += "${@bb.utils.contains('ARCH', 'x86', 'elfutils-native', '', d)}"
@@ -14,8 +14,8 @@
 KMETA = "kernel-meta"
 KCONF_BSP_AUDIT_LEVEL = "2"
 
-SRCREV_machine ?= "4d933456709d664a55fdda85304c08567265ad4d"
-SRCREV_meta ?= "5eb0fa93f8490a962ff0c36c14d8def271d75128"
+SRCREV_machine ?= "65a9a22786a6710fc7b4edb7cfae80dd83f591f1"
+SRCREV_meta ?= "350b544d077955b599b54ab364f6227d96a90455"
 
 PV = "${LINUX_VERSION}+git${SRCPV}"
 
diff --git a/poky/meta/recipes-kernel/linux/linux-yocto_5.15.bb b/poky/meta/recipes-kernel/linux/linux-yocto_5.15.bb
index 40c430a..2f91fb7 100644
--- a/poky/meta/recipes-kernel/linux/linux-yocto_5.15.bb
+++ b/poky/meta/recipes-kernel/linux/linux-yocto_5.15.bb
@@ -13,24 +13,24 @@
 KBRANCH:qemux86-64 ?= "v5.15/standard/base"
 KBRANCH:qemumips64 ?= "v5.15/standard/mti-malta64"
 
-SRCREV_machine:qemuarm ?= "9b096ff3914926ac68501bf156c2d1368f3ebe6c"
-SRCREV_machine:qemuarm64 ?= "7cb30c5e95067ad12b7c4d371c048c7f5d5c922c"
-SRCREV_machine:qemumips ?= "3210fe826ade54d891cf2120c964d2a0dc3e7393"
-SRCREV_machine:qemuppc ?= "7bfdc3608327b9c471008af370dbffe053f5bed9"
-SRCREV_machine:qemuriscv64 ?= "14879dcc3ca7b24d8650cf117c380a94bb865f40"
-SRCREV_machine:qemuriscv32 ?= "14879dcc3ca7b24d8650cf117c380a94bb865f40"
-SRCREV_machine:qemux86 ?= "14879dcc3ca7b24d8650cf117c380a94bb865f40"
-SRCREV_machine:qemux86-64 ?= "14879dcc3ca7b24d8650cf117c380a94bb865f40"
-SRCREV_machine:qemumips64 ?= "ef125626d718771f11fab19a3f91cca5ec27f887"
-SRCREV_machine ?= "14879dcc3ca7b24d8650cf117c380a94bb865f40"
-SRCREV_meta ?= "59c8898d450152a0875af340e6f0e72d05aafdfa"
+SRCREV_machine:qemuarm ?= "efe28b4b16d4a1a19f59b4650a0bfb23ffc8c40e"
+SRCREV_machine:qemuarm64 ?= "66986670c45f63d2ed2078e07aa817ede88025ad"
+SRCREV_machine:qemumips ?= "aeeb80fd7f684aca830adb7daf32cfd80637cf3a"
+SRCREV_machine:qemuppc ?= "5c6387a562af89ec92546c1374a120ac240f14e6"
+SRCREV_machine:qemuriscv64 ?= "0e51e571701842db33ad96f6ddc8cc6b23230627"
+SRCREV_machine:qemuriscv32 ?= "0e51e571701842db33ad96f6ddc8cc6b23230627"
+SRCREV_machine:qemux86 ?= "0e51e571701842db33ad96f6ddc8cc6b23230627"
+SRCREV_machine:qemux86-64 ?= "0e51e571701842db33ad96f6ddc8cc6b23230627"
+SRCREV_machine:qemumips64 ?= "20ec37851f4ee9965120937dcf2567f15e72e07a"
+SRCREV_machine ?= "0e51e571701842db33ad96f6ddc8cc6b23230627"
+SRCREV_meta ?= "1128d7bcdcde490d4f35cc00c97f5410bb240d99"
 
 # set your preferred provider of linux-yocto to 'linux-yocto-upstream', and you'll
 # get the <version>/base branch, which is pure upstream -stable, and the same
 # meta SRCREV as the linux-yocto-standard builds. Select your version using the
 # normal PREFERRED_VERSION settings.
 BBCLASSEXTEND = "devupstream:target"
-SRCREV_machine:class-devupstream ?= "a0a7e0b2b8b22901945ea2aef1b65871d718accf"
+SRCREV_machine:class-devupstream ?= "dd20085f2a88b6cdb12bdcdbd2d7a761c86b184a"
 PN:class-devupstream = "linux-yocto-upstream"
 KBRANCH:class-devupstream = "v5.15/base"
 
@@ -38,7 +38,7 @@
            git://git.yoctoproject.org/yocto-kernel-cache;type=kmeta;name=meta;branch=yocto-5.15;destsuffix=${KMETA}"
 
 LIC_FILES_CHKSUM = "file://COPYING;md5=6bc538ed5bd9a7fc9398086aedcd7e46"
-LINUX_VERSION ?= "5.15.62"
+LINUX_VERSION ?= "5.15.68"
 
 DEPENDS += "${@bb.utils.contains('ARCH', 'x86', 'elfutils-native', '', d)}"
 DEPENDS += "openssl-native util-linux-native"
diff --git a/poky/meta/recipes-kernel/linux/linux-yocto_5.19.bb b/poky/meta/recipes-kernel/linux/linux-yocto_5.19.bb
index 0ff28aa..f882972 100644
--- a/poky/meta/recipes-kernel/linux/linux-yocto_5.19.bb
+++ b/poky/meta/recipes-kernel/linux/linux-yocto_5.19.bb
@@ -13,24 +13,24 @@
 KBRANCH:qemux86-64 ?= "v5.19/standard/base"
 KBRANCH:qemumips64 ?= "v5.19/standard/mti-malta64"
 
-SRCREV_machine:qemuarm ?= "2cbb2d5097fc44a23da635d2ebbccb33df20a34d"
-SRCREV_machine:qemuarm64 ?= "4d933456709d664a55fdda85304c08567265ad4d"
-SRCREV_machine:qemumips ?= "7741c5b2f536b99815329849cca09799cdb82e62"
-SRCREV_machine:qemuppc ?= "4d933456709d664a55fdda85304c08567265ad4d"
-SRCREV_machine:qemuriscv64 ?= "4d933456709d664a55fdda85304c08567265ad4d"
-SRCREV_machine:qemuriscv32 ?= "4d933456709d664a55fdda85304c08567265ad4d"
-SRCREV_machine:qemux86 ?= "4d933456709d664a55fdda85304c08567265ad4d"
-SRCREV_machine:qemux86-64 ?= "4d933456709d664a55fdda85304c08567265ad4d"
-SRCREV_machine:qemumips64 ?= "4ced38bbd45f6cb623728bd755894928a719edac"
-SRCREV_machine ?= "4d933456709d664a55fdda85304c08567265ad4d"
-SRCREV_meta ?= "5eb0fa93f8490a962ff0c36c14d8def271d75128"
+SRCREV_machine:qemuarm ?= "446661f6a3b07535304497c1a51d9cab95f48f0b"
+SRCREV_machine:qemuarm64 ?= "65a9a22786a6710fc7b4edb7cfae80dd83f591f1"
+SRCREV_machine:qemumips ?= "98da147618fca3da29cf1c6ab9c53f24de2c587c"
+SRCREV_machine:qemuppc ?= "65a9a22786a6710fc7b4edb7cfae80dd83f591f1"
+SRCREV_machine:qemuriscv64 ?= "65a9a22786a6710fc7b4edb7cfae80dd83f591f1"
+SRCREV_machine:qemuriscv32 ?= "65a9a22786a6710fc7b4edb7cfae80dd83f591f1"
+SRCREV_machine:qemux86 ?= "65a9a22786a6710fc7b4edb7cfae80dd83f591f1"
+SRCREV_machine:qemux86-64 ?= "65a9a22786a6710fc7b4edb7cfae80dd83f591f1"
+SRCREV_machine:qemumips64 ?= "53842054198d851b5deb5810afaf126156efbb54"
+SRCREV_machine ?= "65a9a22786a6710fc7b4edb7cfae80dd83f591f1"
+SRCREV_meta ?= "350b544d077955b599b54ab364f6227d96a90455"
 
 # set your preferred provider of linux-yocto to 'linux-yocto-upstream', and you'll
 # get the <version>/base branch, which is pure upstream -stable, and the same
 # meta SRCREV as the linux-yocto-standard builds. Select your version using the
 # normal PREFERRED_VERSION settings.
 BBCLASSEXTEND = "devupstream:target"
-SRCREV_machine:class-devupstream ?= "bf44eed7f2fc9af74eb72f4bc415bdd3d11c4bed"
+SRCREV_machine:class-devupstream ?= "d1105a680e66b0482bd18048534c58ecabb5c284"
 PN:class-devupstream = "linux-yocto-upstream"
 KBRANCH:class-devupstream = "v5.19/base"
 
@@ -38,7 +38,7 @@
            git://git.yoctoproject.org/yocto-kernel-cache;type=kmeta;name=meta;branch=yocto-5.19;destsuffix=${KMETA}"
 
 LIC_FILES_CHKSUM = "file://COPYING;md5=6bc538ed5bd9a7fc9398086aedcd7e46"
-LINUX_VERSION ?= "5.19.3"
+LINUX_VERSION ?= "5.19.9"
 
 DEPENDS += "${@bb.utils.contains('ARCH', 'x86', 'elfutils-native', '', d)}"
 DEPENDS += "openssl-native util-linux-native"
@@ -56,7 +56,7 @@
 # Functionality flags
 KERNEL_EXTRA_FEATURES ?= "features/netfilter/netfilter.scc"
 KERNEL_FEATURES:append = " ${KERNEL_EXTRA_FEATURES}"
-KERNEL_FEATURES:append:qemuall=" cfg/virtio.scc features/drm-bochs/drm-bochs.scc"
+KERNEL_FEATURES:append:qemuall=" cfg/virtio.scc features/drm-bochs/drm-bochs.scc cfg/net/mdio.scc"
 KERNEL_FEATURES:append:qemux86=" cfg/sound.scc cfg/paravirt_kvm.scc"
 KERNEL_FEATURES:append:qemux86-64=" cfg/sound.scc cfg/paravirt_kvm.scc"
 KERNEL_FEATURES:append = " ${@bb.utils.contains("TUNE_FEATURES", "mx32", " cfg/x32.scc", "", d)}"
diff --git a/poky/meta/recipes-kernel/lttng/lttng-platforms.inc b/poky/meta/recipes-kernel/lttng/lttng-platforms.inc
index 933c65d..900e36d 100644
--- a/poky/meta/recipes-kernel/lttng/lttng-platforms.inc
+++ b/poky/meta/recipes-kernel/lttng/lttng-platforms.inc
@@ -15,3 +15,7 @@
 
 COMPATIBLE_HOST:arc:pn-lttng-ust = "null"
 
+# Whether the platform supports lttng-tools
+# lttng-tools requires SYS_ppoll and SYS_pselect6 which are not supported on riscv32.
+# It's also turned off for riscv32 in meta-riscv. See https://github.com/riscv/meta-riscv/blob/master/conf/layer.conf
+COMPATIBLE_HOST:riscv32:pn-lttng-tools = "null"
diff --git a/poky/meta/recipes-kernel/perf/perf.bb b/poky/meta/recipes-kernel/perf/perf.bb
index 9f7c300..31bc046 100644
--- a/poky/meta/recipes-kernel/perf/perf.bb
+++ b/poky/meta/recipes-kernel/perf/perf.bb
@@ -145,6 +145,9 @@
 	# we are checking for this make target to be compatible with older perf versions
 	if ${@bb.utils.contains('PACKAGECONFIG', 'scripting', 'true', 'false', d)} && grep -q install-python_ext ${S}/tools/perf/Makefile*; then
 	    oe_runmake DESTDIR=${D} install-python_ext
+	    if [ -e ${D}${libdir}/python*/site-packages/perf-*/SOURCES.txt ]; then
+		sed -i -e 's#${WORKDIR}##g' ${D}${libdir}/python*/site-packages/perf-*/SOURCES.txt
+	    fi
 	fi
 }
 
@@ -245,6 +248,9 @@
         # change the Makefile line to remove everything before 'tools/perf'
         sed -i -e "s%srcdir_SQ = \$(subst ','\\\'',\$(srcdir))%srcdir_SQ = \$(patsubst \%tools/perf,tools/perf,\$(subst ','\\\'',\$(srcdir)))%g" \
             ${S}/tools/perf/Makefile.config
+        # Avoid hardcoded path to python-native
+        sed -i -e 's#\(PYTHON_WORD := \)$(call shell-wordify,$(PYTHON))#\1 python3#g' \
+            ${S}/tools/perf/Makefile.config
     fi
     if [ -e "${S}/tools/perf/tests/Build" ]; then
         # OUTPUT is the full path, we have python on the path so we remove it from the
diff --git a/poky/meta/recipes-multimedia/ffmpeg/ffmpeg_5.1.bb b/poky/meta/recipes-multimedia/ffmpeg/ffmpeg_5.1.1.bb
similarity index 98%
rename from poky/meta/recipes-multimedia/ffmpeg/ffmpeg_5.1.bb
rename to poky/meta/recipes-multimedia/ffmpeg/ffmpeg_5.1.1.bb
index bb507b4..2306fe4 100644
--- a/poky/meta/recipes-multimedia/ffmpeg/ffmpeg_5.1.bb
+++ b/poky/meta/recipes-multimedia/ffmpeg/ffmpeg_5.1.1.bb
@@ -23,7 +23,7 @@
                     file://COPYING.LGPLv3;md5=e6a600fd5e1d9cbde2d983680233ad02"
 
 SRC_URI = "https://www.ffmpeg.org/releases/${BP}.tar.xz"
-SRC_URI[sha256sum] = "55eb6aab5ee235550fa54a33eaf8bf1b4ec66c01453182b12f6a993d75698b03"
+SRC_URI[sha256sum] = "95bf3ff8c496511e71e958fb249e663c8c9c3de583c5bebc0f5a9745abbc0435"
 
 # Build fails when thumb is enabled: https://bugzilla.yoctoproject.org/show_bug.cgi?id=7717
 ARM_INSTRUCTION_SET:armv4 = "arm"
diff --git a/poky/meta/recipes-multimedia/flac/flac_1.3.4.bb b/poky/meta/recipes-multimedia/flac/flac_1.3.4.bb
deleted file mode 100644
index 012da0a..0000000
--- a/poky/meta/recipes-multimedia/flac/flac_1.3.4.bb
+++ /dev/null
@@ -1,45 +0,0 @@
-SUMMARY = "Free Lossless Audio Codec"
-DESCRIPTION = "FLAC stands for Free Lossless Audio Codec, a lossless audio compression format."
-HOMEPAGE = "https://xiph.org/flac/"
-BUGTRACKER = "http://sourceforge.net/p/flac/bugs/"
-SECTION = "libs"
-LICENSE = "GFDL-1.2 & GPL-2.0-or-later & LGPL-2.1-or-later & BSD-3-Clause"
-LIC_FILES_CHKSUM = "file://COPYING.FDL;md5=ad1419ecc56e060eccf8184a87c4285f \
-                    file://src/Makefile.am;beginline=1;endline=17;md5=09501c864f89dfc7ead65553129817ca \
-                    file://COPYING.GPL;md5=b234ee4d69f5fce4486a80fdaf4a4263 \
-                    file://src/flac/main.c;beginline=1;endline=18;md5=09777e2934947a36f13568d0beb81199 \
-                    file://COPYING.LGPL;md5=fbc093901857fcd118f065f900982c24 \
-                    file://src/plugin_common/all.h;beginline=1;endline=18;md5=f56cb4ba9a3bc9ec6102e8df03215271 \
-                    file://COPYING.Xiph;md5=b59c1b6d7fc0fb7965f821a3d36505e3 \
-                    file://include/FLAC/all.h;beginline=65;endline=70;md5=64474f2b22e9e77b28d8b8b25c983a48"
-DEPENDS = "libogg"
-
-SRC_URI = "http://downloads.xiph.org/releases/flac/${BP}.tar.xz \
-"
-
-SRC_URI[sha256sum] = "8ff0607e75a322dd7cd6ec48f4f225471404ae2730d0ea945127b1355155e737"
-
-CVE_PRODUCT = "libflac flac"
-
-inherit autotools gettext
-
-EXTRA_OECONF = "--disable-oggtest \
-                --with-ogg-libraries=${STAGING_LIBDIR} \
-                --with-ogg-includes=${STAGING_INCDIR} \
-                --disable-xmms-plugin \
-                --without-libiconv-prefix \
-                ac_cv_prog_NASM="" \
-                "
-
-EXTRA_OECONF += "${@bb.utils.contains("TUNE_FEATURES", "altivec", " --enable-altivec", " --disable-altivec", d)}"
-EXTRA_OECONF += "${@bb.utils.contains("TUNE_FEATURES", "vsx", " --enable-vsx", " --disable-vsx", d)}"
-EXTRA_OECONF += "${@bb.utils.contains("TUNE_FEATURES", "core2", " --enable-sse", "", d)}"
-EXTRA_OECONF += "${@bb.utils.contains("TUNE_FEATURES", "corei7", " --enable-sse", "", d)}"
-
-PACKAGES += "libflac libflac++ liboggflac liboggflac++"
-FILES:${PN} = "${bindir}/*"
-FILES:libflac = "${libdir}/libFLAC.so.*"
-FILES:libflac++ = "${libdir}/libFLAC++.so.*"
-FILES:liboggflac = "${libdir}/libOggFLAC.so.*"
-FILES:liboggflac++ = "${libdir}/libOggFLAC++.so.*"
-
diff --git a/poky/meta/recipes-multimedia/flac/flac_1.4.0.bb b/poky/meta/recipes-multimedia/flac/flac_1.4.0.bb
new file mode 100644
index 0000000..3603963
--- /dev/null
+++ b/poky/meta/recipes-multimedia/flac/flac_1.4.0.bb
@@ -0,0 +1,43 @@
+SUMMARY = "Free Lossless Audio Codec"
+DESCRIPTION = "FLAC stands for Free Lossless Audio Codec, a lossless audio compression format."
+HOMEPAGE = "https://xiph.org/flac/"
+BUGTRACKER = "https://github.com/xiph/flac/issues"
+SECTION = "libs"
+LICENSE = "GFDL-1.2 & GPL-2.0-or-later & LGPL-2.1-or-later & BSD-3-Clause"
+LIC_FILES_CHKSUM = "file://COPYING.FDL;md5=ad1419ecc56e060eccf8184a87c4285f \
+                    file://src/Makefile.am;beginline=1;endline=17;md5=146d2c8c2fd287545cc1bd81f31e8758 \
+                    file://COPYING.GPL;md5=b234ee4d69f5fce4486a80fdaf4a4263 \
+                    file://src/flac/main.c;beginline=1;endline=18;md5=893456854ce6bf14a1a7ea77266eebab \
+                    file://COPYING.LGPL;md5=fbc093901857fcd118f065f900982c24 \
+                    file://src/plugin_common/all.h;beginline=1;endline=18;md5=73c74192ce89ee6238d15a171e00c971 \
+                    file://COPYING.Xiph;md5=3d6da238b5b57a0965d6730291119f65 \
+                    file://include/FLAC/all.h;beginline=65;endline=70;md5=64474f2b22e9e77b28d8b8b25c983a48"
+
+SRC_URI = "http://downloads.xiph.org/releases/flac/${BP}.tar.xz"
+SRC_URI[sha256sum] = "af41c0733c93c237c3e52f64dd87e3b0d9af38259f1c7d11e8cbf583c48c2506"
+
+CVE_PRODUCT = "libflac flac"
+
+inherit autotools gettext
+
+EXTRA_OECONF = "--disable-oggtest \
+                --disable-xmms-plugin \
+                --without-libiconv-prefix \
+                ac_cv_prog_NASM="" \
+                "
+
+PACKAGECONFIG ??= " \
+    ${@bb.utils.filter("TUNE_FEATURES", "altivec vsx", d)} \
+    ${@bb.utils.contains_any("TUNE_FEATURES", "core2 corei7", "sse", "", d)} \
+    ogg \
+"
+PACKAGECONFIG[sse] = "--enable-sse,--disable-sse"
+PACKAGECONFIG[altivec] = "--enable-altivec,--disable-altivec"
+PACKAGECONFIG[vsx] = "--enable-vsx,--disable-vsx"
+PACKAGECONFIG[avx] = "--enable-avx,--disable-avx"
+PACKAGECONFIG[ogg] = "--enable-ogg --with-ogg-libraries=${STAGING_LIBDIR} --with-ogg-includes=${STAGING_INCDIR},--disable-ogg,libogg"
+
+PACKAGES += "libflac libflac++"
+FILES:${PN} = "${bindir}/*"
+FILES:libflac = "${libdir}/libFLAC.so.*"
+FILES:libflac++ = "${libdir}/libFLAC++.so.*"
diff --git a/poky/meta/recipes-multimedia/libpng/libpng_1.6.37.bb b/poky/meta/recipes-multimedia/libpng/libpng_1.6.38.bb
similarity index 83%
rename from poky/meta/recipes-multimedia/libpng/libpng_1.6.37.bb
rename to poky/meta/recipes-multimedia/libpng/libpng_1.6.38.bb
index 61e3d92..dc62720 100644
--- a/poky/meta/recipes-multimedia/libpng/libpng_1.6.37.bb
+++ b/poky/meta/recipes-multimedia/libpng/libpng_1.6.38.bb
@@ -5,14 +5,13 @@
 HOMEPAGE = "http://www.libpng.org/"
 SECTION = "libs"
 LICENSE = "Libpng"
-LIC_FILES_CHKSUM = "file://LICENSE;md5=b0085051bf265bac2bfc38bc89f50000"
+LIC_FILES_CHKSUM = "file://LICENSE;md5=5c900cc124ba35a274073b5de7639b13"
 DEPENDS = "zlib"
 
 LIBV = "16"
 
 SRC_URI = "${SOURCEFORGE_MIRROR}/${BPN}/${BPN}${LIBV}/${BP}.tar.xz"
-SRC_URI[md5sum] = "015e8e15db1eecde5f2eb9eb5b6e59e9"
-SRC_URI[sha256sum] = "505e70834d35383537b6491e7ae8641f1a4bed1876dbfe361201fc80868d88ca"
+SRC_URI[sha256sum] = "b3683e8b8111ebf6f1ac004ebb6b0c975cd310ec469d98364388e9cedbfa68be"
 
 MIRRORS += "${SOURCEFORGE_MIRROR}/${BPN}/${BPN}${LIBV}/ ${SOURCEFORGE_MIRROR}/${BPN}/${BPN}${LIBV}/older-releases/"
 
diff --git a/poky/meta/recipes-multimedia/libsamplerate/libsamplerate0_0.2.2.bb b/poky/meta/recipes-multimedia/libsamplerate/libsamplerate0_0.2.2.bb
index ed2b643..29d48fd 100644
--- a/poky/meta/recipes-multimedia/libsamplerate/libsamplerate0_0.2.2.bb
+++ b/poky/meta/recipes-multimedia/libsamplerate/libsamplerate0_0.2.2.bb
@@ -7,18 +7,18 @@
                     file://src/samplerate.c;beginline=1;endline=7;md5=7a4238289dc36bfb70968ccaa5bd0d4f"
 DEPENDS = "libsndfile1"
 
-SRC_URI = "https://github.com/libsndfile/libsamplerate/releases/download/${PV}/libsamplerate-${PV}.tar.xz \
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/libsamplerate-${PV}.tar.xz \
 "
 
 SRC_URI[sha256sum] = "3258da280511d24b49d6b08615bbe824d0cacc9842b0e4caf11c52cf2b043893"
 
 CVE_PRODUCT = "libsamplerate"
 
-UPSTREAM_CHECK_URI = "https://github.com/libsndfile/libsamplerate/releases"
+GITHUB_BASE_URI = "https://github.com/libsndfile/libsamplerate/releases"
 
 S = "${WORKDIR}/libsamplerate-${PV}"
 
-inherit autotools pkgconfig
+inherit autotools pkgconfig github-releases
 
 # FFTW and ALSA are only used in tests and examples, so they don't affect
 # normal builds. It should be safe to ignore these, but explicitly disabling
diff --git a/poky/meta/recipes-multimedia/libsndfile/libsndfile1_1.1.0.bb b/poky/meta/recipes-multimedia/libsndfile/libsndfile1_1.1.0.bb
index 70626b3..1f1a401 100644
--- a/poky/meta/recipes-multimedia/libsndfile/libsndfile1_1.1.0.bb
+++ b/poky/meta/recipes-multimedia/libsndfile/libsndfile1_1.1.0.bb
@@ -8,10 +8,10 @@
 SECTION = "libs/multimedia"
 LICENSE = "LGPL-2.1-only"
 
-SRC_URI = "https://github.com/libsndfile/libsndfile/releases/download/${PV}/libsndfile-${PV}.tar.xz \
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/libsndfile-${PV}.tar.xz \
            file://noopus.patch \
           "
-UPSTREAM_CHECK_URI = "https://github.com/libsndfile/libsndfile/releases/"
+GITHUB_BASE_URI = "https://github.com/libsndfile/libsndfile/releases/"
 
 SRC_URI[sha256sum] = "0f98e101c0f7c850a71225fb5feaf33b106227b3d331333ddc9bacee190bcf41"
 
@@ -25,7 +25,7 @@
 PACKAGECONFIG[alsa] = "--enable-alsa,--disable-alsa,alsa-lib"
 PACKAGECONFIG[regtest] = "--enable-sqlite,--disable-sqlite,sqlite3"
 
-inherit autotools lib_package pkgconfig multilib_header
+inherit autotools lib_package pkgconfig multilib_header github-releases
 
 do_install:append() {
     oe_multilib_header sndfile.h
diff --git a/poky/meta/recipes-multimedia/libtiff/files/CVE-2022-2953.patch b/poky/meta/recipes-multimedia/libtiff/files/CVE-2022-2953.patch
new file mode 100644
index 0000000..98020ff
--- /dev/null
+++ b/poky/meta/recipes-multimedia/libtiff/files/CVE-2022-2953.patch
@@ -0,0 +1,86 @@
+CVE: CVE-2022-2953
+Upstream-Status: Backport
+Signed-off-by: Ross Burton <ross.burton@arm.com>
+
+From 8fe3735942ea1d90d8cef843b55b3efe8ab6feaf Mon Sep 17 00:00:00 2001
+From: Su_Laus <sulau@freenet.de>
+Date: Mon, 15 Aug 2022 22:11:03 +0200
+Subject: [PATCH] =?UTF-8?q?According=20to=20Richard=20Nolde=20https://gitl?=
+ =?UTF-8?q?ab.com/libtiff/libtiff/-/issues/401#note=5F877637400=20the=20ti?=
+ =?UTF-8?q?ffcrop=20option=20=E2=80=9E-S=E2=80=9C=20is=20also=20mutually?=
+ =?UTF-8?q?=20exclusive=20to=20the=20other=20crop=20options=20(-X|-Y),=20-?=
+ =?UTF-8?q?Z=20and=20-z.?=
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+This is now checked and ends tiffcrop if those arguments are not mutually exclusive.
+
+This MR will fix the following tiffcrop issues: #349, #414, #422, #423, #424
+---
+ tools/tiffcrop.c | 31 ++++++++++++++++---------------
+ 1 file changed, 16 insertions(+), 15 deletions(-)
+
+diff --git a/tools/tiffcrop.c b/tools/tiffcrop.c
+index 90286a5e..c3b758ec 100644
+--- a/tools/tiffcrop.c
++++ b/tools/tiffcrop.c
+@@ -173,12 +173,12 @@ static   char tiffcrop_rev_date[] = "02-09-2022";
+ #define ROTATECW_270 32
+ #define ROTATE_ANY (ROTATECW_90 | ROTATECW_180 | ROTATECW_270)
+ 
+-#define CROP_NONE     0
+-#define CROP_MARGINS  1
+-#define CROP_WIDTH    2
+-#define CROP_LENGTH   4
+-#define CROP_ZONES    8
+-#define CROP_REGIONS 16
++#define CROP_NONE     0     /* "-S" -> Page_MODE_ROWSCOLS and page->rows/->cols != 0 */
++#define CROP_MARGINS  1     /* "-m" */
++#define CROP_WIDTH    2     /* "-X" */
++#define CROP_LENGTH   4     /* "-Y" */
++#define CROP_ZONES    8     /* "-Z" */
++#define CROP_REGIONS 16     /* "-z" */
+ #define CROP_ROTATE  32
+ #define CROP_MIRROR  64
+ #define CROP_INVERT 128
+@@ -316,7 +316,7 @@ struct crop_mask {
+ #define PAGE_MODE_RESOLUTION   1
+ #define PAGE_MODE_PAPERSIZE    2
+ #define PAGE_MODE_MARGINS      4
+-#define PAGE_MODE_ROWSCOLS     8
++#define PAGE_MODE_ROWSCOLS     8    /* for -S option */
+ 
+ #define INVERT_DATA_ONLY      10
+ #define INVERT_DATA_AND_TAG   11
+@@ -781,7 +781,7 @@ static const char usage_info[] =
+ "             The four debug/dump options are independent, though it makes little sense to\n"
+ "             specify a dump file without specifying a detail level.\n"
+ "\n"
+-"Note:        The (-X|-Y), -Z and -z options are mutually exclusive.\n"
++"Note:        The (-X|-Y), -Z, -z and -S options are mutually exclusive.\n"
+ "             In no case should the options be applied to a given selection successively.\n"
+ "\n"
+ ;
+@@ -2131,13 +2131,14 @@ void  process_command_opts (int argc, char *argv[], char *mp, char *mode, uint32
+ 		/*NOTREACHED*/
+       }
+     }
+-    /*-- Check for not allowed combinations (e.g. -X, -Y and -Z and -z are mutually exclusive) --*/
+-    char XY, Z, R;
++    /*-- Check for not allowed combinations (e.g. -X, -Y and -Z, -z and -S are mutually exclusive) --*/
++    char XY, Z, R, S;
+     XY = ((crop_data->crop_mode & CROP_WIDTH) || (crop_data->crop_mode & CROP_LENGTH));
+     Z = (crop_data->crop_mode & CROP_ZONES);
+     R = (crop_data->crop_mode & CROP_REGIONS);
+-    if ((XY && Z) || (XY && R) || (Z && R)) {
+-        TIFFError("tiffcrop input error", "The crop options(-X|-Y), -Z and -z are mutually exclusive.->Exit");
++    S = (page->mode & PAGE_MODE_ROWSCOLS);
++    if ((XY && Z) || (XY && R) || (XY && S) || (Z && R) || (Z && S) || (R && S)) {
++        TIFFError("tiffcrop input error", "The crop options(-X|-Y), -Z, -z and -S are mutually exclusive.->Exit");
+         exit(EXIT_FAILURE);
+     }
+   }  /* end process_command_opts */
+-- 
+2.34.1
+
diff --git a/poky/meta/recipes-multimedia/libtiff/tiff_4.4.0.bb b/poky/meta/recipes-multimedia/libtiff/tiff_4.4.0.bb
index e30df0b..caf6f60 100644
--- a/poky/meta/recipes-multimedia/libtiff/tiff_4.4.0.bb
+++ b/poky/meta/recipes-multimedia/libtiff/tiff_4.4.0.bb
@@ -11,6 +11,7 @@
 SRC_URI = "http://download.osgeo.org/libtiff/tiff-${PV}.tar.gz \
            file://0001-fix-the-FPE-in-tiffcrop-415-427-and-428.patch \
            file://CVE-2022-34526.patch \
+           file://CVE-2022-2953.patch \
            "
 
 SRC_URI[sha256sum] = "917223b37538959aca3b790d2d73aa6e626b688e02dcda272aec24c2f498abed"
diff --git a/poky/meta/recipes-sato/webkit/webkitgtk/0d3344e17d258106617b0e6d783d073b188a2548.patch b/poky/meta/recipes-sato/webkit/webkitgtk/0d3344e17d258106617b0e6d783d073b188a2548.patch
new file mode 100644
index 0000000..32f92f7
--- /dev/null
+++ b/poky/meta/recipes-sato/webkit/webkitgtk/0d3344e17d258106617b0e6d783d073b188a2548.patch
@@ -0,0 +1,296 @@
+From 0d3344e17d258106617b0e6d783d073b188a2548 Mon Sep 17 00:00:00 2001
+From: Adrian Perez de Castro <aperez@igalia.com>
+Date: Thu, 2 Jun 2022 11:19:06 +0300
+Subject: [PATCH] [ARM][NEON] FELightningNEON.cpp fails to build, NEON fast
+ path seems unused https://bugs.webkit.org/show_bug.cgi?id=241182
+
+Reviewed by NOBODY (OOPS!).
+
+Move the NEON fast path for the SVG lighting filter effects into
+FELightingSoftwareApplier, and arrange to actually use them by
+forwarding calls to applyPlatformGeneric() into applyPlatformNeon().
+
+Some changes were needed to adapt platformApplyNeon() to the current
+state of filters after r286140. This was not detected because the code
+bitrotted due to it being guarded with CPU(ARM_TRADITIONAL), which does
+not get used much these days: CPU(ARM_THUMB2) is more common. It should
+be possible to use the NEON fast paths also in Thumb mode, but that is
+left for a follow-up fix.
+
+* Source/WebCore/platform/graphics/cpu/arm/filters/FELightingNEON.cpp:
+(WebCore::FELightingSoftwareApplier::platformApplyNeonWorker):
+(WebCore::FELightingSoftwareApplier::getPowerCoefficients):
+(WebCore::FELighting::platformApplyNeonWorker): Deleted.
+(WebCore::FELighting::getPowerCoefficients): Deleted.
+* Source/WebCore/platform/graphics/cpu/arm/filters/FELightingNEON.h:
+(WebCore::FELightingSoftwareApplier::applyPlatformNeon):
+(WebCore::FELighting::platformApplyNeon): Deleted.
+* Source/WebCore/platform/graphics/filters/DistantLightSource.h:
+* Source/WebCore/platform/graphics/filters/FELighting.h:
+* Source/WebCore/platform/graphics/filters/PointLightSource.h:
+* Source/WebCore/platform/graphics/filters/SpotLightSource.h:
+* Source/WebCore/platform/graphics/filters/software/FELightingSoftwareApplier.h:
+---
+Upstream-Status: Submitted [https://github.com/WebKit/WebKit/pull/1233]
+Signed-off-by: Khem Raj <raj.khem@gmail.com>
+
+ .../cpu/arm/filters/FELightingNEON.cpp        |  4 +-
+ .../graphics/cpu/arm/filters/FELightingNEON.h | 54 +++++++++----------
+ .../graphics/filters/DistantLightSource.h     |  4 ++
+ .../platform/graphics/filters/FELighting.h    |  7 ---
+ .../graphics/filters/PointLightSource.h       |  4 ++
+ .../graphics/filters/SpotLightSource.h        |  4 ++
+ .../software/FELightingSoftwareApplier.h      | 16 ++++++
+ 7 files changed, 57 insertions(+), 36 deletions(-)
+
+--- a/Source/WebCore/platform/graphics/cpu/arm/filters/FELightingNEON.cpp
++++ b/Source/WebCore/platform/graphics/cpu/arm/filters/FELightingNEON.cpp
+@@ -49,7 +49,7 @@ short* feLightingConstantsForNeon()
+     return s_FELightingConstantsForNeon;
+ }
+ 
+-void FELighting::platformApplyNeonWorker(FELightingPaintingDataForNeon* parameters)
++void FELightingSoftwareApplier::platformApplyNeonWorker(FELightingPaintingDataForNeon* parameters)
+ {
+     neonDrawLighting(parameters);
+ }
+@@ -464,7 +464,7 @@ TOSTRING(neonDrawLighting) ":" NL
+     "b .lightStrengthCalculated" NL
+ ); // NOLINT
+ 
+-int FELighting::getPowerCoefficients(float exponent)
++int FELightingSoftwareApplier::getPowerCoefficients(float exponent)
+ {
+     // Calling a powf function from the assembly code would require to save
+     // and reload a lot of NEON registers. Since the base is in range [0..1]
+--- a/Source/WebCore/platform/graphics/cpu/arm/filters/FELightingNEON.h
++++ b/Source/WebCore/platform/graphics/cpu/arm/filters/FELightingNEON.h
+@@ -24,14 +24,15 @@
+  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+  */
+ 
+-#ifndef FELightingNEON_h
+-#define FELightingNEON_h
++#pragma once
+ 
+ #if CPU(ARM_NEON) && CPU(ARM_TRADITIONAL) && COMPILER(GCC_COMPATIBLE)
+ 
+-#include "FELighting.h"
++#include "FELightingSoftwareApplier.h"
++#include "ImageBuffer.h"
+ #include "PointLightSource.h"
+ #include "SpotLightSource.h"
++#include <wtf/ObjectIdentifier.h>
+ #include <wtf/ParallelJobs.h>
+ 
+ namespace WebCore {
+@@ -93,14 +94,14 @@ extern "C" {
+ void neonDrawLighting(FELightingPaintingDataForNeon*);
+ }
+ 
+-inline void FELighting::platformApplyNeon(const LightingData& data, const LightSource::PaintingData& paintingData)
++inline void FELightingSoftwareApplier::applyPlatformNeon(const FELightingSoftwareApplier::LightingData& data, const LightSource::PaintingData& paintingData)
+ {
+-    alignas(16) FELightingFloatArgumentsForNeon floatArguments;
+-    FELightingPaintingDataForNeon neonData = {
++    WebCore::FELightingFloatArgumentsForNeon alignas(16) floatArguments;
++    WebCore::FELightingPaintingDataForNeon neonData = {
+         data.pixels->data(),
+         1,
+-        data.widthDecreasedByOne - 1,
+-        data.heightDecreasedByOne - 1,
++        data.width - 2,
++        data.height - 2,
+         0,
+         0,
+         0,
+@@ -111,23 +112,23 @@ inline void FELighting::platformApplyNeo
+     // Set light source arguments.
+     floatArguments.constOne = 1;
+ 
+-    auto color = m_lightingColor.toColorTypeLossy<SRGBA<uint8_t>>().resolved();
++    auto color = data.lightingColor.toColorTypeLossy<SRGBA<uint8_t>>().resolved();
+ 
+     floatArguments.colorRed = color.red;
+     floatArguments.colorGreen = color.green;
+     floatArguments.colorBlue = color.blue;
+     floatArguments.padding4 = 0;
+ 
+-    if (m_lightSource->type() == LS_POINT) {
++    if (data.lightSource->type() == LS_POINT) {
+         neonData.flags |= FLAG_POINT_LIGHT;
+-        PointLightSource& pointLightSource = static_cast<PointLightSource&>(m_lightSource.get());
++        const auto& pointLightSource = *static_cast<const PointLightSource*>(data.lightSource);
+         floatArguments.lightX = pointLightSource.position().x();
+         floatArguments.lightY = pointLightSource.position().y();
+         floatArguments.lightZ = pointLightSource.position().z();
+         floatArguments.padding2 = 0;
+-    } else if (m_lightSource->type() == LS_SPOT) {
++    } else if (data.lightSource->type() == LS_SPOT) {
+         neonData.flags |= FLAG_SPOT_LIGHT;
+-        SpotLightSource& spotLightSource = static_cast<SpotLightSource&>(m_lightSource.get());
++        const auto& spotLightSource = *static_cast<const SpotLightSource*>(data.lightSource);
+         floatArguments.lightX = spotLightSource.position().x();
+         floatArguments.lightY = spotLightSource.position().y();
+         floatArguments.lightZ = spotLightSource.position().z();
+@@ -145,7 +146,7 @@ inline void FELighting::platformApplyNeo
+         if (spotLightSource.specularExponent() == 1)
+             neonData.flags |= FLAG_CONE_EXPONENT_IS_1;
+     } else {
+-        ASSERT(m_lightSource->type() == LS_DISTANT);
++        ASSERT(data.lightSource->type() == LS_DISTANT);
+         floatArguments.lightX = paintingData.initialLightingData.lightVector.x();
+         floatArguments.lightY = paintingData.initialLightingData.lightVector.y();
+         floatArguments.lightZ = paintingData.initialLightingData.lightVector.z();
+@@ -155,38 +156,39 @@ inline void FELighting::platformApplyNeo
+     // Set lighting arguments.
+     floatArguments.surfaceScale = data.surfaceScale;
+     floatArguments.minusSurfaceScaleDividedByFour = -data.surfaceScale / 4;
+-    if (m_lightingType == FELighting::DiffuseLighting)
+-        floatArguments.diffuseConstant = m_diffuseConstant;
++    if (data.filterType == FilterEffect::Type::FEDiffuseLighting)
++        floatArguments.diffuseConstant = data.diffuseConstant;
+     else {
+         neonData.flags |= FLAG_SPECULAR_LIGHT;
+-        floatArguments.diffuseConstant = m_specularConstant;
+-        neonData.specularExponent = getPowerCoefficients(m_specularExponent);
+-        if (m_specularExponent == 1)
++        floatArguments.diffuseConstant = data.specularConstant;
++        neonData.specularExponent = getPowerCoefficients(data.specularExponent);
++        if (data.specularExponent == 1)
+             neonData.flags |= FLAG_SPECULAR_EXPONENT_IS_1;
+     }
+     if (floatArguments.diffuseConstant == 1)
+         neonData.flags |= FLAG_DIFFUSE_CONST_IS_1;
+ 
+-    int optimalThreadNumber = ((data.widthDecreasedByOne - 1) * (data.heightDecreasedByOne - 1)) / s_minimalRectDimension;
++    static constexpr int minimalRectDimension = 100 * 100; // Empirical data limit for parallel jobs
++    int optimalThreadNumber = ((data.width - 2) * (data.height - 2)) / minimalRectDimension;
+     if (optimalThreadNumber > 1) {
+         // Initialize parallel jobs
+-        ParallelJobs<FELightingPaintingDataForNeon> parallelJobs(&WebCore::FELighting::platformApplyNeonWorker, optimalThreadNumber);
++        ParallelJobs<FELightingPaintingDataForNeon> parallelJobs(&FELightingSoftwareApplier::platformApplyNeonWorker, optimalThreadNumber);
+ 
+         // Fill the parameter array
+         int job = parallelJobs.numberOfJobs();
+         if (job > 1) {
+             int yStart = 1;
+-            int yStep = (data.heightDecreasedByOne - 1) / job;
++            int yStep = (data.height - 2) / job;
+             for (--job; job >= 0; --job) {
+                 FELightingPaintingDataForNeon& params = parallelJobs.parameter(job);
+                 params = neonData;
+                 params.yStart = yStart;
+-                params.pixels += (yStart - 1) * (data.widthDecreasedByOne + 1) * 4;
++                params.pixels += (yStart - 1) * data.width * 4;
+                 if (job > 0) {
+                     params.absoluteHeight = yStep;
+                     yStart += yStep;
+                 } else
+-                    params.absoluteHeight = data.heightDecreasedByOne - yStart;
++                    params.absoluteHeight = (data.height - 1) - yStart;
+             }
+             parallelJobs.execute();
+             return;
+@@ -199,5 +201,3 @@ inline void FELighting::platformApplyNeo
+ } // namespace WebCore
+ 
+ #endif // CPU(ARM_NEON) && COMPILER(GCC_COMPATIBLE)
+-
+-#endif // FELightingNEON_h
+--- a/Source/WebCore/platform/graphics/filters/DistantLightSource.h
++++ b/Source/WebCore/platform/graphics/filters/DistantLightSource.h
+@@ -25,6 +25,10 @@
+ #include "LightSource.h"
+ #include <wtf/Ref.h>
+ 
++namespace WTF {
++class TextStream;
++} // namespace WTF
++
+ namespace WebCore {
+ 
+ class DistantLightSource : public LightSource {
+--- a/Source/WebCore/platform/graphics/filters/FELighting.h
++++ b/Source/WebCore/platform/graphics/filters/FELighting.h
+@@ -35,8 +35,6 @@
+ 
+ namespace WebCore {
+ 
+-struct FELightingPaintingDataForNeon;
+-
+ class FELighting : public FilterEffect {
+ public:
+     const Color& lightingColor() const { return m_lightingColor; }
+@@ -67,11 +65,6 @@ protected:
+ 
+     std::unique_ptr<FilterEffectApplier> createSoftwareApplier() const override;
+ 
+-#if CPU(ARM_NEON) && CPU(ARM_TRADITIONAL) && COMPILER(GCC_COMPATIBLE)
+-    static int getPowerCoefficients(float exponent);
+-    inline void platformApplyNeon(const LightingData&, const LightSource::PaintingData&);
+-#endif
+-
+     Color m_lightingColor;
+     float m_surfaceScale;
+     float m_diffuseConstant;
+--- a/Source/WebCore/platform/graphics/filters/PointLightSource.h
++++ b/Source/WebCore/platform/graphics/filters/PointLightSource.h
+@@ -26,6 +26,10 @@
+ #include "LightSource.h"
+ #include <wtf/Ref.h>
+ 
++namespace WTF {
++class TextStream;
++} // namespace WTF
++
+ namespace WebCore {
+ 
+ class PointLightSource : public LightSource {
+--- a/Source/WebCore/platform/graphics/filters/SpotLightSource.h
++++ b/Source/WebCore/platform/graphics/filters/SpotLightSource.h
+@@ -26,6 +26,10 @@
+ #include "LightSource.h"
+ #include <wtf/Ref.h>
+ 
++namespace WTF {
++class TextStream;
++} // namespace WTF
++
+ namespace WebCore {
+ 
+ class SpotLightSource : public LightSource {
+--- a/Source/WebCore/platform/graphics/filters/software/FELightingSoftwareApplier.h
++++ b/Source/WebCore/platform/graphics/filters/software/FELightingSoftwareApplier.h
+@@ -36,6 +36,7 @@
+ namespace WebCore {
+ 
+ class FELighting;
++struct FELightingPaintingDataForNeon;
+ 
+ class FELightingSoftwareApplier final : public FilterEffectConcreteApplier<FELighting> {
+     WTF_MAKE_FAST_ALLOCATED;
+@@ -132,8 +133,23 @@ private:
+ 
+     static void applyPlatformGenericPaint(const LightingData&, const LightSource::PaintingData&, int startY, int endY);
+     static void applyPlatformGenericWorker(ApplyParameters*);
++
++#if CPU(ARM_NEON) && CPU(ARM_TRADITIONAL) && COMPILER(GCC_COMPATIBLE)
++    static int getPowerCoefficients(float exponent);
++    static void platformApplyNeonWorker(FELightingPaintingDataForNeon*);
++    inline static void applyPlatformNeon(const LightingData&, const LightSource::PaintingData&);
++
++    inline static void applyPlatformGeneric(const LightingData& data, const LightSource::PaintingData& paintingData)
++    {
++        applyPlatformNeon(data, paintingData);
++    }
++#else
+     static void applyPlatformGeneric(const LightingData&, const LightSource::PaintingData&);
++#endif
++
+     static void applyPlatform(const LightingData&);
+ };
+ 
+ } // namespace WebCore
++
++#include "FELightingNEON.h"
diff --git a/poky/meta/recipes-sato/webkit/webkitgtk/fix-gstreamer-include-paths.patch b/poky/meta/recipes-sato/webkit/webkitgtk/fix-gstreamer-include-paths.patch
new file mode 100644
index 0000000..2c6be87
--- /dev/null
+++ b/poky/meta/recipes-sato/webkit/webkitgtk/fix-gstreamer-include-paths.patch
@@ -0,0 +1,51 @@
+From 25efd1478b48b5406fdab3b7b9c1f7e0ef95a7ed Mon Sep 17 00:00:00 2001
+From: Pablo Saavedra <psaavedra@igalia.com>
+Date: Mon, 27 Jun 2022 16:56:04 -0700
+Subject: [PATCH] Fix include gstreamer path on cross compiler toolchains
+ https://bugs.webkit.org/show_bug.cgi?id=241483
+
+Reviewed by Adrian Perez de Castro.
+
+Set the include paths for the gstreamer components to the full path
+using find_path(). This function relies on CMAKE_FIND_ROOT_PATH to
+find the right place where the includes are. This fixes possible
+warnings/errors on cross toolchains using -Wpoison-system-directories
+and -Werror=poison-system-directories.
+
+* Source/cmake/FindGStreamer.cmake:
+
+Canonical link: https://commits.webkit.org/251895@main
+
+Upstream-Status: Backport [2.37.1 https://github.com/WebKit/WebKit/commit/25efd1478b48b5406fdab3b7b9c1f7e0ef95a7ed]
+---
+ Source/cmake/FindGStreamer.cmake | 14 +++++++++++---
+ 1 file changed, 11 insertions(+), 3 deletions(-)
+
+diff --git a/Source/cmake/FindGStreamer.cmake b/Source/cmake/FindGStreamer.cmake
+index 3216f6974b53..b65a11f30871 100644
+--- a/Source/cmake/FindGStreamer.cmake
++++ b/Source/cmake/FindGStreamer.cmake
+@@ -75,12 +75,20 @@ macro(FIND_GSTREAMER_COMPONENT _component_prefix _pkgconfig_name _library)
+     # ${includedir}/gstreamer-1.0 which remains correct. The issue here is that
+     # we don't rely on the `Cflags`, cmake fails to generate a proper
+     # `.._INCLUDE_DIRS` variable in this case. So we need to do it here...
++
++    # Populate the list initially from the _INCLUDE_DIRS result variable.
++    set(${_component_prefix}_INCLUDE_DIRS ${PC_${_component_prefix}_INCLUDE_DIRS})
++
+     set(_include_dir "${PC_${_component_prefix}_INCLUDEDIR}")
+     string(REGEX MATCH "(.*)/gstreamer-1.0" _dummy "${_include_dir}")
++
+     if ("${CMAKE_MATCH_1}" STREQUAL "")
+-        set(${_component_prefix}_INCLUDE_DIRS "${_include_dir}/gstreamer-1.0;${PC_${_component_prefix}_INCLUDE_DIRS}")
+-    else ()
+-        set(${_component_prefix}_INCLUDE_DIRS "${PC_${_component_prefix}_INCLUDE_DIRS}")
++        find_path(${_component_prefix}_RESOLVED_INCLUDEDIR NAMES "${_include_dir}/gstreamer-1.0")
++        # Only add the resolved path from `_INCLUDEDIR` if found.
++        if (${_component_prefix}_RESOLVED_INCLUDEDIR)
++            list(APPEND ${_component_prefix}_INCLUDE_DIRS
++                 "${${_component_prefix}_RESOLVED_INCLUDEDIR}")
++        endif ()
+     endif ()
+ 
+     find_library(${_component_prefix}_LIBRARIES
diff --git a/poky/meta/recipes-sato/webkit/webkitgtk_2.36.6.bb b/poky/meta/recipes-sato/webkit/webkitgtk_2.36.7.bb
similarity index 96%
rename from poky/meta/recipes-sato/webkit/webkitgtk_2.36.6.bb
rename to poky/meta/recipes-sato/webkit/webkitgtk_2.36.7.bb
index 37b977f..76b119c 100644
--- a/poky/meta/recipes-sato/webkit/webkitgtk_2.36.6.bb
+++ b/poky/meta/recipes-sato/webkit/webkitgtk_2.36.7.bb
@@ -15,9 +15,10 @@
            file://0001-Fix-build-without-opengl-or-es.patch \
            file://reproducibility.patch \
            file://0001-When-building-introspection-files-do-not-quote-CFLAG.patch \
+           file://fix-gstreamer-include-paths.patch \
+           file://0d3344e17d258106617b0e6d783d073b188a2548.patch \
            "
-
-SRC_URI[sha256sum] = "1193bc821946336776f0dfa5e0dca5651f1e57157eda12da4721d2441f24a61a"
+SRC_URI[sha256sum] = "0c260cf2b32f0481d017670dfed1b61e554967cd067195606c9f9eb5fe731743"
 
 inherit cmake pkgconfig gobject-introspection perlnative features_check upstream-version-is-even gtk-doc
 
diff --git a/poky/meta/recipes-support/bash-completion/bash-completion_2.11.bb b/poky/meta/recipes-support/bash-completion/bash-completion_2.11.bb
index d981dd0..19bc816 100644
--- a/poky/meta/recipes-support/bash-completion/bash-completion_2.11.bb
+++ b/poky/meta/recipes-support/bash-completion/bash-completion_2.11.bb
@@ -12,16 +12,15 @@
 
 SECTION = "console/utils"
 
-SRC_URI = "https://github.com/scop/bash-completion/releases/download/${PV}/${BPN}-${PV}.tar.xz"
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/${BPN}-${PV}.tar.xz"
 
 SRC_URI[md5sum] = "2514c6772d0de6254758b98c53f91861"
 SRC_URI[sha256sum] = "73a8894bad94dee83ab468fa09f628daffd567e8bef1a24277f1e9a0daf911ac"
-UPSTREAM_CHECK_REGEX = "bash-completion-(?P<pver>(?!2008).+)\.tar"
-UPSTREAM_CHECK_URI = "https://github.com/scop/bash-completion/releases"
+GITHUB_BASE_URI = "https://github.com/scop/bash-completion/releases"
 
 PARALLEL_MAKE = ""
 
-inherit autotools
+inherit autotools github-releases
 
 do_install:append() {
 	# compatdir
diff --git a/poky/meta/recipes-support/boost/boost.inc b/poky/meta/recipes-support/boost/boost.inc
index 402f159..4263e64 100644
--- a/poky/meta/recipes-support/boost/boost.inc
+++ b/poky/meta/recipes-support/boost/boost.inc
@@ -151,6 +151,7 @@
 BJAM_OPTS    = '${BOOST_PARALLEL_MAKE} -d+2 -q \
 		${BJAM_TOOLS} \
 		-sBOOST_BUILD_USER_CONFIG=${WORKDIR}/user-config.jam \
+		-sICU_PATH=${STAGING_EXECPREFIXDIR} \
 		--build-dir=${B} \
 		--disable-icu \
 		${BJAM_EXTRA}'
diff --git a/poky/meta/recipes-support/curl/curl_7.85.0.bb b/poky/meta/recipes-support/curl/curl_7.85.0.bb
index 3b55830..ad6a517 100644
--- a/poky/meta/recipes-support/curl/curl_7.85.0.bb
+++ b/poky/meta/recipes-support/curl/curl_7.85.0.bb
@@ -100,7 +100,8 @@
 	cp -rf ${D}${bindir}/curl-config ${D}${PTEST_PATH}
 }
 
-RDEPENDS:${PN}-ptest += "bash perl-modules"
+RDEPENDS:${PN}-ptest += "bash perl-modules perl-module-time-hires perl-module-digest-md5 \
+                         perl-module-digest perl-module-ipc-open2"
 
 PACKAGES =+ "lib${BPN}"
 
diff --git a/poky/meta/recipes-support/diffoscope/diffoscope_220.bb b/poky/meta/recipes-support/diffoscope/diffoscope_221.bb
similarity index 92%
rename from poky/meta/recipes-support/diffoscope/diffoscope_220.bb
rename to poky/meta/recipes-support/diffoscope/diffoscope_221.bb
index dc55647..c801200 100644
--- a/poky/meta/recipes-support/diffoscope/diffoscope_220.bb
+++ b/poky/meta/recipes-support/diffoscope/diffoscope_221.bb
@@ -12,7 +12,7 @@
 
 inherit pypi setuptools3
 
-SRC_URI[sha256sum] = "7873e13ac8b11b634ee3490b70b056c6a6bae9cfb794d6ba7cb43e7797b2a829"
+SRC_URI[sha256sum] = "7b1724e9b1e5ac6597cac07d0acd2661db1bcf0d6a9b2e87be21b5e3806be9f8"
 
 RDEPENDS:${PN} += "binutils vim squashfs-tools python3-libarchive-c python3-magic python3-rpm"
 
diff --git a/poky/meta/recipes-support/enchant/enchant2_2.3.3.bb b/poky/meta/recipes-support/enchant/enchant2_2.3.3.bb
index 96d56c2..c892f57 100644
--- a/poky/meta/recipes-support/enchant/enchant2_2.3.3.bb
+++ b/poky/meta/recipes-support/enchant/enchant2_2.3.3.bb
@@ -9,12 +9,12 @@
 
 DEPENDS = "glib-2.0"
 
-inherit autotools pkgconfig
+inherit autotools pkgconfig github-releases
 
-SRC_URI = "https://github.com/AbiWord/enchant/releases/download/v${PV}/enchant-${PV}.tar.gz"
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/enchant-${PV}.tar.gz"
 SRC_URI[sha256sum] = "3da12103f11cf49c3cf2fd2ce3017575c5321a489e5b9bfa81dd91ec413f3891"
 
-UPSTREAM_CHECK_URI = "https://github.com/AbiWord/enchant/releases"
+GITHUB_BASE_URI = "https://github.com/AbiWord/enchant/releases"
 
 S = "${WORKDIR}/enchant-${PV}"
 
diff --git a/poky/meta/recipes-support/fribidi/fribidi_1.0.12.bb b/poky/meta/recipes-support/fribidi/fribidi_1.0.12.bb
index b29c478..9e46d95 100644
--- a/poky/meta/recipes-support/fribidi/fribidi_1.0.12.bb
+++ b/poky/meta/recipes-support/fribidi/fribidi_1.0.12.bb
@@ -9,13 +9,11 @@
 LICENSE = "LGPL-2.1-or-later"
 LIC_FILES_CHKSUM = "file://COPYING;md5=a916467b91076e631dd8edb7424769c7"
 
-SRC_URI = "https://github.com/${BPN}/${BPN}/releases/download/v${PV}/${BP}.tar.xz \
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/${BP}.tar.xz \
            "
 SRC_URI[sha256sum] = "0cd233f97fc8c67bb3ac27ce8440def5d3ffacf516765b91c2cc654498293495"
 
-UPSTREAM_CHECK_URI = "https://github.com/${BPN}/${BPN}/releases"
-
-inherit meson lib_package pkgconfig
+inherit meson lib_package pkgconfig github-releases
 
 CVE_PRODUCT = "gnu_fribidi fribidi"
 
diff --git a/poky/meta/recipes-support/icu/icu_71.1.bb b/poky/meta/recipes-support/icu/icu_71.1.bb
index b39633c..df999dc 100644
--- a/poky/meta/recipes-support/icu/icu_71.1.bb
+++ b/poky/meta/recipes-support/icu/icu_71.1.bb
@@ -17,7 +17,7 @@
 
 ICU_MAJOR_VER = "${@d.getVar('PV').split('.')[0]}"
 
-inherit autotools pkgconfig
+inherit autotools pkgconfig github-releases
 
 # ICU needs the native build directory as an argument to its --with-cross-build option when
 # cross-compiling. Taken the situation that different builds may share a common sstate-cache
@@ -96,8 +96,8 @@
 ARM_INSTRUCTION_SET:armv4 = "arm"
 ARM_INSTRUCTION_SET:armv5 = "arm"
 
-BASE_SRC_URI = "https://github.com/unicode-org/icu/releases/download/release-${ICU_FOLDER}/icu4c-${ICU_PV}-src.tgz"
-DATA_SRC_URI = "https://github.com/unicode-org/icu/releases/download/release-${ICU_FOLDER}/icu4c-${ICU_PV}-data.zip"
+BASE_SRC_URI = "${GITHUB_BASE_URI}/download/release-${ICU_FOLDER}/icu4c-${ICU_PV}-src.tgz"
+DATA_SRC_URI = "${GITHUB_BASE_URI}/download/release-${ICU_FOLDER}/icu4c-${ICU_PV}-data.zip"
 SRC_URI = "${BASE_SRC_URI};name=code \
            ${DATA_SRC_URI};name=data \
            file://filter.json \
@@ -111,8 +111,8 @@
 SRC_URI[code.sha256sum] = "67a7e6e51f61faf1306b6935333e13b2c48abd8da6d2f46ce6adca24b1e21ebf"
 SRC_URI[data.sha256sum] = "e3882b4fece6e5e039f22c3189b7ba224180fd26fdbfa9db284617455b93e804"
 
-UPSTREAM_CHECK_REGEX = "icu4c-(?P<pver>\d+(_\d+)+)-src"
-UPSTREAM_CHECK_URI = "https://github.com/unicode-org/icu/releases"
+UPSTREAM_CHECK_REGEX = "releases/tag/release-(?P<pver>(?!.+rc).+)"
+GITHUB_BASE_URI = "https://github.com/unicode-org/icu/releases"
 
 EXTRA_OECONF:append:libc-musl = " ac_cv_func_strtod_l=no"
 
diff --git a/poky/meta/recipes-support/libatomic-ops/libatomic-ops_7.6.14.bb b/poky/meta/recipes-support/libatomic-ops/libatomic-ops_7.6.14.bb
index fad92df..fbfbdac 100644
--- a/poky/meta/recipes-support/libatomic-ops/libatomic-ops_7.6.14.bb
+++ b/poky/meta/recipes-support/libatomic-ops/libatomic-ops_7.6.14.bb
@@ -8,8 +8,8 @@
                     file://doc/LICENSING.txt;md5=dfc50c7cea7b66935844587a0f7389e7 \
                     "
 
-SRC_URI = "https://github.com/ivmai/libatomic_ops/releases/download/v${PV}/libatomic_ops-${PV}.tar.gz"
-UPSTREAM_CHECK_URI = "https://github.com/ivmai/libatomic_ops/releases"
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/libatomic_ops-${PV}.tar.gz"
+GITHUB_BASE_URI = "https://github.com/ivmai/libatomic_ops/releases"
 
 SRC_URI[sha256sum] = "390f244d424714735b7050d056567615b3b8f29008a663c262fb548f1802d292"
 
@@ -17,6 +17,6 @@
 
 ALLOW_EMPTY:${PN} = "1"
 
-inherit autotools pkgconfig
+inherit autotools pkgconfig github-releases
 
 BBCLASSEXTEND = "native nativesdk"
diff --git a/poky/meta/recipes-support/libcheck/libcheck_0.15.2.bb b/poky/meta/recipes-support/libcheck/libcheck_0.15.2.bb
index 1393aa2..83f3c3f 100644
--- a/poky/meta/recipes-support/libcheck/libcheck_0.15.2.bb
+++ b/poky/meta/recipes-support/libcheck/libcheck_0.15.2.bb
@@ -10,14 +10,14 @@
 LICENSE  = "LGPL-2.1-or-later"
 LIC_FILES_CHKSUM = "file://COPYING.LESSER;md5=2d5025d4aa3495befef8f17206a5b0a1"
 
-SRC_URI = "https://github.com/${BPN}/check/releases/download/${PV}/check-${PV}.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/check-${PV}.tar.gz \
            file://not-echo-compiler-info-to-check_stdint.h.patch"
 SRC_URI[sha256sum] = "a8de4e0bacfb4d76dd1c618ded263523b53b85d92a146d8835eb1a52932fa20a"
-UPSTREAM_CHECK_URI = "https://github.com/libcheck/check/releases/"
+GITHUB_BASE_URI = "https://github.com/libcheck/check/releases/"
 
 S = "${WORKDIR}/check-${PV}"
 
-inherit autotools pkgconfig texinfo
+inherit autotools pkgconfig texinfo github-releases
 
 CACHED_CONFIGUREVARS += "ac_cv_path_AWK_PATH=${bindir}/gawk"
 
diff --git a/poky/meta/recipes-support/libevent/libevent_2.1.12.bb b/poky/meta/recipes-support/libevent/libevent_2.1.12.bb
index e26e8a9..55a65e6 100644
--- a/poky/meta/recipes-support/libevent/libevent_2.1.12.bb
+++ b/poky/meta/recipes-support/libevent/libevent_2.1.12.bb
@@ -11,7 +11,7 @@
 LICENSE = "BSD-3-Clause & MIT"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=17f20574c0b154d12236d5fbe964f549"
 
-SRC_URI = "https://github.com/libevent/libevent/releases/download/release-${PV}-stable/${BP}-stable.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/release-${PV}-stable/${BP}-stable.tar.gz \
            file://Makefile-missing-test-dir.patch \
            file://run-ptest \
            file://0001-test-regress_dns.c-patch-out-tests-that-require-a-wo.patch \
@@ -21,15 +21,14 @@
            "
 
 SRC_URI[sha256sum] = "92e6de1be9ec176428fd2367677e61ceffc2ee1cb119035037a27d346b0403bb"
-
-UPSTREAM_CHECK_URI = "http://libevent.org/"
+UPSTREAM_CHECK_REGEX = "releases/tag/release-(?P<pver>.+)-stable"
 
 S = "${WORKDIR}/${BPN}-${PV}-stable"
 
 PACKAGECONFIG ??= ""
 PACKAGECONFIG[openssl] = "--enable-openssl,--disable-openssl,openssl"
 
-inherit autotools
+inherit autotools github-releases
 
 # Needed for Debian packaging
 LEAD_SONAME = "libevent-2.1.so"
diff --git a/poky/meta/recipes-support/libexif/libexif_0.6.24.bb b/poky/meta/recipes-support/libexif/libexif_0.6.24.bb
index 20ba0ef..367f012 100644
--- a/poky/meta/recipes-support/libexif/libexif_0.6.24.bb
+++ b/poky/meta/recipes-support/libexif/libexif_0.6.24.bb
@@ -10,14 +10,12 @@
 def version_underscore(v):
     return "_".join(v.split("."))
 
-SRC_URI = "https://github.com/libexif/libexif/releases/download/v${PV}/libexif-${PV}.tar.bz2 \
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/libexif-${PV}.tar.bz2 \
            "
 
 SRC_URI[sha256sum] = "d47564c433b733d83b6704c70477e0a4067811d184ec565258ac563d8223f6ae"
 
-UPSTREAM_CHECK_URI = "https://github.com/libexif/libexif/releases/"
-
-inherit autotools gettext
+inherit autotools gettext github-releases
 
 EXTRA_OECONF += "--disable-docs"
 
diff --git a/poky/meta/recipes-support/libffi/libffi_3.4.2.bb b/poky/meta/recipes-support/libffi/libffi_3.4.2.bb
index 71d9518..41c3cad 100644
--- a/poky/meta/recipes-support/libffi/libffi_3.4.2.bb
+++ b/poky/meta/recipes-support/libffi/libffi_3.4.2.bb
@@ -10,18 +10,16 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=679b5c9bdc79a2b93ee574e193e7a7bc"
 
-SRC_URI = "https://github.com/libffi/libffi/releases/download/v${PV}/${BPN}-${PV}.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/${BPN}-${PV}.tar.gz \
            file://not-win32.patch \
            file://0001-arm-sysv-reverted-clang-VFP-mitigation.patch \
            "
 SRC_URI[sha256sum] = "540fb721619a6aba3bdeef7d940d8e9e0e6d2c193595bc243241b77ff9e93620"
-UPSTREAM_CHECK_URI = "https://github.com/libffi/libffi/releases/"
-UPSTREAM_CHECK_REGEX = "libffi-(?P<pver>\d+(\.\d+)+)\.tar"
 
 EXTRA_OECONF += "--disable-builddir --disable-exec-static-tramp"
 EXTRA_OECONF:class-native += "--with-gcc-arch=generic"
 EXTRA_OEMAKE:class-target = "LIBTOOLFLAGS='--tag=CC'"
-inherit autotools texinfo multilib_header
+inherit autotools texinfo multilib_header github-releases
 
 do_install:append() {
 	oe_multilib_header ffi.h ffitarget.h
diff --git a/poky/meta/recipes-support/libgcrypt/files/no-bench-slope.patch b/poky/meta/recipes-support/libgcrypt/files/no-bench-slope.patch
new file mode 100644
index 0000000..8de3c67
--- /dev/null
+++ b/poky/meta/recipes-support/libgcrypt/files/no-bench-slope.patch
@@ -0,0 +1,20 @@
+The bench-slope test appears to be aborting fairly frequently, which causes
+failures on the autobuilder.
+
+Until this has been root-caused, disable the test.
+
+Upstream-Status: Inappropriate
+Signed-off-by: Ross Burton <ross.burton@arm.com>
+
+diff --git a/tests/testdrv.c b/tests/testdrv.c
+index 0ccde326..d3455186 100644
+--- a/tests/testdrv.c
++++ b/tests/testdrv.c
+@@ -77,7 +77,6 @@ static struct {
+    { "t-x448"      },
+    { "t-ed448"     },
+    { "benchmark"   },
+-   { "bench-slope" },
+    { "hashtest-256g",  "hashtest", "--gigs 256 SHA1 SHA256 SHA512 SM3",
+      LONG_RUNNING },
+    { NULL }
diff --git a/poky/meta/recipes-support/libgcrypt/libgcrypt_1.10.1.bb b/poky/meta/recipes-support/libgcrypt/libgcrypt_1.10.1.bb
index b0d88de..bf9d7cb 100644
--- a/poky/meta/recipes-support/libgcrypt/libgcrypt_1.10.1.bb
+++ b/poky/meta/recipes-support/libgcrypt/libgcrypt_1.10.1.bb
@@ -24,6 +24,7 @@
            file://0002-libgcrypt-fix-building-error-with-O2-in-sysroot-path.patch \
            file://0004-tests-Makefile.am-fix-undefined-reference-to-pthread.patch \
            file://no-native-gpg-error.patch \
+           file://no-bench-slope.patch \
            file://run-ptest \
            "
 SRC_URI[sha256sum] = "ef14ae546b0084cd84259f61a55e07a38c3b53afc0f546bffcef2f01baffe9de"
diff --git a/poky/meta/recipes-support/libical/libical_3.0.14.bb b/poky/meta/recipes-support/libical/libical_3.0.14.bb
index 58baf3f..44030fd 100644
--- a/poky/meta/recipes-support/libical/libical_3.0.14.bb
+++ b/poky/meta/recipes-support/libical/libical_3.0.14.bb
@@ -12,13 +12,12 @@
                     "
 SECTION = "libs"
 
-SRC_URI = "https://github.com/${BPN}/${BPN}/releases/download/v${PV}/${BP}.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/${BP}.tar.gz \
            file://0001-cmake-Do-not-export-CC-into-gir-compiler.patch \
           "
 SRC_URI[sha256sum] = "4284b780356f1dc6a01f16083e7b836e63d3815e27ed0eaaad684712357ccc8f"
-UPSTREAM_CHECK_URI = "https://github.com/libical/libical/releases"
 
-inherit cmake pkgconfig gobject-introspection vala
+inherit cmake pkgconfig gobject-introspection vala github-releases
 
 DEPENDS += "libical-native"
 
diff --git a/poky/meta/recipes-support/libjitterentropy/libjitterentropy_3.4.0.bb b/poky/meta/recipes-support/libjitterentropy/libjitterentropy_3.4.1.bb
similarity index 95%
rename from poky/meta/recipes-support/libjitterentropy/libjitterentropy_3.4.0.bb
rename to poky/meta/recipes-support/libjitterentropy/libjitterentropy_3.4.1.bb
index 1d291cc..3fb8c6b 100644
--- a/poky/meta/recipes-support/libjitterentropy/libjitterentropy_3.4.0.bb
+++ b/poky/meta/recipes-support/libjitterentropy/libjitterentropy_3.4.1.bb
@@ -10,7 +10,7 @@
                     file://LICENSE.bsd;md5=66a5cedaf62c4b2637025f049f9b826f \
                     "
 SRC_URI = "git://github.com/smuellerDD/jitterentropy-library.git;branch=master;protocol=https"
-SRCREV = "2e5019cfe63038faaa405ce53715effe4ea580e4"
+SRCREV = "4544e11320138ac02797af81766f4476a71bb09f"
 S = "${WORKDIR}/git"
 
 # remove at next version upgrade or when output changes
diff --git a/poky/meta/recipes-support/libnl/libnl_3.7.0.bb b/poky/meta/recipes-support/libnl/libnl_3.7.0.bb
index ddcc83a..ef286a6 100644
--- a/poky/meta/recipes-support/libnl/libnl_3.7.0.bb
+++ b/poky/meta/recipes-support/libnl/libnl_3.7.0.bb
@@ -14,17 +14,17 @@
 
 DEPENDS = "flex-native bison-native"
 
-SRC_URI = "https://github.com/thom311/${BPN}/releases/download/${BPN}${@d.getVar('PV').replace('.','_')}/${BP}.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/${BPN}${@d.getVar('PV').replace('.','_')}/${BP}.tar.gz \
            file://enable-serial-tests.patch \
            file://run-ptest \
            "
 
 SRC_URI[sha256sum] = "9fe43ccbeeea72c653bdcf8c93332583135cda46a79507bfd0a483bb57f65939"
 
+GITHUB_BASE_URI = "https://github.com/thom311/${BPN}/releases"
+UPSTREAM_CHECK_REGEX = "releases/tag/libnl(?P<pver>.+)"
 
-UPSTREAM_CHECK_URI = "https://github.com/thom311/${BPN}/releases"
-
-inherit autotools pkgconfig ptest
+inherit autotools pkgconfig ptest github-releases
 
 FILES:${PN} = "${libdir}/libnl-3.so.* \
                ${libdir}/libnl.so.* \
diff --git a/poky/meta/recipes-support/libpcre/libpcre2_10.40.bb b/poky/meta/recipes-support/libpcre/libpcre2_10.40.bb
index 8c2cadf..13f7c83 100644
--- a/poky/meta/recipes-support/libpcre/libpcre2_10.40.bb
+++ b/poky/meta/recipes-support/libpcre/libpcre2_10.40.bb
@@ -10,9 +10,10 @@
 LICENSE = "BSD-3-Clause"
 LIC_FILES_CHKSUM = "file://LICENCE;md5=41bfb977e4933c506588724ce69bf5d2"
 
-SRC_URI = "https://github.com/PhilipHazel/pcre2/releases/download/pcre2-${PV}/pcre2-${PV}.tar.bz2"
+SRC_URI = "${GITHUB_BASE_URI}/download/pcre2-${PV}/pcre2-${PV}.tar.bz2"
 
-UPSTREAM_CHECK_URI = "https://github.com/PhilipHazel/pcre2/releases"
+GITHUB_BASE_URI = "https://github.com/PhilipHazel/pcre2/releases"
+UPSTREAM_CHECK_REGEX = "releases/tag/pcre2-(?P<pver>.+)"
 
 SRC_URI[sha256sum] = "14e4b83c4783933dc17e964318e6324f7cae1bc75d8f3c79bc6969f00c159d68"
 
@@ -25,7 +26,7 @@
 
 BINCONFIG = "${bindir}/pcre2-config"
 
-inherit autotools binconfig-disabled
+inherit autotools binconfig-disabled github-releases
 
 EXTRA_OECONF = "\
     --enable-newline-is-lf \
diff --git a/poky/meta/recipes-support/libproxy/libproxy_0.4.18.bb b/poky/meta/recipes-support/libproxy/libproxy_0.4.18.bb
index a9f2bf6..01ba2a6 100644
--- a/poky/meta/recipes-support/libproxy/libproxy_0.4.18.bb
+++ b/poky/meta/recipes-support/libproxy/libproxy_0.4.18.bb
@@ -12,13 +12,10 @@
 
 DEPENDS = "glib-2.0"
 
-SRC_URI = "https://github.com/${BPN}/${BPN}/releases/download/${PV}/${BP}.tar.xz"
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/${BP}.tar.xz"
 SRC_URI[sha256sum] = "69b5856e9ea42c38ac77e6b8c92ffc86a71d341fef74e77bef85f9cc6c47a4b1"
 
-UPSTREAM_CHECK_URI = "https://github.com/libproxy/libproxy/releases"
-UPSTREAM_CHECK_REGEX = "libproxy-(?P<pver>.*)\.tar"
-
-inherit cmake pkgconfig
+inherit cmake pkgconfig github-releases
 
 PACKAGECONFIG ?= "${@bb.utils.contains('DISTRO_FEATURES', 'x11', 'gnome', '', d)} gnome3"
 PACKAGECONFIG[gnome] = "-DWITH_GNOME=yes,-DWITH_GNOME=no,gconf"
diff --git a/poky/meta/recipes-support/libpsl/libpsl_0.21.1.bb b/poky/meta/recipes-support/libpsl/libpsl_0.21.1.bb
index 4fc0ad8..58f56c1 100644
--- a/poky/meta/recipes-support/libpsl/libpsl_0.21.1.bb
+++ b/poky/meta/recipes-support/libpsl/libpsl_0.21.1.bb
@@ -10,13 +10,13 @@
 LIC_FILES_CHKSUM = "file://LICENSE;md5=5437030d9e4fbe7267ced058ddb8a7f5 \
                     file://COPYING;md5=f41d10997a12da5ee3c24ceeb0148d18"
 
-SRC_URI = "https://github.com/rockdaboot/${BPN}/releases/download/${PV}/${BP}.tar.gz \
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/${BP}.tar.gz \
            "
 SRC_URI[sha256sum] = "ac6ce1e1fbd4d0254c4ddb9d37f1fa99dec83619c1253328155206b896210d4c"
 
-UPSTREAM_CHECK_URI = "https://github.com/rockdaboot/libpsl/releases"
+GITHUB_BASE_URI = "https://github.com/rockdaboot/libpsl/releases"
 
-inherit autotools gettext gtk-doc manpages pkgconfig lib_package
+inherit autotools gettext gtk-doc manpages pkgconfig lib_package github-releases
 
 PACKAGECONFIG ?= "icu"
 PACKAGECONFIG[manpages] = "--enable-man,--disable-man,libxslt-native"
diff --git a/poky/meta/recipes-support/libusb/libusb1_1.0.26.bb b/poky/meta/recipes-support/libusb/libusb1_1.0.26.bb
index fd63e7a..7371faf 100644
--- a/poky/meta/recipes-support/libusb/libusb1_1.0.26.bb
+++ b/poky/meta/recipes-support/libusb/libusb1_1.0.26.bb
@@ -10,17 +10,17 @@
 
 BBCLASSEXTEND = "native nativesdk"
 
-SRC_URI = "https://github.com/libusb/libusb/releases/download/v${PV}/libusb-${PV}.tar.bz2 \
+SRC_URI = "${GITHUB_BASE_URI}/download/v${PV}/libusb-${PV}.tar.bz2 \
            file://run-ptest \
           "
 
-UPSTREAM_CHECK_URI = "https://github.com/libusb/libusb/releases"
+GITHUB_BASE_URI = "https://github.com/libusb/libusb/releases"
 
 SRC_URI[sha256sum] = "12ce7a61fc9854d1d2a1ffe095f7b5fac19ddba095c259e6067a46500381b5a5"
 
 S = "${WORKDIR}/libusb-${PV}"
 
-inherit autotools pkgconfig ptest
+inherit autotools pkgconfig ptest github-releases
 
 PACKAGECONFIG:class-target ??= "udev"
 PACKAGECONFIG[udev] = "--enable-udev,--disable-udev,udev"
diff --git a/poky/meta/recipes-support/nghttp2/nghttp2_1.48.0.bb b/poky/meta/recipes-support/nghttp2/nghttp2_1.49.0.bb
similarity index 77%
rename from poky/meta/recipes-support/nghttp2/nghttp2_1.48.0.bb
rename to poky/meta/recipes-support/nghttp2/nghttp2_1.49.0.bb
index 4ff3ab8..6c3d543 100644
--- a/poky/meta/recipes-support/nghttp2/nghttp2_1.48.0.bb
+++ b/poky/meta/recipes-support/nghttp2/nghttp2_1.49.0.bb
@@ -4,15 +4,13 @@
 LICENSE = "MIT"
 LIC_FILES_CHKSUM = "file://COPYING;md5=764abdf30b2eadd37ce47dcbce0ea1ec"
 
-UPSTREAM_CHECK_URI = "https://github.com/nghttp2/nghttp2/releases"
-
 SRC_URI = "\
-    https://github.com/nghttp2/nghttp2/releases/download/v${PV}/nghttp2-${PV}.tar.xz \
+    ${GITHUB_BASE_URI}/download/v${PV}/nghttp2-${PV}.tar.xz \
     file://0001-fetch-ocsp-response-use-python3.patch \
 "
-SRC_URI[sha256sum] = "47d8f30ee4f1bc621566d10362ca1b3ac83a335c63da7144947c806772d016e4"
+SRC_URI[sha256sum] = "b0cfd492bbf0b131c472e8f6501c9f4ee82b51b68130f47b278c0b7c9848a66e"
 
-inherit cmake manpages python3native
+inherit cmake manpages python3native github-releases
 PACKAGECONFIG[manpages] = ""
 
 # examples are never installed, and don't need to be built in the
diff --git a/poky/meta/recipes-support/numactl/numactl_git.bb b/poky/meta/recipes-support/numactl/numactl_git.bb
index 93547ea..712cf02 100644
--- a/poky/meta/recipes-support/numactl/numactl_git.bb
+++ b/poky/meta/recipes-support/numactl/numactl_git.bb
@@ -8,10 +8,10 @@
 
 inherit autotools-brokensep ptest
 
-LIC_FILES_CHKSUM = "file://README.md;beginline=19;endline=32;md5=f8ff2391624f28e481299f3f677b21bb"
+LIC_FILES_CHKSUM = "file://README.md;beginline=19;endline=32;md5=9f34c3af4ed6f3f5df0da5f3c0835a43"
 
-SRCREV = "dd6de072c92c892a86e18c0fd0dfa1ba57a9a05d"
-PV = "2.0.14"
+SRCREV = "01a39cb4edc0dd0f4151b7ad11e0c56d2e612a02"
+PV = "2.0.15"
 
 SRC_URI = "git://github.com/numactl/numactl;branch=master;protocol=https \
            file://Fix-the-test-output-format.patch \
diff --git a/poky/meta/recipes-support/re2c/re2c_3.0.bb b/poky/meta/recipes-support/re2c/re2c_3.0.bb
index ab04760..f8f8512 100644
--- a/poky/meta/recipes-support/re2c/re2c_3.0.bb
+++ b/poky/meta/recipes-support/re2c/re2c_3.0.bb
@@ -7,10 +7,10 @@
 LICENSE = "PD"
 LIC_FILES_CHKSUM = "file://LICENSE;md5=64eca4d8a3b67f9dc7656094731a2c8d"
 
-SRC_URI = "https://github.com/skvadrik/re2c/releases/download/${PV}/${BPN}-${PV}.tar.xz"
+SRC_URI = "${GITHUB_BASE_URI}/download/${PV}/${BPN}-${PV}.tar.xz"
 SRC_URI[sha256sum] = "b3babbbb1461e13fe22c630a40c43885efcfbbbb585830c6f4c0d791cf82ba0b"
-UPSTREAM_CHECK_URI = "https://github.com/skvadrik/re2c/releases"
+GITHUB_BASE_URI = "https://github.com/skvadrik/re2c/releases"
 
 BBCLASSEXTEND = "native nativesdk"
 
-inherit autotools
+inherit autotools github-releases
diff --git a/poky/meta/recipes-support/sqlite/sqlite3_3.39.2.bb b/poky/meta/recipes-support/sqlite/sqlite3_3.39.3.bb
similarity index 86%
rename from poky/meta/recipes-support/sqlite/sqlite3_3.39.2.bb
rename to poky/meta/recipes-support/sqlite/sqlite3_3.39.3.bb
index dfef480..fce497e 100644
--- a/poky/meta/recipes-support/sqlite/sqlite3_3.39.2.bb
+++ b/poky/meta/recipes-support/sqlite/sqlite3_3.39.3.bb
@@ -4,7 +4,7 @@
 LIC_FILES_CHKSUM = "file://sqlite3.h;endline=11;md5=786d3dc581eff03f4fd9e4a77ed00c66"
 
 SRC_URI = "http://www.sqlite.org/2022/sqlite-autoconf-${SQLITE_PV}.tar.gz"
-SRC_URI[sha256sum] = "852be8a6183a17ba47cee0bbff7400b7aa5affd283bf3beefc34fcd088a239de"
+SRC_URI[sha256sum] = "7868fb3082be3f2cf4491c6fba6de2bddcbc293a35fefb0624ee3c13f01422b9"
 
 # -19242 is only an issue in specific development branch commits
 CVE_CHECK_IGNORE += "CVE-2019-19242"
diff --git a/poky/meta/recipes-support/taglib/taglib_1.12.bb b/poky/meta/recipes-support/taglib/taglib_1.12.bb
index 47ad8aa..e6e3ef0 100644
--- a/poky/meta/recipes-support/taglib/taglib_1.12.bb
+++ b/poky/meta/recipes-support/taglib/taglib_1.12.bb
@@ -14,7 +14,7 @@
 SRC_URI[md5sum] = "4313ed2671234e029b7af8f97c84e9af"
 SRC_URI[sha256sum] = "7fccd07669a523b07a15bd24c8da1bbb92206cb19e9366c3692af3d79253b703"
 
-UPSTREAM_CHECK_URI = "http://github.com/taglib/taglib/releases/"
+UPSTREAM_CHECK_URI = "https://taglib.org/"
 
 BINCONFIG = "${bindir}/taglib-config"
 
diff --git a/poky/meta/recipes-support/vim/vim.inc b/poky/meta/recipes-support/vim/vim.inc
index 33a8299..cbc3701 100644
--- a/poky/meta/recipes-support/vim/vim.inc
+++ b/poky/meta/recipes-support/vim/vim.inc
@@ -20,8 +20,8 @@
            file://no-path-adjust.patch \
            "
 
-PV .= ".0341"
-SRCREV = "92a3d20682d46359bb50a452b4f831659e799155"
+PV .= ".0598"
+SRCREV = "8279af514ca7e5fd3c31cf13b0864163d1a0bfeb"
 
 # Remove when 8.3 is out
 UPSTREAM_VERSION_UNKNOWN = "1"
diff --git a/poky/scripts/create-pull-request b/poky/scripts/create-pull-request
index 8eefcf6..2f91a35 100755
--- a/poky/scripts/create-pull-request
+++ b/poky/scripts/create-pull-request
@@ -128,7 +128,7 @@
 GIT_RE="\(^\($PROTO_RE\)\?\)\($USER_RE@\)\?\([^:/]*\)[:/]\(.*\)"
 REMOTE_URL=${REMOTE_URL%.git}
 REMOTE_REPO=$(echo $REMOTE_URL | sed "s#$GIT_RE#\5#")
-REMOTE_URL=$(echo $REMOTE_URL | sed "s#$GIT_RE#git://\4/\5#")
+REMOTE_URL=$(echo $REMOTE_URL | sed "s#$GIT_RE#https://\4/\5#")
 
 if [ -z "$BRANCH" ]; then
 	BRANCH=$(git branch | grep -e "^\* " | cut -d' ' -f2)
diff --git a/poky/scripts/lib/wic/plugins/imager/direct.py b/poky/scripts/lib/wic/plugins/imager/direct.py
index c44159b..da483da 100644
--- a/poky/scripts/lib/wic/plugins/imager/direct.py
+++ b/poky/scripts/lib/wic/plugins/imager/direct.py
@@ -117,7 +117,7 @@
         updated = False
         for part in self.parts:
             if not part.realnum or not part.mountpoint \
-               or not part.mountpoint.startswith('/'):
+               or part.mountpoint == "/" or not part.mountpoint.startswith('/'):
                 continue
 
             if part.use_uuid:
diff --git a/poky/scripts/oe-setup-builddir b/poky/scripts/oe-setup-builddir
index d3c7f94..a13860c 100755
--- a/poky/scripts/oe-setup-builddir
+++ b/poky/scripts/oe-setup-builddir
@@ -7,12 +7,14 @@
 # SPDX-License-Identifier: GPL-2.0-or-later
 #
 
-if [ -z "$BUILDDIR" ]; then
-    echo >&2 "Error: The build directory (BUILDDIR) must be set!"
+die() {
+    echo Error: "$@" >&2
     exit 1
-fi
+}
 
-if [ "$1" = '--help' -o "$1" = '-h' ]; then
+[ -n "$BUILDDIR" ] || die "The build directory (BUILDDIR) must be set!"
+
+if [ "$1" = '--help' ] || [ "$1" = '-h' ]; then
     echo 'Usage: oe-setup-builddir'
     echo ''
     echo "OpenEmbedded setup-builddir - setup build directory $BUILDDIR"
@@ -22,35 +24,22 @@
 
 mkdir -p "$BUILDDIR/conf"
 
-if [ ! -d "$BUILDDIR" ]; then
-    echo >&2 "Error: The builddir ($BUILDDIR) does not exist!"
-    exit 1
-fi
-
-if [ ! -w "$BUILDDIR" ]; then
-    echo >&2 "Error: Cannot write to $BUILDDIR, perhaps try sourcing with a writable path? i.e. . oe-init-build-env ~/my-build"
-    exit 1
-fi
+[ -d "$BUILDDIR" ] || die "The build directory ($BUILDDIR) does not exist!"
+[ -w "$BUILDDIR" ] ||
+    die "Cannot write to $BUILDDIR, perhaps try sourcing with a writable path? i.e. . oe-init-build-env ~/my-build"
 
 # Attempting removal of sticky,setuid bits from BUILDDIR, BUILDDIR/conf
 chmod -st "$BUILDDIR" 2>/dev/null || echo "WARNING: unable to chmod $BUILDDIR"
 chmod -st "$BUILDDIR/conf" 2>/dev/null || echo "WARNING: unable to chmod $BUILDDIR/conf"
 
-cd "$BUILDDIR"
+cd "$BUILDDIR" || die "Failed to change directory to $BUILDDIR!"
 
-if [ -f "$BUILDDIR/conf/templateconf.cfg" -a -z "$TEMPLATECONF" ]; then
-    TEMPLATECONF=$(cat "$BUILDDIR/conf/templateconf.cfg")
-    # The following two are no longer valid; unsetting them will automatically get them replaced
-    # with correct ones.
-    if [ $TEMPLATECONF = "meta/conf" -o $TEMPLATECONF = "meta-poky/conf" ]; then
-        unset TEMPLATECONF
-        rm $BUILDDIR/conf/templateconf.cfg
-    fi
-fi
+. "$OEROOT/.templateconf"
 
-. "$OEROOT"/.templateconf
+# Keep the original TEMPLATECONF before possibly prefixing it with $OEROOT below.
+ORG_TEMPLATECONF=$TEMPLATECONF
 
-# 
+#
 # $TEMPLATECONF can point to a directory for the template local.conf & bblayers.conf
 #
 if [ -n "$TEMPLATECONF" ]; then
@@ -59,15 +48,12 @@
         if [ -d "$OEROOT/$TEMPLATECONF" ]; then
             TEMPLATECONF="$OEROOT/$TEMPLATECONF"
         fi
-        if [ ! -d "$TEMPLATECONF" ]; then
-            echo >&2 "Error: TEMPLATECONF value points to nonexistent directory '$TEMPLATECONF'"
-            exit 1
-        fi
-        templatesdir=$(python3 -c "import sys; print(sys.argv[1].strip('/').split('/')[-2])" $TEMPLATECONF)
-        if [ ! -f "$TEMPLATECONF/../../layer.conf" -o $templatesdir != "templates" ]; then
-            echo >&2 "Error: TEMPLATECONF value (which is $TEMPLATECONF) must point to meta-some-layer/conf/templates/template-name"
-            exit 1
-        fi
+        [ -d "$TEMPLATECONF" ] ||
+            die "TEMPLATECONF value points to nonexistent directory '$TEMPLATECONF'"
+    fi
+    templatesdir=$(python3 -c "import sys; print(sys.argv[1].strip('/').split('/')[-2])" "$TEMPLATECONF")
+    if [ "$templatesdir" != templates ] || [ ! -f "$TEMPLATECONF/../../layer.conf" ]; then
+        die "TEMPLATECONF value (which is $TEMPLATECONF) must point to meta-some-layer/conf/templates/template-name"
     fi
     OECORELAYERCONF="$TEMPLATECONF/bblayers.conf.sample"
     OECORELOCALCONF="$TEMPLATECONF/local.conf.sample"
@@ -83,8 +69,7 @@
 You had no conf/local.conf file. This configuration file has therefore been
 created for you from $OECORELOCALCONF
 You may wish to edit it to, for example, select a different MACHINE (target
-hardware). See conf/local.conf for more information as common configuration
-options are commented.
+hardware).
 
 EOM
     cp -f "$OECORELOCALCONF" "$BUILDDIR/conf/local.conf"
@@ -118,7 +103,7 @@
 unset OECORELAYERCONF
 
 # Ending the first-time run message. Show the YP Documentation banner.
-if [ ! -z "$SHOWYPDOC" ]; then
+if [ -n "$SHOWYPDOC" ]; then
     cat <<EOM
 The Yocto Project has extensive documentation about OE including a reference
 manual which can be found at:
@@ -138,5 +123,5 @@
 unset OECORENOTESCONF
 
 if [ ! -f "$BUILDDIR/conf/templateconf.cfg" ]; then
-    echo "$TEMPLATECONF" >"$BUILDDIR/conf/templateconf.cfg"
+    echo "$ORG_TEMPLATECONF" >"$BUILDDIR/conf/templateconf.cfg"
 fi
diff --git a/poky/scripts/runqemu b/poky/scripts/runqemu
index b6fc212..983f751 100755
--- a/poky/scripts/runqemu
+++ b/poky/scripts/runqemu
@@ -1505,6 +1505,9 @@
         cmd = "%s %s" % (self.qemu_opt, kernel_opts)
         cmds = shlex.split(cmd)
         logger.info('Running %s\n' % cmd)
+        with open('/proc/uptime', 'r') as f:
+            uptime_seconds = f.readline().split()[0]
+        logger.info('Host uptime: %s\n' % uptime_seconds)
         pass_fds = []
         if self.taplock_descriptor:
             pass_fds = [self.taplock_descriptor.fileno()]
@@ -1528,6 +1531,9 @@
         signal.signal(signal.SIGTERM, signal.SIG_IGN)
 
         logger.info("Cleaning up")
+        with open('/proc/uptime', 'r') as f:
+            uptime_seconds = f.readline().split()[0]
+        logger.info('Host uptime: %s\n' % uptime_seconds)
         if self.cleantap:
             cmd = ('sudo', self.qemuifdown, self.tap, self.bindir_native)
             logger.debug('Running %s' % str(cmd))
