mirror of
https://github.com/signalwire/freeswitch.git
synced 2026-07-04 19:31:56 +00:00
update libyuv to hash 5a699df5 from https://chromium.googlesource.com/libyuv/libyuv/
This commit is contained in:
committed by
Andrey Volk
parent
a714dacd7f
commit
5a924d5ef3
@@ -0,0 +1,135 @@
|
||||
cc_library {
|
||||
name: "libyuv",
|
||||
vendor_available: true,
|
||||
vndk: {
|
||||
enabled: true,
|
||||
},
|
||||
|
||||
srcs: [
|
||||
"source/compare.cc",
|
||||
"source/compare_common.cc",
|
||||
"source/compare_gcc.cc",
|
||||
"source/compare_neon.cc",
|
||||
"source/compare_neon64.cc",
|
||||
"source/compare_mmi.cc",
|
||||
"source/compare_msa.cc",
|
||||
"source/convert.cc",
|
||||
"source/convert_argb.cc",
|
||||
"source/convert_from.cc",
|
||||
"source/convert_from_argb.cc",
|
||||
"source/convert_to_argb.cc",
|
||||
"source/convert_to_i420.cc",
|
||||
"source/cpu_id.cc",
|
||||
"source/planar_functions.cc",
|
||||
"source/rotate.cc",
|
||||
"source/rotate_any.cc",
|
||||
"source/rotate_argb.cc",
|
||||
"source/rotate_common.cc",
|
||||
"source/rotate_gcc.cc",
|
||||
"source/rotate_mmi.cc",
|
||||
"source/rotate_msa.cc",
|
||||
"source/rotate_neon.cc",
|
||||
"source/rotate_neon64.cc",
|
||||
"source/row_any.cc",
|
||||
"source/row_common.cc",
|
||||
"source/row_gcc.cc",
|
||||
"source/row_mmi.cc",
|
||||
"source/row_msa.cc",
|
||||
"source/row_neon.cc",
|
||||
"source/row_neon64.cc",
|
||||
"source/scale.cc",
|
||||
"source/scale_any.cc",
|
||||
"source/scale_argb.cc",
|
||||
"source/scale_common.cc",
|
||||
"source/scale_gcc.cc",
|
||||
"source/scale_mmi.cc",
|
||||
"source/scale_msa.cc",
|
||||
"source/scale_neon.cc",
|
||||
"source/scale_neon64.cc",
|
||||
"source/video_common.cc",
|
||||
"source/convert_jpeg.cc",
|
||||
"source/mjpeg_decoder.cc",
|
||||
"source/mjpeg_validate.cc",
|
||||
],
|
||||
|
||||
cflags: [
|
||||
"-Wall",
|
||||
"-Werror",
|
||||
"-Wno-unused-parameter",
|
||||
"-fexceptions",
|
||||
"-DHAVE_JPEG",
|
||||
],
|
||||
|
||||
shared_libs: ["libjpeg"],
|
||||
|
||||
export_include_dirs: ["include"],
|
||||
}
|
||||
|
||||
// compatibility static library until all uses of libyuv_static are replaced
|
||||
// with libyuv (b/37646797)
|
||||
cc_library_static {
|
||||
name: "libyuv_static",
|
||||
whole_static_libs: ["libyuv"],
|
||||
}
|
||||
|
||||
cc_test {
|
||||
name: "libyuv_unittest",
|
||||
static_libs: ["libyuv"],
|
||||
shared_libs: ["libjpeg"],
|
||||
cflags: ["-Wall", "-Werror"],
|
||||
srcs: [
|
||||
"unit_test/unit_test.cc",
|
||||
"unit_test/basictypes_test.cc",
|
||||
"unit_test/color_test.cc",
|
||||
"unit_test/compare_test.cc",
|
||||
"unit_test/convert_test.cc",
|
||||
"unit_test/cpu_test.cc",
|
||||
"unit_test/cpu_thread_test.cc",
|
||||
"unit_test/math_test.cc",
|
||||
"unit_test/planar_test.cc",
|
||||
"unit_test/rotate_argb_test.cc",
|
||||
"unit_test/rotate_test.cc",
|
||||
"unit_test/scale_argb_test.cc",
|
||||
"unit_test/scale_test.cc",
|
||||
"unit_test/video_common_test.cc",
|
||||
],
|
||||
}
|
||||
|
||||
cc_test {
|
||||
name: "compare",
|
||||
gtest: false,
|
||||
srcs: [
|
||||
"util/compare.cc",
|
||||
],
|
||||
static_libs: ["libyuv"],
|
||||
}
|
||||
|
||||
cc_test {
|
||||
name: "cpuid",
|
||||
gtest: false,
|
||||
srcs: [
|
||||
"util/cpuid.c",
|
||||
],
|
||||
static_libs: ["libyuv"],
|
||||
}
|
||||
|
||||
cc_test {
|
||||
name: "psnr",
|
||||
gtest: false,
|
||||
srcs: [
|
||||
"util/psnr_main.cc",
|
||||
"util/psnr.cc",
|
||||
"util/ssim.cc",
|
||||
],
|
||||
static_libs: ["libyuv"],
|
||||
}
|
||||
|
||||
cc_test {
|
||||
name: "yuvconvert",
|
||||
gtest: false,
|
||||
srcs: [
|
||||
"util/yuvconvert.cc",
|
||||
],
|
||||
static_libs: ["libyuv"],
|
||||
shared_libs: ["libjpeg"],
|
||||
}
|
||||
+61
-26
@@ -1,4 +1,4 @@
|
||||
# This is the Android makefile for libyuv for both platform and NDK.
|
||||
# This is the Android makefile for libyuv for NDK.
|
||||
LOCAL_PATH:= $(call my-dir)
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
@@ -8,8 +8,11 @@ LOCAL_CPP_EXTENSION := .cc
|
||||
LOCAL_SRC_FILES := \
|
||||
source/compare.cc \
|
||||
source/compare_common.cc \
|
||||
source/compare_neon64.cc \
|
||||
source/compare_gcc.cc \
|
||||
source/compare_mmi.cc \
|
||||
source/compare_msa.cc \
|
||||
source/compare_neon.cc \
|
||||
source/compare_neon64.cc \
|
||||
source/convert.cc \
|
||||
source/convert_argb.cc \
|
||||
source/convert_from.cc \
|
||||
@@ -22,48 +25,80 @@ LOCAL_SRC_FILES := \
|
||||
source/rotate_any.cc \
|
||||
source/rotate_argb.cc \
|
||||
source/rotate_common.cc \
|
||||
source/rotate_mips.cc \
|
||||
source/rotate_neon64.cc \
|
||||
source/rotate_gcc.cc \
|
||||
source/rotate_mmi.cc \
|
||||
source/rotate_msa.cc \
|
||||
source/rotate_neon.cc \
|
||||
source/rotate_neon64.cc \
|
||||
source/row_any.cc \
|
||||
source/row_common.cc \
|
||||
source/row_mips.cc \
|
||||
source/row_gcc.cc \
|
||||
source/row_mmi.cc \
|
||||
source/row_msa.cc \
|
||||
source/row_neon.cc \
|
||||
source/row_neon64.cc \
|
||||
source/row_gcc.cc \
|
||||
source/scale.cc \
|
||||
source/scale_any.cc \
|
||||
source/scale_argb.cc \
|
||||
source/scale_common.cc \
|
||||
source/scale_mips.cc \
|
||||
source/scale_neon64.cc \
|
||||
source/scale_gcc.cc \
|
||||
source/scale_mmi.cc \
|
||||
source/scale_msa.cc \
|
||||
source/scale_neon.cc \
|
||||
source/scale_neon64.cc \
|
||||
source/video_common.cc
|
||||
|
||||
# TODO(fbarchard): Enable mjpeg encoder.
|
||||
# source/mjpeg_decoder.cc
|
||||
# source/convert_jpeg.cc
|
||||
# source/mjpeg_validate.cc
|
||||
|
||||
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
|
||||
LOCAL_CFLAGS += -DLIBYUV_NEON
|
||||
LOCAL_SRC_FILES += \
|
||||
source/compare_neon.cc.neon \
|
||||
source/rotate_neon.cc.neon \
|
||||
source/row_neon.cc.neon \
|
||||
source/scale_neon.cc.neon
|
||||
endif
|
||||
|
||||
ifeq ($(TARGET_ARCH_ABI),mips)
|
||||
LOCAL_CFLAGS += -DLIBYUV_MSA
|
||||
LOCAL_SRC_FILES += \
|
||||
source/row_msa.cc
|
||||
common_CFLAGS := -Wall -fexceptions
|
||||
ifneq ($(LIBYUV_DISABLE_JPEG), "yes")
|
||||
LOCAL_SRC_FILES += \
|
||||
source/convert_jpeg.cc \
|
||||
source/mjpeg_decoder.cc \
|
||||
source/mjpeg_validate.cc
|
||||
common_CFLAGS += -DHAVE_JPEG
|
||||
LOCAL_SHARED_LIBRARIES := libjpeg
|
||||
endif
|
||||
|
||||
LOCAL_CFLAGS += $(common_CFLAGS)
|
||||
LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/include
|
||||
LOCAL_C_INCLUDES += $(LOCAL_PATH)/include
|
||||
LOCAL_EXPORT_C_INCLUDE_DIRS := $(LOCAL_PATH)/include
|
||||
|
||||
LOCAL_MODULE := libyuv_static
|
||||
LOCAL_MODULE_TAGS := optional
|
||||
|
||||
include $(BUILD_STATIC_LIBRARY)
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
|
||||
LOCAL_WHOLE_STATIC_LIBRARIES := libyuv_static
|
||||
LOCAL_MODULE := libyuv
|
||||
ifneq ($(LIBYUV_DISABLE_JPEG), "yes")
|
||||
LOCAL_SHARED_LIBRARIES := libjpeg
|
||||
endif
|
||||
|
||||
include $(BUILD_SHARED_LIBRARY)
|
||||
|
||||
include $(CLEAR_VARS)
|
||||
LOCAL_STATIC_LIBRARIES := libyuv_static
|
||||
LOCAL_SHARED_LIBRARIES := libjpeg
|
||||
LOCAL_MODULE_TAGS := tests
|
||||
LOCAL_CPP_EXTENSION := .cc
|
||||
LOCAL_C_INCLUDES += $(LOCAL_PATH)/include
|
||||
LOCAL_SRC_FILES := \
|
||||
unit_test/unit_test.cc \
|
||||
unit_test/basictypes_test.cc \
|
||||
unit_test/color_test.cc \
|
||||
unit_test/compare_test.cc \
|
||||
unit_test/convert_test.cc \
|
||||
unit_test/cpu_test.cc \
|
||||
unit_test/cpu_thread_test.cc \
|
||||
unit_test/math_test.cc \
|
||||
unit_test/planar_test.cc \
|
||||
unit_test/rotate_argb_test.cc \
|
||||
unit_test/rotate_test.cc \
|
||||
unit_test/scale_argb_test.cc \
|
||||
unit_test/scale_test.cc \
|
||||
unit_test/video_common_test.cc
|
||||
|
||||
LOCAL_MODULE := libyuv_unittest
|
||||
include $(BUILD_NATIVE_TEST)
|
||||
|
||||
+166
-41
@@ -9,9 +9,19 @@
|
||||
import("libyuv.gni")
|
||||
import("//testing/test.gni")
|
||||
|
||||
declare_args() {
|
||||
# Set to false to disable building with gflags.
|
||||
libyuv_use_gflags = true
|
||||
|
||||
# When building a shared library using a target in WebRTC or
|
||||
# Chromium projects that depends on libyuv, setting this flag
|
||||
# to true makes libyuv symbols visible inside that library.
|
||||
libyuv_symbols_visible = false
|
||||
}
|
||||
|
||||
config("libyuv_config") {
|
||||
include_dirs = [ "include" ]
|
||||
if (is_android && current_cpu=="arm64") {
|
||||
if (is_android && current_cpu == "arm64") {
|
||||
ldflags = [ "-Wl,--dynamic-linker,/system/bin/linker64" ]
|
||||
}
|
||||
if (is_android && current_cpu != "arm64") {
|
||||
@@ -19,7 +29,61 @@ config("libyuv_config") {
|
||||
}
|
||||
}
|
||||
|
||||
static_library("libyuv") {
|
||||
# This target is built when no specific target is specified on the command line.
|
||||
group("default") {
|
||||
testonly = true
|
||||
deps = [
|
||||
":libyuv",
|
||||
]
|
||||
if (libyuv_include_tests) {
|
||||
deps += [
|
||||
":compare",
|
||||
":cpuid",
|
||||
":libyuv_unittest",
|
||||
":psnr",
|
||||
":yuvconvert",
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
group("libyuv") {
|
||||
all_dependent_configs = [ ":libyuv_config" ]
|
||||
deps = []
|
||||
|
||||
if (is_win && target_cpu == "x64") {
|
||||
# Compile with clang in order to get inline assembly
|
||||
public_deps = [
|
||||
":libyuv_internal(//build/toolchain/win:win_clang_x64)",
|
||||
]
|
||||
} else {
|
||||
public_deps = [
|
||||
":libyuv_internal",
|
||||
]
|
||||
}
|
||||
|
||||
if (libyuv_use_neon) {
|
||||
deps += [ ":libyuv_neon" ]
|
||||
}
|
||||
|
||||
if (libyuv_use_msa) {
|
||||
deps += [ ":libyuv_msa" ]
|
||||
}
|
||||
|
||||
if (libyuv_use_mmi) {
|
||||
deps += [ ":libyuv_mmi" ]
|
||||
}
|
||||
|
||||
if (!is_ios) {
|
||||
# Make sure that clients of libyuv link with libjpeg. This can't go in
|
||||
# libyuv_internal because in Windows x64 builds that will generate a clang
|
||||
# build of libjpeg, and we don't want two copies.
|
||||
deps += [ "//third_party:jpeg" ]
|
||||
}
|
||||
}
|
||||
|
||||
static_library("libyuv_internal") {
|
||||
visibility = [ ":*" ]
|
||||
|
||||
sources = [
|
||||
# Headers
|
||||
"include/libyuv.h",
|
||||
@@ -62,47 +126,56 @@ static_library("libyuv") {
|
||||
"source/rotate_any.cc",
|
||||
"source/rotate_argb.cc",
|
||||
"source/rotate_common.cc",
|
||||
"source/rotate_mips.cc",
|
||||
"source/rotate_gcc.cc",
|
||||
"source/rotate_win.cc",
|
||||
"source/row_any.cc",
|
||||
"source/row_common.cc",
|
||||
"source/row_mips.cc",
|
||||
"source/row_gcc.cc",
|
||||
"source/row_win.cc",
|
||||
"source/scale.cc",
|
||||
"source/scale_any.cc",
|
||||
"source/scale_argb.cc",
|
||||
"source/scale_common.cc",
|
||||
"source/scale_mips.cc",
|
||||
"source/scale_gcc.cc",
|
||||
"source/scale_win.cc",
|
||||
"source/video_common.cc",
|
||||
]
|
||||
|
||||
public_configs = [ ":libyuv_config" ]
|
||||
|
||||
configs += [ ":libyuv_config" ]
|
||||
defines = []
|
||||
deps = []
|
||||
|
||||
if (libyuv_symbols_visible) {
|
||||
configs -= [ "//build/config/gcc:symbol_visibility_hidden" ]
|
||||
configs += [ "//build/config/gcc:symbol_visibility_default" ]
|
||||
}
|
||||
|
||||
if (!is_ios) {
|
||||
defines += [ "HAVE_JPEG" ]
|
||||
deps += [ "//third_party:jpeg" ]
|
||||
|
||||
# Needed to pull in libjpeg headers. Can't add //third_party:jpeg to deps
|
||||
# because in Windows x64 build it will get compiled with clang.
|
||||
deps += [ "//third_party:jpeg_includes" ]
|
||||
}
|
||||
|
||||
if (libyuv_use_neon) {
|
||||
deps += [ ":libyuv_neon" ]
|
||||
}
|
||||
|
||||
if (libyuv_use_msa) {
|
||||
deps += [ ":libyuv_msa" ]
|
||||
}
|
||||
|
||||
if (is_nacl) {
|
||||
# Always enable optimization under NaCl to workaround crbug.com/538243 .
|
||||
# Always enable optimization for Release and NaCl builds (to workaround
|
||||
# crbug.com/538243).
|
||||
if (!is_debug || is_nacl) {
|
||||
configs -= [ "//build/config/compiler:default_optimization" ]
|
||||
|
||||
# Enable optimize for speed (-O2) over size (-Os).
|
||||
configs += [ "//build/config/compiler:optimize_max" ]
|
||||
}
|
||||
|
||||
# To enable AVX2 or other cpu optimization, pass flag here
|
||||
if (!is_win) {
|
||||
cflags = [
|
||||
# "-mpopcnt",
|
||||
# "-mavx2",
|
||||
# "-mfma",
|
||||
"-ffp-contract=fast", # Enable fma vectorization for NEON.
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
if (libyuv_use_neon) {
|
||||
@@ -119,8 +192,22 @@ if (libyuv_use_neon) {
|
||||
"source/scale_neon64.cc",
|
||||
]
|
||||
|
||||
deps = [
|
||||
":libyuv_internal",
|
||||
]
|
||||
|
||||
public_configs = [ ":libyuv_config" ]
|
||||
|
||||
# Always enable optimization for Release and NaCl builds (to workaround
|
||||
# crbug.com/538243).
|
||||
if (!is_debug) {
|
||||
configs -= [ "//build/config/compiler:default_optimization" ]
|
||||
|
||||
# Enable optimize for speed (-O2) over size (-Os).
|
||||
# TODO(fbarchard): Consider optimize_speed which is O3.
|
||||
configs += [ "//build/config/compiler:optimize_max" ]
|
||||
}
|
||||
|
||||
if (current_cpu != "arm64") {
|
||||
configs -= [ "//build/config/compiler:compiler_arm_fpu" ]
|
||||
cflags = [ "-mfpu=neon" ]
|
||||
@@ -132,7 +219,32 @@ if (libyuv_use_msa) {
|
||||
static_library("libyuv_msa") {
|
||||
sources = [
|
||||
# MSA Source Files
|
||||
"source/compare_msa.cc",
|
||||
"source/rotate_msa.cc",
|
||||
"source/row_msa.cc",
|
||||
"source/scale_msa.cc",
|
||||
]
|
||||
|
||||
deps = [
|
||||
":libyuv_internal",
|
||||
]
|
||||
|
||||
public_configs = [ ":libyuv_config" ]
|
||||
}
|
||||
}
|
||||
|
||||
if (libyuv_use_mmi) {
|
||||
static_library("libyuv_mmi") {
|
||||
sources = [
|
||||
# MMI Source Files
|
||||
"source/compare_mmi.cc",
|
||||
"source/rotate_mmi.cc",
|
||||
"source/row_mmi.cc",
|
||||
"source/scale_mmi.cc",
|
||||
]
|
||||
|
||||
deps = [
|
||||
":libyuv_internal",
|
||||
]
|
||||
|
||||
public_configs = [ ":libyuv_config" ]
|
||||
@@ -145,13 +257,13 @@ if (libyuv_include_tests) {
|
||||
cflags = [
|
||||
# TODO(fbarchard): Fix sign and unused variable warnings.
|
||||
"-Wno-sign-compare",
|
||||
"-Wno-unused-variable"
|
||||
"-Wno-unused-variable",
|
||||
]
|
||||
}
|
||||
if (is_win) {
|
||||
cflags = [
|
||||
"/wd4245", # signed/unsigned mismatch
|
||||
"/wd4189", # local variable is initialized but not referenced
|
||||
"/wd4245", # signed/unsigned mismatch
|
||||
"/wd4189", # local variable is initialized but not referenced
|
||||
]
|
||||
}
|
||||
}
|
||||
@@ -163,14 +275,14 @@ if (libyuv_include_tests) {
|
||||
testonly = true
|
||||
|
||||
sources = [
|
||||
# headers
|
||||
"unit_test/unit_test.h",
|
||||
# sources
|
||||
# headers
|
||||
"unit_test/basictypes_test.cc",
|
||||
"unit_test/compare_test.cc",
|
||||
"unit_test/color_test.cc",
|
||||
"unit_test/compare_test.cc",
|
||||
"unit_test/convert_test.cc",
|
||||
"unit_test/cpu_test.cc",
|
||||
"unit_test/cpu_thread_test.cc",
|
||||
"unit_test/math_test.cc",
|
||||
"unit_test/planar_test.cc",
|
||||
"unit_test/rotate_argb_test.cc",
|
||||
@@ -178,22 +290,28 @@ if (libyuv_include_tests) {
|
||||
"unit_test/scale_argb_test.cc",
|
||||
"unit_test/scale_test.cc",
|
||||
"unit_test/unit_test.cc",
|
||||
"unit_test/unit_test.h",
|
||||
"unit_test/video_common_test.cc",
|
||||
]
|
||||
|
||||
deps = [
|
||||
":libyuv",
|
||||
"//testing/gtest",
|
||||
"//third_party/gflags",
|
||||
]
|
||||
|
||||
defines = []
|
||||
if (libyuv_use_gflags) {
|
||||
defines += [ "LIBYUV_USE_GFLAGS" ]
|
||||
deps += [ "//third_party/gflags" ]
|
||||
}
|
||||
|
||||
configs += [ ":libyuv_unittest_warnings_config" ]
|
||||
|
||||
public_deps = [ "//testing/gtest" ]
|
||||
public_deps = [
|
||||
"//testing/gtest",
|
||||
]
|
||||
public_configs = [ ":libyuv_unittest_config" ]
|
||||
|
||||
defines = []
|
||||
|
||||
if (is_linux) {
|
||||
cflags = [ "-fexceptions" ]
|
||||
}
|
||||
@@ -211,8 +329,8 @@ if (libyuv_include_tests) {
|
||||
|
||||
# TODO(YangZhang): These lines can be removed when high accuracy
|
||||
# YUV to RGB to Neon is ported.
|
||||
if ((target_cpu=="armv7" || target_cpu=="armv7s" ||
|
||||
(target_cpu=="arm" && arm_version >= 7) || target_cpu=="arm64") &&
|
||||
if ((target_cpu == "armv7" || target_cpu == "armv7s" ||
|
||||
(target_cpu == "arm" && arm_version >= 7) || target_cpu == "arm64") &&
|
||||
(arm_use_neon || arm_optionally_use_neon)) {
|
||||
defines += [ "LIBYUV_NEON" ]
|
||||
}
|
||||
@@ -221,7 +339,6 @@ if (libyuv_include_tests) {
|
||||
# Enable the following 3 macros to turn off assembly for specified CPU.
|
||||
# "LIBYUV_DISABLE_X86",
|
||||
# "LIBYUV_DISABLE_NEON",
|
||||
# "LIBYUV_DISABLE_MIPS",
|
||||
# Enable the following macro to build libyuv as a shared library (dll).
|
||||
# "LIBYUV_USING_SHARED_LIBRARY"
|
||||
]
|
||||
@@ -230,20 +347,24 @@ if (libyuv_include_tests) {
|
||||
executable("compare") {
|
||||
sources = [
|
||||
# sources
|
||||
"util/compare.cc"
|
||||
"util/compare.cc",
|
||||
]
|
||||
deps = [
|
||||
":libyuv",
|
||||
]
|
||||
deps = [ ":libyuv" ]
|
||||
if (is_linux) {
|
||||
cflags = [ "-fexceptions" ]
|
||||
}
|
||||
}
|
||||
|
||||
executable("convert") {
|
||||
executable("yuvconvert") {
|
||||
sources = [
|
||||
# sources
|
||||
"util/convert.cc"
|
||||
"util/yuvconvert.cc",
|
||||
]
|
||||
deps = [
|
||||
":libyuv",
|
||||
]
|
||||
deps = [ ":libyuv" ]
|
||||
if (is_linux) {
|
||||
cflags = [ "-fexceptions" ]
|
||||
}
|
||||
@@ -252,11 +373,13 @@ if (libyuv_include_tests) {
|
||||
executable("psnr") {
|
||||
sources = [
|
||||
# sources
|
||||
"util/psnr_main.cc",
|
||||
"util/psnr.cc",
|
||||
"util/ssim.cc"
|
||||
"util/psnr_main.cc",
|
||||
"util/ssim.cc",
|
||||
]
|
||||
deps = [
|
||||
":libyuv",
|
||||
]
|
||||
deps = [ ":libyuv" ]
|
||||
|
||||
if (!is_ios && !libyuv_disable_jpeg) {
|
||||
defines = [ "HAVE_JPEG" ]
|
||||
@@ -266,8 +389,10 @@ if (libyuv_include_tests) {
|
||||
executable("cpuid") {
|
||||
sources = [
|
||||
# sources
|
||||
"util/cpuid.c"
|
||||
"util/cpuid.c",
|
||||
]
|
||||
deps = [
|
||||
":libyuv",
|
||||
]
|
||||
deps = [ ":libyuv" ]
|
||||
}
|
||||
}
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
# determine the version number from the #define in libyuv/version.h
|
||||
EXECUTE_PROCESS (
|
||||
COMMAND grep --perl-regex --only-matching "(?<=LIBYUV_VERSION )[0-9]+" include/libyuv/version.h
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
OUTPUT_VARIABLE YUV_VERSION_NUMBER
|
||||
OUTPUT_STRIP_TRAILING_WHITESPACE )
|
||||
SET ( YUV_VER_MAJOR 0 )
|
||||
SET ( YUV_VER_MINOR 0 )
|
||||
SET ( YUV_VER_PATCH ${YUV_VERSION_NUMBER} )
|
||||
SET ( YUV_VERSION ${YUV_VER_MAJOR}.${YUV_VER_MINOR}.${YUV_VER_PATCH} )
|
||||
MESSAGE ( "Building ver.: ${YUV_VERSION}" )
|
||||
|
||||
# is this a 32-bit or 64-bit build?
|
||||
IF ( CMAKE_SIZEOF_VOID_P EQUAL 8 )
|
||||
SET ( YUV_BIT_SIZE 64 )
|
||||
ELSEIF ( CMAKE_SIZEOF_VOID_P EQUAL 4 )
|
||||
SET ( YUV_BIT_SIZE 32 )
|
||||
ELSE ()
|
||||
MESSAGE ( FATAL_ERROR "CMAKE_SIZEOF_VOID_P=${CMAKE_SIZEOF_VOID_P}" )
|
||||
ENDIF ()
|
||||
|
||||
# detect if this is an ARM build
|
||||
STRING (FIND "${CMAKE_CXX_COMPILER}" "arm-linux-gnueabihf-g++" pos)
|
||||
IF ( ${pos} EQUAL -1 )
|
||||
SET ( YUV_CROSS_COMPILE_FOR_ARM7 FALSE )
|
||||
ELSE ()
|
||||
MESSAGE ( "Cross compiling for ARM7" )
|
||||
SET ( YUV_CROSS_COMPILE_FOR_ARM7 TRUE )
|
||||
ENDIF ()
|
||||
STRING (FIND "${CMAKE_SYSTEM_PROCESSOR}" "arm" pos)
|
||||
IF ( ${pos} EQUAL -1 )
|
||||
SET ( YUV_COMPILE_FOR_ARM7 FALSE )
|
||||
ELSE ()
|
||||
MESSAGE ( "Compiling for ARM" )
|
||||
SET ( YUV_COMPILE_FOR_ARM7 TRUE )
|
||||
ENDIF ()
|
||||
|
||||
# set up the system name, such as "x86-32", "amd-64", and "armhf-32"
|
||||
IF ( ${YUV_CROSS_COMPILE_FOR_ARM7} OR ${YUV_COMPILE_FOR_ARM7} )
|
||||
SET ( YUV_SYSTEM_NAME "armhf-${YUV_BIT_SIZE}" )
|
||||
ELSE ()
|
||||
IF ( YUV_BIT_SIZE EQUAL 32 )
|
||||
SET ( YUV_SYSTEM_NAME "x86-${YUV_BIT_SIZE}" )
|
||||
ELSE ()
|
||||
SET ( YUV_SYSTEM_NAME "amd-${YUV_BIT_SIZE}" )
|
||||
ENDIF ()
|
||||
ENDIF ()
|
||||
MESSAGE ( "Packaging for: ${YUV_SYSTEM_NAME}" )
|
||||
|
||||
# define all the variables needed by CPack to create .deb and .rpm packages
|
||||
SET ( CPACK_PACKAGE_VENDOR "Frank Barchard" )
|
||||
SET ( CPACK_PACKAGE_CONTACT "fbarchard@chromium.org" )
|
||||
SET ( CPACK_PACKAGE_VERSION ${YUV_VERSION} )
|
||||
SET ( CPACK_PACKAGE_VERSION_MAJOR ${YUV_VER_MAJOR} )
|
||||
SET ( CPACK_PACKAGE_VERSION_MINOR ${YUV_VER_MINOR} )
|
||||
SET ( CPACK_PACKAGE_VERSION_PATCH ${YUV_VER_PATCH} )
|
||||
SET ( CPACK_RESOURCE_FILE_LICENSE ${PROJECT_SOURCE_DIR}/LICENSE )
|
||||
SET ( CPACK_SYSTEM_NAME "linux-${YUV_SYSTEM_NAME}" )
|
||||
SET ( CPACK_PACKAGE_NAME "libyuv" )
|
||||
SET ( CPACK_PACKAGE_DESCRIPTION_SUMMARY "YUV library" )
|
||||
SET ( CPACK_PACKAGE_DESCRIPTION "YUV library and YUV conversion tool" )
|
||||
SET ( CPACK_DEBIAN_PACKAGE_SECTION "other" )
|
||||
SET ( CPACK_DEBIAN_PACKAGE_PRIORITY "optional" )
|
||||
SET ( CPACK_DEBIAN_PACKAGE_MAINTAINER "Frank Barchard <fbarchard@chromium.org>" )
|
||||
SET ( CPACK_GENERATOR "DEB;RPM" )
|
||||
|
||||
# create the .deb and .rpm files (you'll need build-essential and rpm tools)
|
||||
INCLUDE( CPack )
|
||||
|
||||
+51
-104
@@ -1,112 +1,45 @@
|
||||
cmake_minimum_required(VERSION 2.8)
|
||||
|
||||
# CMakeLists for libyuv
|
||||
# Originally created for "roxlu build system" to compile libyuv on windows
|
||||
# Run with -DTEST=ON to build unit tests
|
||||
option(TEST "Built unit tests" OFF)
|
||||
|
||||
set(ly_base_dir ${CMAKE_CURRENT_LIST_DIR})
|
||||
set(ly_src_dir ${ly_base_dir}/source/)
|
||||
set(ly_inc_dir ${ly_base_dir}/include)
|
||||
set(ly_lib_name "yuv")
|
||||
PROJECT ( YUV C CXX ) # "C" is required even for C++ projects
|
||||
CMAKE_MINIMUM_REQUIRED( VERSION 2.8 )
|
||||
OPTION( TEST "Built unit tests" OFF )
|
||||
|
||||
set(ly_source_files
|
||||
${ly_src_dir}/compare.cc
|
||||
${ly_src_dir}/compare_common.cc
|
||||
${ly_src_dir}/compare_neon.cc
|
||||
${ly_src_dir}/compare_neon64.cc
|
||||
${ly_src_dir}/compare_gcc.cc
|
||||
${ly_src_dir}/compare_win.cc
|
||||
${ly_src_dir}/convert.cc
|
||||
${ly_src_dir}/convert_argb.cc
|
||||
${ly_src_dir}/convert_from.cc
|
||||
${ly_src_dir}/convert_from_argb.cc
|
||||
${ly_src_dir}/convert_jpeg.cc
|
||||
${ly_src_dir}/convert_to_argb.cc
|
||||
${ly_src_dir}/convert_to_i420.cc
|
||||
${ly_src_dir}/cpu_id.cc
|
||||
${ly_src_dir}/mjpeg_decoder.cc
|
||||
${ly_src_dir}/mjpeg_validate.cc
|
||||
${ly_src_dir}/planar_functions.cc
|
||||
${ly_src_dir}/rotate.cc
|
||||
${ly_src_dir}/rotate_any.cc
|
||||
${ly_src_dir}/rotate_argb.cc
|
||||
${ly_src_dir}/rotate_common.cc
|
||||
${ly_src_dir}/rotate_mips.cc
|
||||
${ly_src_dir}/rotate_neon.cc
|
||||
${ly_src_dir}/rotate_neon64.cc
|
||||
${ly_src_dir}/rotate_gcc.cc
|
||||
${ly_src_dir}/rotate_win.cc
|
||||
${ly_src_dir}/row_any.cc
|
||||
${ly_src_dir}/row_common.cc
|
||||
${ly_src_dir}/row_mips.cc
|
||||
${ly_src_dir}/row_msa.cc
|
||||
${ly_src_dir}/row_neon.cc
|
||||
${ly_src_dir}/row_neon64.cc
|
||||
${ly_src_dir}/row_gcc.cc
|
||||
${ly_src_dir}/row_win.cc
|
||||
${ly_src_dir}/scale.cc
|
||||
${ly_src_dir}/scale_any.cc
|
||||
${ly_src_dir}/scale_argb.cc
|
||||
${ly_src_dir}/scale_common.cc
|
||||
${ly_src_dir}/scale_mips.cc
|
||||
${ly_src_dir}/scale_neon.cc
|
||||
${ly_src_dir}/scale_neon64.cc
|
||||
${ly_src_dir}/scale_gcc.cc
|
||||
${ly_src_dir}/scale_win.cc
|
||||
${ly_src_dir}/video_common.cc
|
||||
)
|
||||
SET ( ly_base_dir ${PROJECT_SOURCE_DIR} )
|
||||
SET ( ly_src_dir ${ly_base_dir}/source )
|
||||
SET ( ly_inc_dir ${ly_base_dir}/include )
|
||||
SET ( ly_tst_dir ${ly_base_dir}/unit_test )
|
||||
SET ( ly_lib_name yuv )
|
||||
SET ( ly_lib_static ${ly_lib_name} )
|
||||
SET ( ly_lib_shared ${ly_lib_name}_shared )
|
||||
|
||||
set(ly_unittest_sources
|
||||
${ly_base_dir}/unit_test/basictypes_test.cc
|
||||
${ly_base_dir}/unit_test/color_test.cc
|
||||
${ly_base_dir}/unit_test/compare_test.cc
|
||||
${ly_base_dir}/unit_test/convert_test.cc
|
||||
${ly_base_dir}/unit_test/cpu_test.cc
|
||||
${ly_base_dir}/unit_test/math_test.cc
|
||||
${ly_base_dir}/unit_test/planar_test.cc
|
||||
${ly_base_dir}/unit_test/rotate_argb_test.cc
|
||||
${ly_base_dir}/unit_test/rotate_test.cc
|
||||
${ly_base_dir}/unit_test/scale_argb_test.cc
|
||||
${ly_base_dir}/unit_test/scale_test.cc
|
||||
${ly_base_dir}/unit_test/unit_test.cc
|
||||
${ly_base_dir}/unit_test/video_common_test.cc
|
||||
)
|
||||
FILE ( GLOB_RECURSE ly_source_files ${ly_src_dir}/*.cc )
|
||||
LIST ( SORT ly_source_files )
|
||||
|
||||
set(ly_header_files
|
||||
${ly_inc_dir}/libyuv/basic_types.h
|
||||
${ly_inc_dir}/libyuv/compare.h
|
||||
${ly_inc_dir}/libyuv/convert.h
|
||||
${ly_inc_dir}/libyuv/convert_argb.h
|
||||
${ly_inc_dir}/libyuv/convert_from.h
|
||||
${ly_inc_dir}/libyuv/convert_from_argb.h
|
||||
${ly_inc_dir}/libyuv/cpu_id.h
|
||||
${ly_inc_dir}/libyuv/macros_msa.h
|
||||
${ly_inc_dir}/libyuv/planar_functions.h
|
||||
${ly_inc_dir}/libyuv/rotate.h
|
||||
${ly_inc_dir}/libyuv/rotate_argb.h
|
||||
${ly_inc_dir}/libyuv/rotate_row.h
|
||||
${ly_inc_dir}/libyuv/row.h
|
||||
${ly_inc_dir}/libyuv/scale.h
|
||||
${ly_inc_dir}/libyuv/scale_argb.h
|
||||
${ly_inc_dir}/libyuv/scale_row.h
|
||||
${ly_inc_dir}/libyuv/version.h
|
||||
${ly_inc_dir}/libyuv/video_common.h
|
||||
${ly_inc_dir}/libyuv/mjpeg_decoder.h
|
||||
)
|
||||
FILE ( GLOB_RECURSE ly_unittest_sources ${ly_tst_dir}/*.cc )
|
||||
LIST ( SORT ly_unittest_sources )
|
||||
|
||||
include_directories(${ly_inc_dir})
|
||||
INCLUDE_DIRECTORIES( BEFORE ${ly_inc_dir} )
|
||||
|
||||
add_library(${ly_lib_name} STATIC ${ly_source_files})
|
||||
# this creates the static library (.a)
|
||||
ADD_LIBRARY ( ${ly_lib_static} STATIC ${ly_source_files} )
|
||||
|
||||
add_executable(convert ${ly_base_dir}/util/convert.cc)
|
||||
target_link_libraries(convert ${ly_lib_name})
|
||||
# this creates the shared library (.so)
|
||||
ADD_LIBRARY ( ${ly_lib_shared} SHARED ${ly_source_files} )
|
||||
SET_TARGET_PROPERTIES ( ${ly_lib_shared} PROPERTIES OUTPUT_NAME "${ly_lib_name}" )
|
||||
SET_TARGET_PROPERTIES ( ${ly_lib_shared} PROPERTIES PREFIX "lib" )
|
||||
|
||||
include(FindJPEG)
|
||||
# this creates the conversion tool
|
||||
ADD_EXECUTABLE ( yuvconvert ${ly_base_dir}/util/yuvconvert.cc )
|
||||
TARGET_LINK_LIBRARIES ( yuvconvert ${ly_lib_static} )
|
||||
|
||||
|
||||
INCLUDE ( FindJPEG )
|
||||
if (JPEG_FOUND)
|
||||
include_directories(${JPEG_INCLUDE_DIR})
|
||||
target_link_libraries(convert ${JPEG_LIBRARY})
|
||||
add_definitions(-DHAVE_JPEG)
|
||||
include_directories( ${JPEG_INCLUDE_DIR} )
|
||||
target_link_libraries( yuvconvert ${JPEG_LIBRARY} )
|
||||
add_definitions( -DHAVE_JPEG )
|
||||
endif()
|
||||
|
||||
if(TEST)
|
||||
@@ -126,19 +59,33 @@ if(TEST)
|
||||
endif()
|
||||
|
||||
add_executable(libyuv_unittest ${ly_unittest_sources})
|
||||
target_link_libraries(libyuv_unittest ${ly_lib_name} ${GTEST_LIBRARY} pthread)
|
||||
target_link_libraries(libyuv_unittest ${ly_lib_name} ${GTEST_LIBRARY})
|
||||
find_library(PTHREAD_LIBRARY pthread)
|
||||
if(NOT PTHREAD_LIBRARY STREQUAL "PTHREAD_LIBRARY-NOTFOUND")
|
||||
target_link_libraries(libyuv_unittest pthread)
|
||||
endif()
|
||||
if (JPEG_FOUND)
|
||||
target_link_libraries(libyuv_unittest ${JPEG_LIBRARY})
|
||||
endif()
|
||||
|
||||
|
||||
if(NACL AND NACL_LIBC STREQUAL "newlib")
|
||||
target_link_libraries(libyuv_unittest glibc-compat)
|
||||
endif()
|
||||
|
||||
target_link_libraries(libyuv_unittest gflags)
|
||||
|
||||
find_library(GFLAGS_LIBRARY gflags)
|
||||
if(NOT GFLAGS_LIBRARY STREQUAL "GFLAGS_LIBRARY-NOTFOUND")
|
||||
target_link_libraries(libyuv_unittest gflags)
|
||||
add_definitions(-DLIBYUV_USE_GFLAGS)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
install(TARGETS ${ly_lib_name} DESTINATION lib)
|
||||
install(FILES ${ly_header_files} DESTINATION include/libyuv)
|
||||
install(FILES ${ly_inc_dir}/libyuv.h DESTINATION include/)
|
||||
|
||||
# install the conversion tool, .so, .a, and all the header files
|
||||
INSTALL ( PROGRAMS ${CMAKE_BINARY_DIR}/yuvconvert DESTINATION bin )
|
||||
INSTALL ( TARGETS ${ly_lib_static} DESTINATION lib )
|
||||
INSTALL ( TARGETS ${ly_lib_shared} LIBRARY DESTINATION lib RUNTIME DESTINATION bin )
|
||||
INSTALL ( DIRECTORY ${PROJECT_SOURCE_DIR}/include/ DESTINATION include )
|
||||
|
||||
# create the .deb and .rpm packages using cpack
|
||||
INCLUDE ( CM_linux_packages.cmake )
|
||||
|
||||
|
||||
+1007
-28
File diff suppressed because it is too large
Load Diff
@@ -1,8 +0,0 @@
|
||||
This source tree contains third party source code which is governed by third
|
||||
party licenses. This file contains references to files which are under other
|
||||
licenses than the one provided in the LICENSE file in the root of the source
|
||||
tree.
|
||||
|
||||
Files governed by third party licenses:
|
||||
source/x86inc.asm
|
||||
|
||||
+2
-7
@@ -1,13 +1,8 @@
|
||||
fbarchard@chromium.org
|
||||
magjed@chromium.org
|
||||
torbjorng@chromium.org
|
||||
|
||||
per-file *.gyp=kjellander@chromium.org
|
||||
per-file *.gn=kjellander@chromium.org
|
||||
per-file *.gn=phoglund@chromium.org
|
||||
per-file .gitignore=*
|
||||
per-file AUTHORS=*
|
||||
per-file DEPS=*
|
||||
per-file PRESUBMIT.py=kjellander@chromium.org
|
||||
per-file gyp_libyuv.py=kjellander@chromium.org
|
||||
per-file setup_links.py=*
|
||||
per-file sync_chromium.py=kjellander@chromium.org
|
||||
per-file PRESUBMIT.py=phoglund@chromium.org
|
||||
|
||||
Executable → Regular
+61
-54
@@ -1,4 +1,4 @@
|
||||
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
|
||||
# Copyright 2017 The LibYuv Project Authors. All rights reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
@@ -6,60 +6,67 @@
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
|
||||
|
||||
def GetDefaultTryConfigs(bots=None):
|
||||
"""Returns a list of ('bot', set(['tests']), optionally filtered by [bots].
|
||||
def _RunPythonTests(input_api, output_api):
|
||||
def join(*args):
|
||||
return input_api.os_path.join(input_api.PresubmitLocalPath(), *args)
|
||||
|
||||
For WebRTC purposes, we always return an empty list of tests, since we want
|
||||
to run all tests by default on all our trybots.
|
||||
"""
|
||||
return { 'tryserver.libyuv': dict((bot, []) for bot in bots)}
|
||||
|
||||
|
||||
# pylint: disable=W0613
|
||||
def GetPreferredTryMasters(project, change):
|
||||
files = change.LocalPaths()
|
||||
bots = [
|
||||
'win',
|
||||
'win_rel',
|
||||
'win_x64_rel',
|
||||
'win_x64_gn',
|
||||
'win_x64_gn_rel',
|
||||
'win_clang',
|
||||
'win_clang_rel',
|
||||
'win_x64_clang_rel',
|
||||
'mac',
|
||||
'mac_rel',
|
||||
'mac_gn',
|
||||
'mac_gn_rel',
|
||||
'mac_asan',
|
||||
'ios',
|
||||
'ios_rel',
|
||||
'ios_arm64',
|
||||
'ios_arm64_rel',
|
||||
'linux',
|
||||
'linux_rel',
|
||||
'linux_gn',
|
||||
'linux_gn_rel',
|
||||
'linux_memcheck',
|
||||
'linux_tsan2',
|
||||
'linux_asan',
|
||||
'linux_msan',
|
||||
'linux_ubsan',
|
||||
'linux_ubsan_vptr',
|
||||
'android',
|
||||
'android_rel',
|
||||
'android_clang',
|
||||
'android_arm64',
|
||||
'android_mips',
|
||||
'android_x64',
|
||||
'android_x86',
|
||||
'android_gn',
|
||||
'android_gn_rel',
|
||||
test_directories = [
|
||||
root for root, _, files in os.walk(join('tools_libyuv'))
|
||||
if any(f.endswith('_test.py') for f in files)
|
||||
]
|
||||
if not files or all(re.search(r'[\\/]OWNERS$', f) for f in files):
|
||||
return {}
|
||||
return GetDefaultTryConfigs(bots)
|
||||
|
||||
tests = []
|
||||
for directory in test_directories:
|
||||
tests.extend(
|
||||
input_api.canned_checks.GetUnitTestsInDirectory(
|
||||
input_api,
|
||||
output_api,
|
||||
directory,
|
||||
whitelist=[r'.+_test\.py$']))
|
||||
return input_api.RunTests(tests, parallel=True)
|
||||
|
||||
|
||||
def _CommonChecks(input_api, output_api):
  """Runs the presubmit checks shared by the upload and commit hooks.

  Pylint is run first (with a set of excluded path patterns and disabled
  warnings), then the Python unit tests. The results of both are returned
  as a single list, pylint results first.
  """
  # Path patterns that pylint must not inspect.
  pylint_excluded_paths = (
      r'^base[\\\/].*\.py$',
      r'^build[\\\/].*\.py$',
      r'^buildtools[\\\/].*\.py$',
      r'^ios[\\\/].*\.py$',
      r'^out.*[\\\/].*\.py$',
      r'^testing[\\\/].*\.py$',
      r'^third_party[\\\/].*\.py$',
      r'^tools[\\\/].*\.py$',
      # TODO(kjellander): should arguably be checked.
      r'^tools_libyuv[\\\/]valgrind[\\\/].*\.py$',
      r'^xcodebuild.*[\\\/].*\.py$',
  )
  pylint_disabled_warnings = [
      'F0401',  # Failed to import x
      'E0611',  # No package y in x
      'W0232',  # Class has no __init__ method
  ]

  results = []
  results += input_api.canned_checks.RunPylint(
      input_api, output_api,
      black_list=pylint_excluded_paths,
      disabled_warnings=pylint_disabled_warnings,
      pylintrc='pylintrc')
  results += _RunPythonTests(input_api, output_api)
  return results
|
||||
|
||||
|
||||
def CheckChangeOnUpload(input_api, output_api):
  """Presubmit hook for uploads: common checks plus the GN format check."""
  upload_results = list(_CommonChecks(input_api, output_api))
  gn_format_results = input_api.canned_checks.CheckGNFormatted(
      input_api, output_api)
  upload_results += gn_format_results
  return upload_results
|
||||
|
||||
|
||||
def CheckChangeOnCommit(input_api, output_api):
  """Presubmit hook for commits.

  Runs the common checks, then the canned owners, was-uploaded and
  has-description checks, in that order, and returns all results together.
  """
  commit_results = list(_CommonChecks(input_api, output_api))
  # Canned checks are looked up by name one at a time so each lookup happens
  # right before its call, exactly as in a sequence of explicit calls.
  for check_name in ('CheckOwners',
                     'CheckChangeWasUploaded',
                     'CheckChangeHasDescription'):
    canned_check = getattr(input_api.canned_checks, check_name)
    commit_results += canned_check(input_api, output_api)
  return commit_results
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
Name: libyuv
|
||||
URL: http://code.google.com/p/libyuv/
|
||||
Version: 1620
|
||||
Version: 1724
|
||||
License: BSD
|
||||
License File: LICENSE
|
||||
|
||||
|
||||
@@ -1,18 +1,18 @@
|
||||
**libyuv** is an open source project that includes YUV scaling and conversion functionality.
|
||||
|
||||
* Scale YUV to prepare content for compression, with point, bilinear or box filter.
|
||||
* Convert to YUV from webcam formats.
|
||||
* Convert from YUV to formats for rendering/effects.
|
||||
* Convert to YUV from webcam formats for compression.
|
||||
* Convert to RGB formats for rendering/effects.
|
||||
* Rotate by 90/180/270 degrees to adjust for mobile devices in portrait mode.
|
||||
* Optimized for SSE2/SSSE3/AVX2 on x86/x64.
|
||||
* Optimized for SSSE3/AVX2 on x86/x64.
|
||||
* Optimized for Neon on Arm.
|
||||
* Optimized for DSP R2 on Mips.
|
||||
* Optimized for MSA on Mips.
|
||||
|
||||
### Development
|
||||
|
||||
See [Getting started] [1] for instructions on how to get started developing.
|
||||
See [Getting started][1] for instructions on how to get started developing.
|
||||
|
||||
You can also browse the [docs directory] [2] for more documentation.
|
||||
You can also browse the [docs directory][2] for more documentation.
|
||||
|
||||
[1]: https://chromium.googlesource.com/libyuv/libyuv/+/master/docs/getting_started.md
|
||||
[2]: https://chromium.googlesource.com/libyuv/libyuv/+/master/docs/
|
||||
[1]: ./docs/getting_started.md
|
||||
[2]: ./docs/
|
||||
|
||||
@@ -6,13 +6,8 @@
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
# Using same overrides as WebRTC
|
||||
# See https://bugs.chromium.org/p/webrtc/issues/detail?id=5453.
|
||||
# Some WebRTC targets require the 10.7 deployment version of the Mac SDK and a
|
||||
# 10.11 min SDK but those targets are only used in non-Chromium builds. We can
|
||||
# remove this when Chromium drops 10.6 support and also requires 10.7.
|
||||
mac_sdk_min_build_override = "10.11"
|
||||
mac_deployment_target_build_override = "10.7"
|
||||
# Some non-Chromium builds don't use Chromium's third_party/binutils.
|
||||
linux_use_bundled_binutils_override = true
|
||||
|
||||
# Variable that can be used to support multiple build scenarios, like having
|
||||
# Chromium specific targets in a client project's GN file etc.
|
||||
@@ -20,3 +15,32 @@ build_with_chromium = false
|
||||
|
||||
# Some non-Chromium builds don't support building java targets.
|
||||
enable_java_templates = true
|
||||
|
||||
# Allow using custom suppressions files (currently not used by libyuv).
|
||||
asan_suppressions_file = "//build/sanitizers/asan_suppressions.cc"
|
||||
lsan_suppressions_file = "//build/sanitizers/lsan_suppressions.cc"
|
||||
tsan_suppressions_file = "//build/sanitizers/tsan_suppressions.cc"
|
||||
|
||||
msan_blacklist_path =
|
||||
rebase_path("//tools_libyuv/msan/blacklist.txt", root_build_dir)
|
||||
ubsan_blacklist_path =
|
||||
rebase_path("//tools_libyuv/ubsan/blacklist.txt", root_build_dir)
|
||||
ubsan_vptr_blacklist_path =
|
||||
rebase_path("//tools_libyuv/ubsan/vptr_blacklist.txt", root_build_dir)
|
||||
|
||||
# For Chromium, Android 32-bit non-component, non-clang builds hit a 4GiB size
|
||||
# limit, which makes them require symbol_level=2. WebRTC doesn't hit that problem
|
||||
# so we just ignore that assert. See https://crbug.com/648948 for more info.
|
||||
ignore_elf32_limitations = true
|
||||
|
||||
# Use bundled hermetic Xcode installation maintained by Chromium,
|
||||
# except for local iOS builds where it is unsupported.
|
||||
if (host_os == "mac") {
  # Ask the Chromium helper script which Xcode to use for this target OS.
  _result = exec_script("//build/mac/should_use_hermetic_xcode.py",
                        [ target_os ],
                        "value")

  # A result of 2 is rejected outright. Note the trailing space before the
  # string concatenation so the message does not read "defaulthermetic".
  assert(_result != 2,
         "Do not allow building targets with the default " +
             "hermetic toolchain if the minimum OS version is not met.")

  # A result of 0 selects the system Xcode installation.
  use_system_xcode = _result == 0
}
|
||||
|
||||
@@ -1,20 +0,0 @@
|
||||
solutions = [{
|
||||
'name': 'src',
|
||||
'url': 'https://chromium.googlesource.com/chromium/src.git',
|
||||
'deps_file': '.DEPS.git',
|
||||
'managed': False,
|
||||
'custom_deps': {
|
||||
# Skip syncing some large dependencies Libyuv will never need.
|
||||
'src/third_party/cld_2/src': None,
|
||||
'src/third_party/ffmpeg': None,
|
||||
'src/third_party/hunspell_dictionaries': None,
|
||||
'src/third_party/liblouis/src': None,
|
||||
'src/third_party/pdfium': None,
|
||||
'src/third_party/skia': None,
|
||||
'src/third_party/trace-viewer': None,
|
||||
'src/third_party/webrtc': None,
|
||||
},
|
||||
'safesync_url': ''
|
||||
}]
|
||||
|
||||
cache_dir = None
|
||||
@@ -1,5 +0,0 @@
|
||||
This .gclient file is used to download a copy of Chromium.
|
||||
Libyuv uses the Chromium build toolchain and a number of shared
|
||||
dependencies by creating symlinks to folders in this checkout,
|
||||
using the ../setup_links.py script.
|
||||
|
||||
Executable
+107
@@ -0,0 +1,107 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright 2017 The LibYuv Project Authors. All rights reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
# This is a copy of the file from WebRTC in:
|
||||
# https://chromium.googlesource.com/external/webrtc/+/master/cleanup_links.py
|
||||
|
||||
"""Script to cleanup symlinks created from setup_links.py.
|
||||
|
||||
Before 177567c518b121731e507e9b9c4049c4dc96e4c8 (#15754) we had a Chromium
|
||||
checkout which we created symlinks into. In order to do clean syncs after
|
||||
landing that change, this script cleans up any old symlinks, avoiding annoying
|
||||
manual cleanup needed in order to complete gclient sync.
|
||||
"""
|
||||
|
||||
import logging
|
||||
import optparse
|
||||
import os
|
||||
import shelve
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
|
||||
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
LINKS_DB = 'links'
|
||||
|
||||
# Version management to make future upgrades/downgrades easier to support.
|
||||
SCHEMA_VERSION = 1
|
||||
|
||||
class WebRTCLinkSetup(object):
  """Removes the symlinks recorded in a links database.

  Args:
    links_db: Mutable mapping of link source -> link path (for example a
      shelve database). A 'SCHEMA_VERSION' entry, if present, is ignored.
    dry_run: If true, only log what would be removed; nothing is deleted
      from disk or from the mapping.
  """

  def __init__(self, links_db, dry_run=False):
    self._dry_run = dry_run
    self._links_db = links_db

  def CleanupLinks(self):
    """Deletes every recorded link from disk and from the links mapping."""
    logging.debug('CleanupLinks')
    # Iterate over a snapshot: entries are deleted from the mapping inside the
    # loop, and items() (unlike iteritems()) exists on both Python 2 and 3.
    for source, link_path in list(self._links_db.items()):
      if source == 'SCHEMA_VERSION':
        continue
      if os.path.islink(link_path) or sys.platform.startswith('win'):
        # os.path.islink() always returns false on Windows
        # See http://bugs.python.org/issue13143.
        logging.debug('Removing link to %s at %s', source, link_path)
        if not self._dry_run:
          if os.path.exists(link_path):
            if sys.platform.startswith('win') and os.path.isdir(link_path):
              # Directory links on Windows are removed through the shell.
              subprocess.check_call(['rmdir', '/q', '/s', link_path],
                                    shell=True)
            else:
              os.remove(link_path)
          del self._links_db[source]
|
||||
|
||||
|
||||
def _initialize_database(filename):
  """Opens the links shelve database and stamps it with SCHEMA_VERSION.

  If the database already carries a truthy schema version that differs from
  SCHEMA_VERSION, its contents are wiped first.

  Args:
    filename: Path passed to shelve.open().

  Returns:
    The opened shelve database, with 'SCHEMA_VERSION' set.
  """
  db = shelve.open(filename)
  stored_version = db.get('SCHEMA_VERSION')
  # A missing (falsy) version is accepted as-is; only a differing one wipes.
  schema_mismatch = bool(stored_version) and stored_version != SCHEMA_VERSION
  if schema_mismatch:
    logging.info('Found database with schema version %s while this script only '
                 'supports %s. Wiping previous database contents.',
                 stored_version, SCHEMA_VERSION)
    db.clear()
  db['SCHEMA_VERSION'] = SCHEMA_VERSION
  return db
|
||||
|
||||
|
||||
def main():
  """Parses options, removes recorded links and deletes the links database.

  The links database is only processed when one of its backing files exists
  next to this script. Its backing files are removed afterwards whether or
  not the cleanup succeeded; note that this also happens with --dry-run.

  Returns:
    0, used as the process exit code.
  """
  parser = optparse.OptionParser()
  parser.add_option('-d', '--dry-run', action='store_true', default=False,
                    help='Print what would be done, but don\'t perform any '
                         'operations. This will automatically set logging to '
                         'verbose.')
  parser.add_option('-v', '--verbose', action='store_const',
                    const=logging.DEBUG, default=logging.INFO,
                    help='Print verbose output for debugging.')
  options, _ = parser.parse_args()

  # A dry run is only useful if the would-be actions are actually printed.
  if options.dry_run:
    options.verbose = logging.DEBUG
  logging.basicConfig(format='%(message)s', level=options.verbose)

  # Work from the root directory of the checkout.
  script_dir = os.path.dirname(os.path.abspath(__file__))
  os.chdir(script_dir)

  # The database file gets .db appended on some platforms.
  db_filenames = [LINKS_DB, LINKS_DB + '.db']
  if any(os.path.isfile(f) for f in db_filenames):
    links_database = _initialize_database(LINKS_DB)
    try:
      symlink_creator = WebRTCLinkSetup(links_database, options.dry_run)
      symlink_creator.CleanupLinks()
    finally:
      # Release the database handle before deleting its backing files; an
      # open file cannot be removed on Windows.
      links_database.close()
      for f in db_filenames:
        if os.path.isfile(f):
          os.remove(f)
  return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
@@ -1,11 +1,6 @@
|
||||
# This file is used by gcl to get repository specific information.
|
||||
# This file is used by git cl to get repository specific information.
|
||||
CODE_REVIEW_SERVER: codereview.chromium.org
|
||||
#CC_LIST:
|
||||
VIEW_VC: https://chromium.googlesource.com/libyuv/libyuv/+/
|
||||
#STATUS:
|
||||
FORCE_HTTPS_COMMIT_URL: True
|
||||
GERRIT_HOST: True
|
||||
PROJECT: libyuv
|
||||
TRY_ON_UPLOAD: False
|
||||
TRYSERVER_ROOT: src
|
||||
#GITCL_PREUPLOAD:
|
||||
#GITCL_PREDCOMMIT:
|
||||
VIEW_VC: https://chromium.googlesource.com/libyuv/libyuv/+/
|
||||
|
||||
@@ -0,0 +1,440 @@
|
||||
# Deprecated Builds
|
||||
|
||||
Older documentation on build configs which are no longer supported.
|
||||
|
||||
## Pre-requisites
|
||||
|
||||
You'll need to have depot tools installed: https://www.chromium.org/developers/how-tos/install-depot-tools
|
||||
Refer to chromium instructions for each platform for other prerequisites.
|
||||
|
||||
## Getting the Code
|
||||
|
||||
Create a working directory, enter it, and run:
|
||||
|
||||
gclient config https://chromium.googlesource.com/libyuv/libyuv
|
||||
gclient sync
|
||||
|
||||
|
||||
Then you'll get a .gclient file like:
|
||||
|
||||
solutions = [
|
||||
{ "name" : "libyuv",
|
||||
"url" : "https://chromium.googlesource.com/libyuv/libyuv",
|
||||
"deps_file" : "DEPS",
|
||||
"managed" : True,
|
||||
"custom_deps" : {
|
||||
},
|
||||
"safesync_url": "",
|
||||
},
|
||||
];
|
||||
|
||||
|
||||
For iOS add `;target_os=['ios'];` to your OSX .gclient and run `GYP_DEFINES="OS=ios" gclient sync`.
|
||||
|
||||
Browse the Git repository: https://chromium.googlesource.com/libyuv/libyuv/+/master
|
||||
|
||||
### Android
|
||||
For Android add `;target_os=['android'];` to your Linux .gclient
|
||||
|
||||
|
||||
solutions = [
|
||||
{ "name" : "libyuv",
|
||||
"url" : "https://chromium.googlesource.com/libyuv/libyuv",
|
||||
"deps_file" : "DEPS",
|
||||
"managed" : True,
|
||||
"custom_deps" : {
|
||||
},
|
||||
"safesync_url": "",
|
||||
},
|
||||
];
|
||||
target_os = ["android", "unix"];
|
||||
|
||||
Then run:
|
||||
|
||||
export GYP_DEFINES="OS=android"
|
||||
gclient sync
|
||||
|
||||
Caveat: There's an error with Google Play services updates. If you get the error "Your version of the Google Play services library is not up to date", run the following:
|
||||
|
||||
cd chromium/src
|
||||
./build/android/play_services/update.py download
|
||||
cd ../..
|
||||
|
||||
For Windows the gclient sync must be done from an Administrator command prompt.
|
||||
|
||||
The sync will generate native build files for your environment using gyp (Windows: Visual Studio, OSX: XCode, Linux: make). This generation can also be forced manually: `gclient runhooks`
|
||||
|
||||
To get just the source (not buildable):
|
||||
|
||||
git clone https://chromium.googlesource.com/libyuv/libyuv
|
||||
|
||||
|
||||
## Building the Library and Unittests
|
||||
|
||||
### Windows
|
||||
|
||||
set GYP_DEFINES=target_arch=ia32
|
||||
call python gyp_libyuv -fninja -G msvs_version=2013
|
||||
ninja -j7 -C out\Release
|
||||
ninja -j7 -C out\Debug
|
||||
|
||||
set GYP_DEFINES=target_arch=x64
|
||||
call python gyp_libyuv -fninja -G msvs_version=2013
|
||||
ninja -C out\Debug_x64
|
||||
ninja -C out\Release_x64
|
||||
|
||||
#### Building with clangcl
|
||||
set GYP_DEFINES=clang=1 target_arch=ia32
|
||||
call python tools\clang\scripts\update.py
|
||||
call python gyp_libyuv -fninja libyuv_test.gyp
|
||||
ninja -C out\Debug
|
||||
ninja -C out\Release
|
||||
|
||||
### OSX
|
||||
|
||||
Clang 64 bit shown. Remove `clang=1` for GCC and change x64 to ia32 for 32 bit.
|
||||
|
||||
GYP_DEFINES="clang=1 target_arch=x64" ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug
|
||||
ninja -j7 -C out/Release
|
||||
|
||||
GYP_DEFINES="clang=1 target_arch=ia32" ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug
|
||||
ninja -j7 -C out/Release
|
||||
|
||||
### iOS
|
||||
http://www.chromium.org/developers/how-tos/build-instructions-ios
|
||||
|
||||
Add to .gclient last line: `target_os=['ios'];`
|
||||
|
||||
armv7
|
||||
|
||||
GYP_DEFINES="OS=ios target_arch=armv7 target_subarch=arm32" GYP_CROSSCOMPILE=1 GYP_GENERATOR_FLAGS="output_dir=out_ios" ./gyp_libyuv
|
||||
ninja -j7 -C out_ios/Debug-iphoneos libyuv_unittest
|
||||
ninja -j7 -C out_ios/Release-iphoneos libyuv_unittest
|
||||
|
||||
arm64
|
||||
|
||||
GYP_DEFINES="OS=ios target_arch=arm64 target_subarch=arm64" GYP_CROSSCOMPILE=1 GYP_GENERATOR_FLAGS="output_dir=out_ios" ./gyp_libyuv
|
||||
ninja -j7 -C out_ios/Debug-iphoneos libyuv_unittest
|
||||
ninja -j7 -C out_ios/Release-iphoneos libyuv_unittest
|
||||
|
||||
both armv7 and arm64 (fat)
|
||||
|
||||
GYP_DEFINES="OS=ios target_arch=armv7 target_subarch=both" GYP_CROSSCOMPILE=1 GYP_GENERATOR_FLAGS="output_dir=out_ios" ./gyp_libyuv
|
||||
ninja -j7 -C out_ios/Debug-iphoneos libyuv_unittest
|
||||
ninja -j7 -C out_ios/Release-iphoneos libyuv_unittest
|
||||
|
||||
simulator
|
||||
|
||||
GYP_DEFINES="OS=ios target_arch=ia32 target_subarch=arm32" GYP_CROSSCOMPILE=1 GYP_GENERATOR_FLAGS="output_dir=out_sim" ./gyp_libyuv
|
||||
ninja -j7 -C out_sim/Debug-iphonesimulator libyuv_unittest
|
||||
ninja -j7 -C out_sim/Release-iphonesimulator libyuv_unittest
|
||||
|
||||
### Android
|
||||
https://code.google.com/p/chromium/wiki/AndroidBuildInstructions
|
||||
|
||||
Add to .gclient last line: `target_os=['android'];`
|
||||
|
||||
armv7
|
||||
|
||||
GYP_DEFINES="OS=android" GYP_CROSSCOMPILE=1 ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug yuv_unittest_apk
|
||||
ninja -j7 -C out/Release yuv_unittest_apk
|
||||
|
||||
arm64
|
||||
|
||||
GYP_DEFINES="OS=android target_arch=arm64 target_subarch=arm64" GYP_CROSSCOMPILE=1 ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug yuv_unittest_apk
|
||||
ninja -j7 -C out/Release yuv_unittest_apk
|
||||
|
||||
ia32
|
||||
|
||||
GYP_DEFINES="OS=android target_arch=ia32" GYP_CROSSCOMPILE=1 ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug yuv_unittest_apk
|
||||
ninja -j7 -C out/Release yuv_unittest_apk
|
||||
|
||||
GYP_DEFINES="OS=android target_arch=ia32 android_full_debug=1" GYP_CROSSCOMPILE=1 ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug yuv_unittest_apk
|
||||
|
||||
mipsel
|
||||
|
||||
GYP_DEFINES="OS=android target_arch=mipsel" GYP_CROSSCOMPILE=1 ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug yuv_unittest_apk
|
||||
ninja -j7 -C out/Release yuv_unittest_apk
|
||||
|
||||
arm32 disassembly:
|
||||
|
||||
third_party/android_ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objdump -d out/Release/obj/source/libyuv.row_neon.o
|
||||
|
||||
arm64 disassembly:
|
||||
|
||||
third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d out/Release/obj/source/libyuv.row_neon64.o
|
||||
|
||||
Running tests:
|
||||
|
||||
build/android/test_runner.py gtest -s libyuv_unittest -t 7200 --verbose --release --gtest_filter=*
|
||||
|
||||
Running test as benchmark:
|
||||
|
||||
build/android/test_runner.py gtest -s libyuv_unittest -t 7200 --verbose --release --gtest_filter=* -a "--libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=-1"
|
||||
|
||||
Running test with C code:
|
||||
|
||||
build/android/test_runner.py gtest -s libyuv_unittest -t 7200 --verbose --release --gtest_filter=* -a "--libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=1 --libyuv_cpu_info=1"
|
||||
|
||||
#### Building with GN
|
||||
|
||||
gn gen out/Release "--args=is_debug=false target_cpu=\"x86\""
|
||||
gn gen out/Debug "--args=is_debug=true target_cpu=\"x86\""
|
||||
ninja -C out/Release
|
||||
ninja -C out/Debug
|
||||
|
||||
### Building Official with GN
|
||||
|
||||
gn gen out/Official "--args=is_debug=false is_official_build=true is_chrome_branded=true"
|
||||
ninja -C out/Official
|
||||
|
||||
#### Building mips with GN
|
||||
|
||||
mipsel
|
||||
gn gen out/Default "--args=is_debug=false target_cpu=\"mipsel\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false"
|
||||
ninja -C out/Default
|
||||
|
||||
mips64el
|
||||
gn gen out/Default "--args=is_debug=false target_cpu=\"mips64el\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false"
|
||||
ninja -C out/Default
|
||||
|
||||
### Linux
|
||||
|
||||
GYP_DEFINES="target_arch=x64" ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug
|
||||
ninja -j7 -C out/Release
|
||||
|
||||
GYP_DEFINES="target_arch=ia32" ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug
|
||||
ninja -j7 -C out/Release
|
||||
|
||||
#### CentOS
|
||||
|
||||
On CentOS 32 bit the following workaround allows a sync:
|
||||
|
||||
export GYP_DEFINES="host_arch=ia32"
|
||||
gclient sync
|
||||
|
||||
### Windows Shared Library
|
||||
|
||||
Modify libyuv.gyp from 'static_library' to 'shared_library', and add 'LIBYUV_BUILDING_SHARED_LIBRARY' to 'defines'.
|
||||
|
||||
gclient runhooks
|
||||
|
||||
After this command follow the building the library instructions above.
|
||||
|
||||
If you get a compile error for atlthunk.lib on Windows, read http://www.chromium.org/developers/how-tos/build-instructions-windows
|
||||
|
||||
|
||||
### Build targets
|
||||
|
||||
ninja -C out/Debug libyuv
|
||||
ninja -C out/Debug libyuv_unittest
|
||||
ninja -C out/Debug compare
|
||||
ninja -C out/Debug yuvconvert
|
||||
ninja -C out/Debug psnr
|
||||
ninja -C out/Debug cpuid
|
||||
|
||||
|
||||
## Building the Library with make
|
||||
|
||||
### Linux
|
||||
|
||||
make -j7 V=1 -f linux.mk
|
||||
make -j7 V=1 -f linux.mk clean
|
||||
make -j7 V=1 -f linux.mk CXX=clang++
|
||||
|
||||
## Building the Library with cmake
|
||||
|
||||
Install cmake: http://www.cmake.org/
|
||||
|
||||
Default debug build:
|
||||
|
||||
mkdir out
|
||||
cd out
|
||||
cmake ..
|
||||
cmake --build .
|
||||
|
||||
Release build/install
|
||||
|
||||
mkdir out
|
||||
cd out
|
||||
cmake -DCMAKE_INSTALL_PREFIX="/usr/lib" -DCMAKE_BUILD_TYPE="Release" ..
|
||||
cmake --build . --config Release
|
||||
sudo cmake --build . --target install --config Release
|
||||
|
||||
### Windows 8 Phone
|
||||
|
||||
Pre-requisite:
|
||||
|
||||
* Install Visual Studio 2012 and Arm to your environment.<br>
|
||||
|
||||
Then:
|
||||
|
||||
call "c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin\x86_arm\vcvarsx86_arm.bat"
|
||||
|
||||
or with Visual Studio 2013:
|
||||
|
||||
call "c:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\x86_arm\vcvarsx86_arm.bat"
|
||||
nmake /f winarm.mk clean
|
||||
nmake /f winarm.mk
|
||||
|
||||
### Windows Shared Library
|
||||
|
||||
Modify libyuv.gyp from 'static_library' to 'shared_library', and add 'LIBYUV_BUILDING_SHARED_LIBRARY' to 'defines'. Then run this.
|
||||
|
||||
gclient runhooks
|
||||
|
||||
After this command follow the building the library instructions above.
|
||||
|
||||
If you get a compile error for atlthunk.lib on Windows, read http://www.chromium.org/developers/how-tos/build-instructions-windows
|
||||
|
||||
### 64 bit Windows
|
||||
|
||||
set GYP_DEFINES=target_arch=x64
|
||||
gclient runhooks V=1
|
||||
|
||||
### ARM Linux
|
||||
|
||||
export GYP_DEFINES="target_arch=arm"
|
||||
export CROSSTOOL=`<path>`/arm-none-linux-gnueabi
|
||||
export CXX=$CROSSTOOL-g++
|
||||
export CC=$CROSSTOOL-gcc
|
||||
export AR=$CROSSTOOL-ar
|
||||
export AS=$CROSSTOOL-as
|
||||
export RANLIB=$CROSSTOOL-ranlib
|
||||
gclient runhooks
|
||||
|
||||
## Running Unittests
|
||||
|
||||
### Windows
|
||||
|
||||
out\Release\libyuv_unittest.exe --gtest_catch_exceptions=0 --gtest_filter="*"
|
||||
|
||||
### OSX
|
||||
|
||||
out/Release/libyuv_unittest --gtest_filter="*"
|
||||
|
||||
### Linux
|
||||
|
||||
out/Release/libyuv_unittest --gtest_filter="*"
|
||||
|
||||
Replace --gtest_filter="*" with specific unittest to run. May include wildcards. e.g.
|
||||
|
||||
out/Release/libyuv_unittest --gtest_filter=libyuvTest.I420ToARGB_Opt
|
||||
|
||||
## CPU Emulator tools
|
||||
|
||||
### Intel SDE (Software Development Emulator)
|
||||
|
||||
Pre-requisite: Install IntelSDE for Windows: http://software.intel.com/en-us/articles/intel-software-development-emulator
|
||||
|
||||
Then run:
|
||||
|
||||
c:\intelsde\sde -hsw -- out\release\libyuv_unittest.exe --gtest_filter=*
|
||||
|
||||
|
||||
## Memory tools
|
||||
|
||||
### Running Dr Memory memcheck for Windows
|
||||
|
||||
Pre-requisite: Install Dr Memory for Windows and add it to your path: http://www.drmemory.org/docs/page_install_windows.html
|
||||
|
||||
set GYP_DEFINES=build_for_tool=drmemory target_arch=ia32
|
||||
call python gyp_libyuv -fninja -G msvs_version=2013
|
||||
ninja -C out\Debug
|
||||
drmemory out\Debug\libyuv_unittest.exe --gtest_catch_exceptions=0 --gtest_filter=*
|
||||
|
||||
### Running UBSan
|
||||
|
||||
See Chromium instructions for sanitizers: https://www.chromium.org/developers/testing/undefinedbehaviorsanitizer
|
||||
|
||||
Sanitizers available: TSan, MSan, ASan, UBSan, LSan
|
||||
|
||||
GYP_DEFINES='ubsan=1' gclient runhooks
|
||||
ninja -C out/Release
|
||||
|
||||
### Running Valgrind memcheck
|
||||
|
||||
Memory errors and race conditions can be found by running tests under special memory tools. [Valgrind][1] is an instrumentation framework for building dynamic analysis tools. Various tests and profilers are built upon it to find memory handling errors and memory leaks, for instance.
|
||||
|
||||
[1]: http://valgrind.org
|
||||
|
||||
solutions = [
|
||||
{ "name" : "libyuv",
|
||||
"url" : "https://chromium.googlesource.com/libyuv/libyuv",
|
||||
"deps_file" : "DEPS",
|
||||
"managed" : True,
|
||||
"custom_deps" : {
|
||||
"libyuv/chromium/src/third_party/valgrind": "https://chromium.googlesource.com/chromium/deps/valgrind/binaries",
|
||||
},
|
||||
"safesync_url": "",
|
||||
},
|
||||
]
|
||||
|
||||
Then run:
|
||||
|
||||
GYP_DEFINES="clang=0 target_arch=x64 build_for_tool=memcheck" python gyp_libyuv
|
||||
ninja -C out/Debug
|
||||
valgrind out/Debug/libyuv_unittest
|
||||
|
||||
|
||||
For more information, see http://www.chromium.org/developers/how-tos/using-valgrind
|
||||
|
||||
### Running Thread Sanitizer (TSan)
|
||||
|
||||
GYP_DEFINES="clang=0 target_arch=x64 build_for_tool=tsan" python gyp_libyuv
|
||||
ninja -C out/Debug
|
||||
valgrind out/Debug/libyuv_unittest
|
||||
|
||||
For more info, see http://www.chromium.org/developers/how-tos/using-valgrind/threadsanitizer
|
||||
|
||||
### Running Address Sanitizer (ASan)
|
||||
|
||||
GYP_DEFINES="clang=0 target_arch=x64 build_for_tool=asan" python gyp_libyuv
|
||||
ninja -C out/Debug
|
||||
valgrind out/Debug/libyuv_unittest
|
||||
|
||||
For more info, see http://dev.chromium.org/developers/testing/addresssanitizer
|
||||
|
||||
## Benchmarking
|
||||
|
||||
The unittests can be used to benchmark.
|
||||
|
||||
### Windows
|
||||
|
||||
set LIBYUV_WIDTH=1280
|
||||
set LIBYUV_HEIGHT=720
|
||||
set LIBYUV_REPEAT=999
|
||||
set LIBYUV_FLAGS=-1
|
||||
out\Release\libyuv_unittest.exe --gtest_filter=*I420ToARGB_Opt
|
||||
|
||||
|
||||
### Linux and Mac
|
||||
|
||||
LIBYUV_WIDTH=1280 LIBYUV_HEIGHT=720 LIBYUV_REPEAT=1000 out/Release/libyuv_unittest --gtest_filter=*I420ToARGB_Opt
|
||||
|
||||
libyuvTest.I420ToARGB_Opt (547 ms)
|
||||
|
||||
Indicates 0.547 ms/frame for 1280 x 720.
|
||||
|
||||
## Making a change
|
||||
|
||||
gclient sync
|
||||
git checkout -b mycl -t origin/master
|
||||
git pull
|
||||
<edit files>
|
||||
git add -u
|
||||
git commit -m "my change"
|
||||
git cl lint
|
||||
git cl try
|
||||
git cl upload -r a-reviewer@chromium.org -s
|
||||
<once approved..>
|
||||
git cl land
|
||||
@@ -6,7 +6,10 @@ For test purposes, environment variables can be set to control libyuv behavior.
|
||||
|
||||
By default the cpu is detected and the most advanced form of SIMD is used. But you can disable instruction sets selectively, or completely, falling back on C code. Set the variable to 1 to disable the specified instruction set.
|
||||
|
||||
## All CPUs
|
||||
LIBYUV_DISABLE_ASM
|
||||
|
||||
## Intel CPUs
|
||||
LIBYUV_DISABLE_X86
|
||||
LIBYUV_DISABLE_SSE2
|
||||
LIBYUV_DISABLE_SSSE3
|
||||
@@ -14,12 +17,25 @@ By default the cpu is detected and the most advanced form of SIMD is used. But
|
||||
LIBYUV_DISABLE_SSE42
|
||||
LIBYUV_DISABLE_AVX
|
||||
LIBYUV_DISABLE_AVX2
|
||||
LIBYUV_DISABLE_AVX3
|
||||
LIBYUV_DISABLE_ERMS
|
||||
LIBYUV_DISABLE_FMA3
|
||||
LIBYUV_DISABLE_DSPR2
|
||||
LIBYUV_DISABLE_F16C
|
||||
LIBYUV_DISABLE_AVX512BW
|
||||
LIBYUV_DISABLE_AVX512VL
|
||||
LIBYUV_DISABLE_AVX512VBMI
|
||||
LIBYUV_DISABLE_AVX512VBMI2
|
||||
LIBYUV_DISABLE_AVX512VBITALG
|
||||
LIBYUV_DISABLE_AVX512VPOPCNTDQ
|
||||
LIBYUV_DISABLE_GFNI
|
||||
|
||||
## ARM CPUs
|
||||
|
||||
LIBYUV_DISABLE_NEON
|
||||
|
||||
## MIPS CPUs
|
||||
LIBYUV_DISABLE_MSA
|
||||
LIBYUV_DISABLE_MMI
|
||||
|
||||
# Test Width/Height/Repeat
|
||||
|
||||
The unittests default to a small image (128x72) to run fast. This can be set by environment variable to test a specific resolution.
|
||||
|
||||
+39
-18
@@ -35,50 +35,46 @@ This is how OSX formats map to libyuv
|
||||
# FOURCC (Four Character Code) List
|
||||
|
||||
The following is extracted from video_common.h as a complete list of formats supported by libyuv.
|
||||
|
||||
enum FourCC {
|
||||
// 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed.
|
||||
FOURCC_I420 = FOURCC('I', '4', '2', '0'),
|
||||
FOURCC_I422 = FOURCC('I', '4', '2', '2'),
|
||||
FOURCC_I444 = FOURCC('I', '4', '4', '4'),
|
||||
FOURCC_I411 = FOURCC('I', '4', '1', '1'),
|
||||
FOURCC_I400 = FOURCC('I', '4', '0', '0'),
|
||||
FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
|
||||
FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
|
||||
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
|
||||
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
|
||||
FOURCC_H010 = FOURCC('H', '0', '1', '0'), // unofficial fourcc. 10 bit lsb
|
||||
|
||||
// 2 Secondary YUV formats: row biplanar.
|
||||
// 1 Secondary YUV format: row biplanar.
|
||||
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
|
||||
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
|
||||
|
||||
// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
|
||||
// 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
|
||||
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
|
||||
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
|
||||
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
|
||||
FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010.
|
||||
FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit
|
||||
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
|
||||
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
|
||||
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
|
||||
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
|
||||
FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
|
||||
FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
|
||||
FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
|
||||
|
||||
// 4 Secondary RGB formats: 4 Bayer Patterns.
|
||||
FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
|
||||
FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
|
||||
FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
|
||||
FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
|
||||
|
||||
// 1 Primary Compressed YUV format.
|
||||
FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
|
||||
|
||||
// 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
|
||||
// 8 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
|
||||
FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
|
||||
FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
|
||||
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
|
||||
FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420.
|
||||
FOURCC_J420 = FOURCC('J', '4', '2', '0'),
|
||||
FOURCC_J400 = FOURCC('J', '4', '0', '0'),
|
||||
FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc
|
||||
FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc
|
||||
FOURCC_H422 = FOURCC('H', '4', '2', '2'), // unofficial fourcc
|
||||
|
||||
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
|
||||
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
|
||||
@@ -99,12 +95,9 @@ The following is extracted from video_common.h as a complete list of formats sup
|
||||
FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP.
|
||||
FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO.
|
||||
|
||||
// 1 Auxiliary compressed YUV format set aside for capturer.
|
||||
FOURCC_H264 = FOURCC('H', '2', '6', '4'),
|
||||
|
||||
# Planar YUV
|
||||
The following formats contain a full size Y plane followed by 1 or 2
|
||||
planes for UV: I420, I422, I444, I411, I400, NV21, NV12, I400
|
||||
planes for UV: I420, I422, I444, I400, NV21, NV12, I400
|
||||
The size (subsampling) of the UV varies.
|
||||
I420, NV12 and NV21 are half width, half height
|
||||
I422, NV16 and NV61 are half width, full height
|
||||
@@ -140,3 +133,31 @@ Some are channel order agnostic (e.g. ARGBScale).
|
||||
Some functions are symmetric (e.g. ARGBToBGRA is the same as BGRAToARGB, so it's a macro).
|
||||
|
||||
ARGBBlend expects preattenuated ARGB. The R,G,B are premultiplied by alpha. Other functions don't care.
|
||||
|
||||
# RGB24 and RAW
|
||||
|
||||
There are 2 RGB layouts - RGB24 (aka 24BG) and RAW
|
||||
|
||||
RGB24 is B,G,R in memory
|
||||
RAW is R,G,B in memory
|
||||
|
||||
# AR30 and XR30
|
||||
|
||||
AR30 is 2 10 10 10 ARGB stored in little endian order.
|
||||
The 2 bit alpha has 4 values. Here are the comparable 8 bit alpha values.
|
||||
0 - 0. 00000000b = 0x00 = 0
|
||||
1 - 33%. 01010101b = 0x55 = 85
|
||||
2 - 66%. 10101010b = 0xaa = 170
|
||||
3 - 100%. 11111111b = 0xff = 255
|
||||
The 10 bit RGB values range from 0 to 1023.
|
||||
XR30 is the same as AR30 but with no alpha channel.
|
||||
|
||||
# NV12 and NV21
|
||||
|
||||
NV12 is a biplanar format with a full sized Y plane followed by a single
|
||||
chroma plane with weaved U and V values.
|
||||
NV21 is the same but with weaved V and U values.
|
||||
The 12 in NV12 refers to 12 bits per pixel. NV12 has a half width and half
|
||||
height chroma channel, and therefore is a 420 subsampling.
|
||||
NV16 is 16 bits per pixel, with half width and full height. aka 422.
|
||||
NV24 is 24 bits per pixel with full sized chroma channel. aka 444.
|
||||
|
||||
+142
-298
@@ -11,14 +11,13 @@ Refer to chromium instructions for each platform for other prerequisites.
|
||||
|
||||
Create a working directory, enter it, and run:
|
||||
|
||||
gclient config https://chromium.googlesource.com/libyuv/libyuv
|
||||
gclient config --name src https://chromium.googlesource.com/libyuv/libyuv
|
||||
gclient sync
|
||||
|
||||
|
||||
Then you'll get a .gclient file like:
|
||||
|
||||
solutions = [
|
||||
{ "name" : "libyuv",
|
||||
{ "name" : "src",
|
||||
"url" : "https://chromium.googlesource.com/libyuv/libyuv",
|
||||
"deps_file" : "DEPS",
|
||||
"managed" : True,
|
||||
@@ -28,17 +27,15 @@ Then you'll get a .gclient file like:
|
||||
},
|
||||
];
|
||||
|
||||
|
||||
For iOS add `;target_os=['ios'];` to your OSX .gclient and run `GYP_DEFINES="OS=ios" gclient sync.`
|
||||
For iOS add `;target_os=['ios'];` to your OSX .gclient and run `gclient sync.`
|
||||
|
||||
Browse the Git repository: https://chromium.googlesource.com/libyuv/libyuv/+/master
|
||||
|
||||
### Android
|
||||
For Android add `;target_os=['android'];` to your Linux .gclient
|
||||
|
||||
|
||||
solutions = [
|
||||
{ "name" : "libyuv",
|
||||
{ "name" : "src",
|
||||
"url" : "https://chromium.googlesource.com/libyuv/libyuv",
|
||||
"deps_file" : "DEPS",
|
||||
"managed" : True,
|
||||
@@ -47,23 +44,12 @@ For Android add `;target_os=['android'];` to your Linux .gclient
|
||||
"safesync_url": "",
|
||||
},
|
||||
];
|
||||
target_os = ["android", "unix"];
|
||||
target_os = ["android", "linux"];
|
||||
|
||||
Then run:
|
||||
|
||||
export GYP_DEFINES="OS=android"
|
||||
gclient sync
|
||||
|
||||
Caveat: There's an error with Google Play services updates. If you get the error "Your version of the Google Play services library is not up to date", run the following:
|
||||
|
||||
cd chromium/src
|
||||
./build/android/play_services/update.py download
|
||||
cd ../..
|
||||
|
||||
For Windows the gclient sync must be done from an Administrator command prompt.
|
||||
|
||||
The sync will generate native build files for your environment using gyp (Windows: Visual Studio, OSX: XCode, Linux: make). This generation can also be forced manually: `gclient runhooks`
|
||||
|
||||
To get just the source (not buildable):
|
||||
|
||||
git clone https://chromium.googlesource.com/libyuv/libyuv
|
||||
@@ -73,196 +59,151 @@ To get just the source (not buildable):
|
||||
|
||||
### Windows
|
||||
|
||||
set GYP_DEFINES=target_arch=ia32
|
||||
call python gyp_libyuv -fninja -G msvs_version=2013
|
||||
ninja -j7 -C out\Release
|
||||
ninja -j7 -C out\Debug
|
||||
call gn gen out\Release "--args=is_debug=false target_cpu=\"x64\""
|
||||
call gn gen out\Debug "--args=is_debug=true target_cpu=\"x64\""
|
||||
ninja -v -C out\Release
|
||||
ninja -v -C out\Debug
|
||||
|
||||
set GYP_DEFINES=target_arch=x64
|
||||
call python gyp_libyuv -fninja -G msvs_version=2013
|
||||
ninja -C out\Debug_x64
|
||||
ninja -C out\Release_x64
|
||||
call gn gen out\Release "--args=is_debug=false target_cpu=\"x86\""
|
||||
call gn gen out\Debug "--args=is_debug=true target_cpu=\"x86\""
|
||||
ninja -v -C out\Release
|
||||
ninja -v -C out\Debug
|
||||
|
||||
#### Building with clangcl
|
||||
set GYP_DEFINES=clang=1 target_arch=ia32
|
||||
call python tools\clang\scripts\update.py
|
||||
call python gyp_libyuv -fninja libyuv_test.gyp
|
||||
ninja -C out\Debug
|
||||
ninja -C out\Release
|
||||
### macOS and Linux
|
||||
|
||||
### OSX
|
||||
|
||||
Clang 64 bit shown. Remove `clang=1` for GCC and change x64 to ia32 for 32 bit.
|
||||
|
||||
GYP_DEFINES="clang=1 target_arch=x64" ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug
|
||||
ninja -j7 -C out/Release
|
||||
|
||||
GYP_DEFINES="clang=1 target_arch=ia32" ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug
|
||||
ninja -j7 -C out/Release
|
||||
|
||||
### iOS
|
||||
http://www.chromium.org/developers/how-tos/build-instructions-ios
|
||||
|
||||
Add to .gclient last line: `target_os=['ios'];`
|
||||
|
||||
armv7
|
||||
|
||||
GYP_DEFINES="OS=ios target_arch=armv7 target_subarch=arm32" GYP_CROSSCOMPILE=1 GYP_GENERATOR_FLAGS="output_dir=out_ios" ./gyp_libyuv
|
||||
ninja -j7 -C out_ios/Debug-iphoneos libyuv_unittest
|
||||
ninja -j7 -C out_ios/Release-iphoneos libyuv_unittest
|
||||
|
||||
arm64
|
||||
|
||||
GYP_DEFINES="OS=ios target_arch=arm64 target_subarch=arm64" GYP_CROSSCOMPILE=1 GYP_GENERATOR_FLAGS="output_dir=out_ios" ./gyp_libyuv
|
||||
ninja -j7 -C out_ios/Debug-iphoneos libyuv_unittest
|
||||
ninja -j7 -C out_ios/Release-iphoneos libyuv_unittest
|
||||
|
||||
both armv7 and arm64 (fat)
|
||||
|
||||
GYP_DEFINES="OS=ios target_arch=armv7 target_subarch=both" GYP_CROSSCOMPILE=1 GYP_GENERATOR_FLAGS="output_dir=out_ios" ./gyp_libyuv
|
||||
ninja -j7 -C out_ios/Debug-iphoneos libyuv_unittest
|
||||
ninja -j7 -C out_ios/Release-iphoneos libyuv_unittest
|
||||
|
||||
simulator
|
||||
|
||||
GYP_DEFINES="OS=ios target_arch=ia32 target_subarch=arm32" GYP_CROSSCOMPILE=1 GYP_GENERATOR_FLAGS="output_dir=out_sim" ./gyp_libyuv
|
||||
ninja -j7 -C out_sim/Debug-iphonesimulator libyuv_unittest
|
||||
ninja -j7 -C out_sim/Release-iphonesimulator libyuv_unittest
|
||||
|
||||
### Android
|
||||
https://code.google.com/p/chromium/wiki/AndroidBuildInstructions
|
||||
|
||||
Add to .gclient last line: `target_os=['android'];`
|
||||
|
||||
armv7
|
||||
|
||||
GYP_DEFINES="OS=android" GYP_CROSSCOMPILE=1 ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug yuv_unittest_apk
|
||||
ninja -j7 -C out/Release yuv_unittest_apk
|
||||
|
||||
arm64
|
||||
|
||||
GYP_DEFINES="OS=android target_arch=arm64 target_subarch=arm64" GYP_CROSSCOMPILE=1 ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug yuv_unittest_apk
|
||||
ninja -j7 -C out/Release yuv_unittest_apk
|
||||
|
||||
ia32
|
||||
|
||||
GYP_DEFINES="OS=android target_arch=ia32" GYP_CROSSCOMPILE=1 ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug yuv_unittest_apk
|
||||
ninja -j7 -C out/Release yuv_unittest_apk
|
||||
|
||||
GYP_DEFINES="OS=android target_arch=ia32 android_full_debug=1" GYP_CROSSCOMPILE=1 ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug yuv_unittest_apk
|
||||
|
||||
mipsel
|
||||
|
||||
GYP_DEFINES="OS=android target_arch=mipsel" GYP_CROSSCOMPILE=1 ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug yuv_unittest_apk
|
||||
ninja -j7 -C out/Release yuv_unittest_apk
|
||||
|
||||
arm32 disassembly:
|
||||
|
||||
third_party/android_tools/ndk/toolchains/arm-linux-androideabi-4.9/prebuilt/linux-x86_64/bin/arm-linux-androideabi-objdump -d out/Release/obj/source/libyuv.row_neon.o
|
||||
|
||||
arm64 disassembly:
|
||||
|
||||
third_party/android_tools/ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d out/Release/obj/source/libyuv.row_neon64.o
|
||||
|
||||
Running tests:
|
||||
|
||||
util/android/test_runner.py gtest -s libyuv_unittest -t 7200 --verbose --release --gtest_filter=*
|
||||
|
||||
Running test as benchmark:
|
||||
|
||||
util/android/test_runner.py gtest -s libyuv_unittest -t 7200 --verbose --release --gtest_filter=* -a "--libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=-1"
|
||||
|
||||
Running test with C code:
|
||||
|
||||
util/android/test_runner.py gtest -s libyuv_unittest -t 7200 --verbose --release --gtest_filter=* -a "--libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=1 --libyuv_cpu_info=1"
|
||||
|
||||
#### Building with GN
|
||||
|
||||
gn gen out/Release "--args=is_debug=false target_cpu=\"x86\""
|
||||
gn gen out/Debug "--args=is_debug=true target_cpu=\"x86\""
|
||||
ninja -C out/Release
|
||||
ninja -C out/Debug
|
||||
gn gen out/Release "--args=is_debug=false"
|
||||
gn gen out/Debug "--args=is_debug=true"
|
||||
ninja -v -C out/Release
|
||||
ninja -v -C out/Debug
|
||||
|
||||
### Building Official with GN
|
||||
|
||||
gn gen out/Official "--args=is_debug=false is_official_build=true is_chrome_branded=true"
|
||||
ninja -C out/Official
|
||||
|
||||
#### Building mips with GN
|
||||
### iOS
|
||||
http://www.chromium.org/developers/how-tos/build-instructions-ios
|
||||
|
||||
mipsel
|
||||
gn gen out/Default "--args=is_debug=false target_cpu=\"mipsel\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false"
|
||||
ninja -C out/Default
|
||||
Add to .gclient last line: `target_os=['ios'];`
|
||||
|
||||
mips64el
|
||||
gn gen out/Default "--args=is_debug=false target_cpu=\"mips64el\" target_os = \"android\" mips_arch_variant = \"r6\" mips_use_msa = true is_component_build = true is_clang = false"
|
||||
ninja -C out/Default
|
||||
arm64
|
||||
|
||||
### Linux
|
||||
gn gen out/Release "--args=is_debug=false target_os=\"ios\" ios_enable_code_signing=false target_cpu=\"arm64\""
|
||||
gn gen out/Debug "--args=is_debug=true target_os=\"ios\" ios_enable_code_signing=false target_cpu=\"arm64\""
|
||||
ninja -v -C out/Debug libyuv_unittest
|
||||
ninja -v -C out/Release libyuv_unittest
|
||||
|
||||
GYP_DEFINES="target_arch=x64" ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug
|
||||
ninja -j7 -C out/Release
|
||||
ios simulator
|
||||
|
||||
GYP_DEFINES="target_arch=ia32" ./gyp_libyuv
|
||||
ninja -j7 -C out/Debug
|
||||
ninja -j7 -C out/Release
|
||||
gn gen out/Release "--args=is_debug=false target_os=\"ios\" ios_enable_code_signing=false use_xcode_clang=true target_cpu=\"x86\""
|
||||
gn gen out/Debug "--args=is_debug=true target_os=\"ios\" ios_enable_code_signing=false use_xcode_clang=true target_cpu=\"x86\""
|
||||
ninja -v -C out/Debug libyuv_unittest
|
||||
ninja -v -C out/Release libyuv_unittest
|
||||
|
||||
#### CentOS
|
||||
ios disassembly
|
||||
|
||||
On CentOS 32 bit the following work around allows a sync:
|
||||
otool -tV ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt
|
||||
|
||||
export GYP_DEFINES="host_arch=ia32"
|
||||
gclient sync
|
||||
### Android
|
||||
https://code.google.com/p/chromium/wiki/AndroidBuildInstructions
|
||||
|
||||
### Windows Shared Library
|
||||
Add to .gclient last line: `target_os=['android'];`
|
||||
|
||||
Modify libyuv.gyp from 'static_library' to 'shared_library', and add 'LIBYUV_BUILDING_SHARED_LIBRARY' to 'defines'.
|
||||
arm64
|
||||
|
||||
gclient runhooks
|
||||
gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"arm64\""
|
||||
gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"arm64\""
|
||||
ninja -v -C out/Debug libyuv_unittest
|
||||
ninja -v -C out/Release libyuv_unittest
|
||||
|
||||
After this command follow the building the library instructions above.
|
||||
armv7
|
||||
|
||||
If you get a compile error for atlthunk.lib on Windows, read http://www.chromium.org/developers/how-tos/build-instructions-windows
|
||||
gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"arm\""
|
||||
gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"arm\""
|
||||
ninja -v -C out/Debug libyuv_unittest
|
||||
ninja -v -C out/Release libyuv_unittest
|
||||
|
||||
ia32
|
||||
|
||||
gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"x86\""
|
||||
gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"x86\""
|
||||
ninja -v -C out/Debug libyuv_unittest
|
||||
ninja -v -C out/Release libyuv_unittest
|
||||
|
||||
mips
|
||||
|
||||
gn gen out/Release "--args=is_debug=false target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true"
|
||||
gn gen out/Debug "--args=is_debug=true target_os=\"android\" target_cpu=\"mips64el\" mips_arch_variant=\"r6\" mips_use_msa=true is_component_build=true"
|
||||
ninja -v -C out/Debug libyuv_unittest
|
||||
ninja -v -C out/Release libyuv_unittest
|
||||
|
||||
arm disassembly:
|
||||
|
||||
third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv/row_common.o >row_common.txt
|
||||
|
||||
third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon.o >row_neon.txt
|
||||
|
||||
third_party/android_ndk/toolchains/aarch64-linux-android-4.9/prebuilt/linux-x86_64/bin/aarch64-linux-android-objdump -d ./out/Release/obj/libyuv_neon/row_neon64.o >row_neon64.txt
|
||||
|
||||
Caveat: Disassembly may require optimize_max be disabled in BUILD.gn
|
||||
|
||||
Running tests:
|
||||
|
||||
out/Release/bin/run_libyuv_unittest -vv --gtest_filter=*
|
||||
|
||||
Running test as benchmark:
|
||||
|
||||
out/Release/bin/run_libyuv_unittest -vv --gtest_filter=* --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=-1 --libyuv_cpu_info=-1
|
||||
|
||||
Running test with C code:
|
||||
|
||||
out/Release/bin/run_libyuv_unittest -vv --gtest_filter=* --libyuv_width=1280 --libyuv_height=720 --libyuv_repeat=999 --libyuv_flags=1 --libyuv_cpu_info=1
|
||||
|
||||
### Build targets
|
||||
|
||||
ninja -C out/Debug libyuv
|
||||
ninja -C out/Debug libyuv_unittest
|
||||
ninja -C out/Debug compare
|
||||
ninja -C out/Debug convert
|
||||
ninja -C out/Debug yuvconvert
|
||||
ninja -C out/Debug psnr
|
||||
ninja -C out/Debug cpuid
|
||||
|
||||
### ARM Linux
|
||||
|
||||
gn gen out/Release "--args=is_debug=false target_cpu=\"arm64\""
|
||||
gn gen out/Debug "--args=is_debug=true target_cpu=\"arm64\""
|
||||
ninja -v -C out/Debug libyuv_unittest
|
||||
ninja -v -C out/Release libyuv_unittest
|
||||
|
||||
### MIPS Linux
|
||||
|
||||
mips
|
||||
|
||||
gn gen out/Release "--args=is_debug=false target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" mips_use_mmi=true is_component_build=false use_sysroot=false use_gold=false"
|
||||
gn gen out/Debug "--args=is_debug=true target_os=\"linux\" target_cpu=\"mips64el\" mips_arch_variant=\"loongson3\" mips_use_mmi=true is_component_build=false use_sysroot=false use_gold=false"
|
||||
ninja -v -C out/Debug libyuv_unittest
|
||||
ninja -v -C out/Release libyuv_unittest
|
||||
|
||||
## Building the Library with make
|
||||
|
||||
### Linux
|
||||
|
||||
make -j7 V=1 -f linux.mk
|
||||
make -j7 V=1 -f linux.mk clean
|
||||
make -j7 V=1 -f linux.mk CXX=clang++
|
||||
make V=1 -f linux.mk
|
||||
make V=1 -f linux.mk clean
|
||||
make V=1 -f linux.mk CXX=clang++
|
||||
|
||||
## Building the Library with cmake
|
||||
## Building the library with cmake
|
||||
|
||||
Install cmake: http://www.cmake.org/
|
||||
|
||||
Default debug build:
|
||||
### Default debug build:
|
||||
|
||||
mkdir out
|
||||
cd out
|
||||
cmake ..
|
||||
cmake --build .
|
||||
|
||||
Release build/install
|
||||
### Release build/install
|
||||
|
||||
mkdir out
|
||||
cd out
|
||||
@@ -270,47 +211,31 @@ Release build/install
|
||||
cmake --build . --config Release
|
||||
sudo cmake --build . --target install --config Release
|
||||
|
||||
### Windows 8 Phone
|
||||
### Build RPM/DEB packages
|
||||
|
||||
Pre-requisite:
|
||||
mkdir out
|
||||
cd out
|
||||
cmake -DCMAKE_BUILD_TYPE=Release ..
|
||||
make -j4
|
||||
make package
|
||||
|
||||
* Install Visual Studio 2012 and Arm to your environment.<br>
|
||||
## Setup for Arm Cross compile
|
||||
|
||||
Then:
|
||||
See also https://www.ccoderun.ca/programming/2015-12-20_CrossCompiling/index.html
|
||||
|
||||
call "c:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin\x86_arm\vcvarsx86_arm.bat"
|
||||
sudo apt-get install ssh dkms build-essential linux-headers-generic
|
||||
sudo apt-get install kdevelop cmake git subversion
|
||||
sudo apt-get install graphviz doxygen doxygen-gui
|
||||
sudo apt-get install manpages manpages-dev manpages-posix manpages-posix-dev
|
||||
sudo apt-get install libboost-all-dev libboost-dev libssl-dev
|
||||
sudo apt-get install rpm terminator fish
|
||||
sudo apt-get install g++-arm-linux-gnueabihf gcc-arm-linux-gnueabihf
|
||||
|
||||
or with Visual Studio 2013:
|
||||
### Build psnr tool
|
||||
|
||||
call "c:\Program Files (x86)\Microsoft Visual Studio 12.0\VC\bin\x86_arm\vcvarsx86_arm.bat"
|
||||
nmake /f winarm.mk clean
|
||||
nmake /f winarm.mk
|
||||
|
||||
### Windows Shared Library
|
||||
|
||||
Modify libyuv.gyp from 'static_library' to 'shared_library', and add 'LIBYUV_BUILDING_SHARED_LIBRARY' to 'defines'. Then run this.
|
||||
|
||||
gclient runhooks
|
||||
|
||||
After this command follow the building the library instructions above.
|
||||
|
||||
If you get a compile error for atlthunk.lib on Windows, read http://www.chromium.org/developers/how-tos/build-instructions-windows
|
||||
|
||||
### 64 bit Windows
|
||||
|
||||
set GYP_DEFINES=target_arch=x64
|
||||
gclient runhooks V=1
|
||||
|
||||
### ARM Linux
|
||||
|
||||
export GYP_DEFINES="target_arch=arm"
|
||||
export CROSSTOOL=`<path>`/arm-none-linux-gnueabi
|
||||
export CXX=$CROSSTOOL-g++
|
||||
export CC=$CROSSTOOL-gcc
|
||||
export AR=$CROSSTOOL-ar
|
||||
export AS=$CROSSTOOL-as
|
||||
export RANLIB=$CROSSTOOL-ranlib
|
||||
gclient runhooks
|
||||
cd util
|
||||
arm-linux-gnueabihf-g++ psnr_main.cc psnr.cc ssim.cc -o psnr
|
||||
arm-linux-gnueabihf-objdump -d psnr
|
||||
|
||||
## Running Unittests
|
||||
|
||||
@@ -318,123 +243,42 @@ If you get a compile error for atlthunk.lib on Windows, read http://www.chromium
|
||||
|
||||
out\Release\libyuv_unittest.exe --gtest_catch_exceptions=0 --gtest_filter="*"
|
||||
|
||||
### OSX
|
||||
### macOS and Linux
|
||||
|
||||
out/Release/libyuv_unittest --gtest_filter="*"
|
||||
|
||||
### Linux
|
||||
|
||||
out/Release/libyuv_unittest --gtest_filter="*"
|
||||
|
||||
Replace --gtest_filter="*" with specific unittest to run. May include wildcards. e.g.
|
||||
|
||||
out/Release/libyuv_unittest --gtest_filter=libyuvTest.I420ToARGB_Opt
|
||||
Replace --gtest_filter="*" with specific unittest to run. May include wildcards.
|
||||
out/Release/libyuv_unittest --gtest_filter=*I420ToARGB_Opt
|
||||
|
||||
## CPU Emulator tools
|
||||
|
||||
### Intel SDE (Software Development Emulator)
|
||||
|
||||
Pre-requisite: Install IntelSDE for Windows: http://software.intel.com/en-us/articles/intel-software-development-emulator
|
||||
Pre-requisite: Install IntelSDE: http://software.intel.com/en-us/articles/intel-software-development-emulator
|
||||
|
||||
Then run:
|
||||
|
||||
c:\intelsde\sde -hsw -- out\release\libyuv_unittest.exe --gtest_filter=*
|
||||
c:\intelsde\sde -hsw -- out\Release\libyuv_unittest.exe --gtest_filter=*
|
||||
|
||||
~/intelsde/sde -skx -- out/Release/libyuv_unittest --gtest_filter=**I420ToARGB_Opt
|
||||
|
||||
## Memory tools
|
||||
### Intel Architecture Code Analyzer
|
||||
|
||||
Inset these 2 macros into assembly code to be analyzed:
|
||||
IACA_ASM_START
|
||||
IACA_ASM_END
|
||||
Build the code as usual, then run iaca on the object file.
|
||||
~/iaca-lin64/bin/iaca.sh -reduceout -arch HSW out/Release/obj/libyuv_internal/compare_gcc.o
|
||||
|
||||
## Sanitizers
|
||||
|
||||
gn gen out/Release "--args=is_debug=false is_msan=true"
|
||||
ninja -v -C out/Release
|
||||
|
||||
Sanitizers available: asan, msan, tsan, ubsan, lsan, ubsan_vptr
|
||||
|
||||
### Running Dr Memory memcheck for Windows
|
||||
|
||||
Pre-requisite: Install Dr Memory for Windows and add it to your path: http://www.drmemory.org/docs/page_install_windows.html
|
||||
|
||||
set GYP_DEFINES=build_for_tool=drmemory target_arch=ia32
|
||||
call python gyp_libyuv -fninja -G msvs_version=2013
|
||||
ninja -C out\Debug
|
||||
drmemory out\Debug\libyuv_unittest.exe --gtest_catch_exceptions=0 --gtest_filter=*
|
||||
|
||||
### Running UBSan
|
||||
|
||||
See Chromium instructions for sanitizers: https://www.chromium.org/developers/testing/undefinedbehaviorsanitizer
|
||||
|
||||
Sanitizers available: TSan, MSan, ASan, UBSan, LSan
|
||||
|
||||
GYP_DEFINES='ubsan=1' gclient runhooks
|
||||
ninja -C out/Release
|
||||
|
||||
### Running Valgrind memcheck
|
||||
|
||||
Memory errors and race conditions can be found by running tests under special memory tools. [Valgrind] [1] is an instrumentation framework for building dynamic analysis tools. Various tests and profilers are built upon it to find memory handling errors and memory leaks, for instance.
|
||||
|
||||
[1]: http://valgrind.org
|
||||
|
||||
solutions = [
|
||||
{ "name" : "libyuv",
|
||||
"url" : "https://chromium.googlesource.com/libyuv/libyuv",
|
||||
"deps_file" : "DEPS",
|
||||
"managed" : True,
|
||||
"custom_deps" : {
|
||||
"libyuv/chromium/src/third_party/valgrind": "https://chromium.googlesource.com/chromium/deps/valgrind/binaries",
|
||||
},
|
||||
"safesync_url": "",
|
||||
},
|
||||
]
|
||||
|
||||
Then run:
|
||||
|
||||
GYP_DEFINES="clang=0 target_arch=x64 build_for_tool=memcheck" python gyp_libyuv
|
||||
ninja -C out/Debug
|
||||
valgrind out/Debug/libyuv_unittest
|
||||
|
||||
|
||||
For more information, see http://www.chromium.org/developers/how-tos/using-valgrind
|
||||
|
||||
### Running Thread Sanitizer (TSan)
|
||||
|
||||
GYP_DEFINES="clang=0 target_arch=x64 build_for_tool=tsan" python gyp_libyuv
|
||||
ninja -C out/Debug
|
||||
valgrind out/Debug/libyuv_unittest
|
||||
|
||||
For more info, see http://www.chromium.org/developers/how-tos/using-valgrind/threadsanitizer
|
||||
|
||||
### Running Address Sanitizer (ASan)
|
||||
|
||||
GYP_DEFINES="clang=0 target_arch=x64 build_for_tool=asan" python gyp_libyuv
|
||||
ninja -C out/Debug
|
||||
valgrind out/Debug/libyuv_unittest
|
||||
|
||||
For more info, see http://dev.chromium.org/developers/testing/addresssanitizer
|
||||
|
||||
## Benchmarking
|
||||
|
||||
The unittests can be used to benchmark.
|
||||
|
||||
### Windows
|
||||
|
||||
set LIBYUV_WIDTH=1280
|
||||
set LIBYUV_HEIGHT=720
|
||||
set LIBYUV_REPEAT=999
|
||||
set LIBYUV_FLAGS=-1
|
||||
out\Release\libyuv_unittest.exe --gtest_filter=*I420ToARGB_Opt
|
||||
|
||||
|
||||
### Linux and Mac
|
||||
|
||||
LIBYUV_WIDTH=1280 LIBYUV_HEIGHT=720 LIBYUV_REPEAT=1000 out/Release/libyuv_unittest --gtest_filter=*I420ToARGB_Opt
|
||||
|
||||
libyuvTest.I420ToARGB_Opt (547 ms)
|
||||
|
||||
Indicates 0.547 ms/frame for 1280 x 720.
|
||||
|
||||
## Making a change
|
||||
|
||||
gclient sync
|
||||
git checkout -b mycl -t origin/master
|
||||
git pull
|
||||
<edit files>
|
||||
git add -u
|
||||
git commit -m "my change"
|
||||
git cl lint
|
||||
git cl try
|
||||
git cl upload -r a-reviewer@chromium.org -s
|
||||
<once approved..>
|
||||
git cl land
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
# This script is used to run GYP for libyuv. It contains selected parts of the
|
||||
# main function from the src/build/gyp_chromium file.
|
||||
|
||||
import glob
|
||||
import os
|
||||
import shlex
|
||||
import sys
|
||||
|
||||
checkout_root = os.path.dirname(os.path.realpath(__file__))
|
||||
|
||||
sys.path.insert(0, os.path.join(checkout_root, 'build'))
|
||||
import gyp_chromium
|
||||
import gyp_helper
|
||||
import vs_toolchain
|
||||
|
||||
sys.path.insert(0, os.path.join(checkout_root, 'tools', 'gyp', 'pylib'))
|
||||
import gyp
|
||||
|
||||
def GetSupplementalFiles():
|
||||
"""Returns a list of the supplemental files that are included in all GYP
|
||||
sources."""
|
||||
# Can't use the one in gyp_chromium since the directory location of the root
|
||||
# is different.
|
||||
return glob.glob(os.path.join(checkout_root, '*', 'supplement.gypi'))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
args = sys.argv[1:]
|
||||
|
||||
if int(os.environ.get('GYP_CHROMIUM_NO_ACTION', 0)):
|
||||
print 'Skipping gyp_libyuv due to GYP_CHROMIUM_NO_ACTION env var.'
|
||||
sys.exit(0)
|
||||
|
||||
# This could give false positives since it doesn't actually do real option
|
||||
# parsing. Oh well.
|
||||
gyp_file_specified = False
|
||||
for arg in args:
|
||||
if arg.endswith('.gyp'):
|
||||
gyp_file_specified = True
|
||||
break
|
||||
|
||||
# If we didn't get a file, assume 'all.gyp' in the root of the checkout.
|
||||
if not gyp_file_specified:
|
||||
# Because of a bug in gyp, simply adding the abspath to all.gyp doesn't
|
||||
# work, but chdir'ing and adding the relative path does. Spooky :/
|
||||
os.chdir(checkout_root)
|
||||
args.append('all.gyp')
|
||||
|
||||
# There shouldn't be a circular dependency relationship between .gyp files,
|
||||
args.append('--no-circular-check')
|
||||
|
||||
# Default to ninja unless GYP_GENERATORS is set.
|
||||
if not os.environ.get('GYP_GENERATORS'):
|
||||
os.environ['GYP_GENERATORS'] = 'ninja'
|
||||
|
||||
vs2013_runtime_dll_dirs = None
|
||||
if int(os.environ.get('DEPOT_TOOLS_WIN_TOOLCHAIN', '1')):
|
||||
vs2013_runtime_dll_dirs = vs_toolchain.SetEnvironmentAndGetRuntimeDllDirs()
|
||||
|
||||
# Enforce gyp syntax checking. This adds about 20% execution time.
|
||||
args.append('--check')
|
||||
|
||||
supplemental_includes = gyp_chromium.GetSupplementalFiles()
|
||||
gyp_vars_dict = gyp_chromium.GetGypVars(supplemental_includes)
|
||||
|
||||
# Automatically turn on crosscompile support for platforms that need it.
|
||||
if all(('ninja' in os.environ.get('GYP_GENERATORS', ''),
|
||||
gyp_vars_dict.get('OS') in ['android', 'ios'],
|
||||
'GYP_CROSSCOMPILE' not in os.environ)):
|
||||
os.environ['GYP_CROSSCOMPILE'] = '1'
|
||||
|
||||
args.extend(['-I' + i for i in
|
||||
gyp_chromium.additional_include_files(supplemental_includes,
|
||||
args)])
|
||||
|
||||
# Set the gyp depth variable to the root of the checkout.
|
||||
args.append('--depth=' + os.path.relpath(checkout_root))
|
||||
|
||||
print 'Updating projects from gyp files...'
|
||||
sys.stdout.flush()
|
||||
|
||||
# Off we go...
|
||||
gyp_rc = gyp.main(args)
|
||||
|
||||
if vs2013_runtime_dll_dirs:
|
||||
x64_runtime, x86_runtime = vs2013_runtime_dll_dirs
|
||||
vs_toolchain.CopyVsRuntimeDlls(
|
||||
os.path.join(checkout_root, gyp_chromium.GetOutputDirectory()),
|
||||
(x86_runtime, x64_runtime))
|
||||
|
||||
sys.exit(gyp_rc)
|
||||
@@ -1,28 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
#
|
||||
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
|
||||
# This script is a modified copy of the src/build/gyp_chromium.py file.
|
||||
# It is needed for parallel processing.
|
||||
|
||||
# This file is (possibly, depending on python version) imported by
|
||||
# gyp_libyuv when GYP_PARALLEL=1 and it creates sub-processes
|
||||
# through the multiprocessing library.
|
||||
|
||||
# Importing in Python 2.6 (fixed in 2.7) on Windows doesn't search for
|
||||
# imports that don't end in .py (and aren't directories with an
|
||||
# __init__.py). This wrapper makes "import gyp_libyuv" work with
|
||||
# those old versions and makes it possible to execute gyp_libyuv.py
|
||||
# directly on Windows where the extension is useful.
|
||||
|
||||
import os
|
||||
|
||||
path = os.path.abspath(os.path.split(__file__)[0])
|
||||
execfile(os.path.join(path, 'gyp_libyuv'))
|
||||
@@ -11,79 +11,36 @@
|
||||
#ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_
|
||||
#define INCLUDE_LIBYUV_BASIC_TYPES_H_
|
||||
|
||||
#include <stddef.h> // for NULL, size_t
|
||||
#include <stddef.h> // For size_t and NULL
|
||||
|
||||
#if !defined(INT_TYPES_DEFINED) && !defined(GG_LONGLONG)
|
||||
#define INT_TYPES_DEFINED
|
||||
|
||||
#if defined(_MSC_VER) && (_MSC_VER < 1600)
|
||||
#include <sys/types.h> // for uintptr_t on x86
|
||||
typedef unsigned __int64 uint64_t;
|
||||
typedef __int64 int64_t;
|
||||
typedef unsigned int uint32_t;
|
||||
typedef int int32_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef short int16_t;
|
||||
typedef unsigned char uint8_t;
|
||||
typedef signed char int8_t;
|
||||
#else
|
||||
#include <stdint.h> // for uintptr_t
|
||||
#endif
|
||||
|
||||
#ifndef GG_LONGLONG
|
||||
#ifndef INT_TYPES_DEFINED
|
||||
#define INT_TYPES_DEFINED
|
||||
#ifdef COMPILER_MSVC
|
||||
typedef unsigned __int64 uint64;
|
||||
typedef __int64 int64;
|
||||
#ifndef INT64_C
|
||||
#define INT64_C(x) x ## I64
|
||||
#endif
|
||||
#ifndef UINT64_C
|
||||
#define UINT64_C(x) x ## UI64
|
||||
#endif
|
||||
#define INT64_F "I64"
|
||||
#else // COMPILER_MSVC
|
||||
#if defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
|
||||
typedef unsigned long uint64; // NOLINT
|
||||
typedef long int64; // NOLINT
|
||||
#ifndef INT64_C
|
||||
#define INT64_C(x) x ## L
|
||||
#endif
|
||||
#ifndef UINT64_C
|
||||
#define UINT64_C(x) x ## UL
|
||||
#endif
|
||||
#define INT64_F "l"
|
||||
#else // defined(__LP64__) && !defined(__OpenBSD__) && !defined(__APPLE__)
|
||||
typedef unsigned long long uint64; // NOLINT
|
||||
typedef long long int64; // NOLINT
|
||||
#ifndef INT64_C
|
||||
#define INT64_C(x) x ## LL
|
||||
#endif
|
||||
#ifndef UINT64_C
|
||||
#define UINT64_C(x) x ## ULL
|
||||
#endif
|
||||
#define INT64_F "ll"
|
||||
#endif // __LP64__
|
||||
#endif // COMPILER_MSVC
|
||||
typedef unsigned int uint32;
|
||||
typedef int int32;
|
||||
typedef unsigned short uint16; // NOLINT
|
||||
typedef short int16; // NOLINT
|
||||
typedef unsigned char uint8;
|
||||
typedef signed char int8;
|
||||
#include <stdint.h> // for uintptr_t and C99 types
|
||||
#endif // defined(_MSC_VER) && (_MSC_VER < 1600)
|
||||
// Types are deprecated. Enable this macro for legacy types.
|
||||
#ifdef LIBYUV_LEGACY_TYPES
|
||||
typedef uint64_t uint64;
|
||||
typedef int64_t int64;
|
||||
typedef uint32_t uint32;
|
||||
typedef int32_t int32;
|
||||
typedef uint16_t uint16;
|
||||
typedef int16_t int16;
|
||||
typedef uint8_t uint8;
|
||||
typedef int8_t int8;
|
||||
#endif // LIBYUV_LEGACY_TYPES
|
||||
#endif // INT_TYPES_DEFINED
|
||||
#endif // GG_LONGLONG
|
||||
|
||||
// Detect compiler is for x86 or x64.
|
||||
#if defined(__x86_64__) || defined(_M_X64) || \
|
||||
defined(__i386__) || defined(_M_IX86)
|
||||
#define CPU_X86 1
|
||||
#endif
|
||||
// Detect compiler is for ARM.
|
||||
#if defined(__arm__) || defined(_M_ARM)
|
||||
#define CPU_ARM 1
|
||||
#endif
|
||||
|
||||
#ifndef ALIGNP
|
||||
#ifdef __cplusplus
|
||||
#define ALIGNP(p, t) \
|
||||
(reinterpret_cast<uint8*>(((reinterpret_cast<uintptr_t>(p) + \
|
||||
((t) - 1)) & ~((t) - 1))))
|
||||
#else
|
||||
#define ALIGNP(p, t) \
|
||||
((uint8*)((((uintptr_t)(p) + ((t) - 1)) & ~((t) - 1)))) /* NOLINT */
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_API)
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
@@ -95,24 +52,17 @@ typedef signed char int8;
|
||||
#define LIBYUV_API
|
||||
#endif // LIBYUV_BUILDING_SHARED_LIBRARY
|
||||
#elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \
|
||||
(defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \
|
||||
defined(LIBYUV_USING_SHARED_LIBRARY))
|
||||
#define LIBYUV_API __attribute__ ((visibility ("default")))
|
||||
(defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \
|
||||
defined(LIBYUV_USING_SHARED_LIBRARY))
|
||||
#define LIBYUV_API __attribute__((visibility("default")))
|
||||
#else
|
||||
#define LIBYUV_API
|
||||
#endif // __GNUC__
|
||||
#endif // LIBYUV_API
|
||||
|
||||
// TODO(fbarchard): Remove bool macros.
|
||||
#define LIBYUV_BOOL int
|
||||
#define LIBYUV_FALSE 0
|
||||
#define LIBYUV_TRUE 1
|
||||
|
||||
// Visual C x86 or GCC little endian.
|
||||
#if defined(__x86_64__) || defined(_M_X64) || \
|
||||
defined(__i386__) || defined(_M_IX86) || \
|
||||
defined(__arm__) || defined(_M_ARM) || \
|
||||
(defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
|
||||
#define LIBYUV_LITTLE_ENDIAN
|
||||
#endif
|
||||
|
||||
#endif // INCLUDE_LIBYUV_BASIC_TYPES_H_
|
||||
|
||||
@@ -20,55 +20,88 @@ extern "C" {
|
||||
|
||||
// Compute a hash for specified memory. Seed of 5381 recommended.
|
||||
LIBYUV_API
|
||||
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed);
|
||||
uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed);
|
||||
|
||||
// Hamming Distance
|
||||
LIBYUV_API
|
||||
uint64_t ComputeHammingDistance(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
|
||||
// Scan an opaque argb image and return fourcc based on alpha offset.
|
||||
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
|
||||
LIBYUV_API
|
||||
uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height);
|
||||
uint32_t ARGBDetect(const uint8_t* argb,
|
||||
int stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Sum Square Error - used to compute Mean Square Error or PSNR.
|
||||
LIBYUV_API
|
||||
uint64 ComputeSumSquareError(const uint8* src_a,
|
||||
const uint8* src_b, int count);
|
||||
uint64_t ComputeSumSquareError(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
|
||||
LIBYUV_API
|
||||
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height);
|
||||
uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a,
|
||||
int stride_a,
|
||||
const uint8_t* src_b,
|
||||
int stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
static const int kMaxPsnr = 128;
|
||||
|
||||
LIBYUV_API
|
||||
double SumSquareErrorToPsnr(uint64 sse, uint64 count);
|
||||
double SumSquareErrorToPsnr(uint64_t sse, uint64_t count);
|
||||
|
||||
LIBYUV_API
|
||||
double CalcFramePsnr(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height);
|
||||
double CalcFramePsnr(const uint8_t* src_a,
|
||||
int stride_a,
|
||||
const uint8_t* src_b,
|
||||
int stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
double I420Psnr(const uint8* src_y_a, int stride_y_a,
|
||||
const uint8* src_u_a, int stride_u_a,
|
||||
const uint8* src_v_a, int stride_v_a,
|
||||
const uint8* src_y_b, int stride_y_b,
|
||||
const uint8* src_u_b, int stride_u_b,
|
||||
const uint8* src_v_b, int stride_v_b,
|
||||
int width, int height);
|
||||
double I420Psnr(const uint8_t* src_y_a,
|
||||
int stride_y_a,
|
||||
const uint8_t* src_u_a,
|
||||
int stride_u_a,
|
||||
const uint8_t* src_v_a,
|
||||
int stride_v_a,
|
||||
const uint8_t* src_y_b,
|
||||
int stride_y_b,
|
||||
const uint8_t* src_u_b,
|
||||
int stride_u_b,
|
||||
const uint8_t* src_v_b,
|
||||
int stride_v_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
double CalcFrameSsim(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height);
|
||||
double CalcFrameSsim(const uint8_t* src_a,
|
||||
int stride_a,
|
||||
const uint8_t* src_b,
|
||||
int stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
double I420Ssim(const uint8* src_y_a, int stride_y_a,
|
||||
const uint8* src_u_a, int stride_u_a,
|
||||
const uint8* src_v_a, int stride_v_a,
|
||||
const uint8* src_y_b, int stride_y_b,
|
||||
const uint8* src_u_b, int stride_u_b,
|
||||
const uint8* src_v_b, int stride_v_b,
|
||||
int width, int height);
|
||||
double I420Ssim(const uint8_t* src_y_a,
|
||||
int stride_y_a,
|
||||
const uint8_t* src_u_a,
|
||||
int stride_u_a,
|
||||
const uint8_t* src_v_a,
|
||||
int stride_v_a,
|
||||
const uint8_t* src_y_b,
|
||||
int stride_y_b,
|
||||
const uint8_t* src_u_b,
|
||||
int stride_u_b,
|
||||
const uint8_t* src_v_b,
|
||||
int stride_v_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -18,20 +18,23 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(__pnacl__) || defined(__CLR_VER) || \
|
||||
(defined(__i386__) && !defined(__SSE2__))
|
||||
#if defined(__pnacl__) || defined(__CLR_VER) || \
|
||||
(defined(__native_client__) && defined(__x86_64__)) || \
|
||||
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
|
||||
#define LIBYUV_DISABLE_X86
|
||||
#endif
|
||||
#if defined(__native_client__)
|
||||
#define LIBYUV_DISABLE_NEON
|
||||
#endif
|
||||
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer)
|
||||
#define LIBYUV_DISABLE_X86
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if defined(_M_IX86) && !defined(__clang__) && \
|
||||
defined(_MSC_VER) && _MSC_VER >= 1700
|
||||
#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \
|
||||
_MSC_VER >= 1700
|
||||
#define VISUALC_HAS_AVX2 1
|
||||
#endif // VisualStudio >= 2012
|
||||
|
||||
@@ -42,39 +45,93 @@ extern "C" {
|
||||
#endif // clang >= 3.4
|
||||
#endif // __clang__
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
defined(_M_IX86) && (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
|
||||
#define HAS_HASHDJB2_AVX2
|
||||
#endif
|
||||
|
||||
// The following are available for Visual C and GCC:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(__x86_64__) || (defined(__i386__) || defined(_M_IX86)))
|
||||
(defined(__x86_64__) || defined(__i386__) || defined(_M_IX86))
|
||||
#define HAS_HASHDJB2_SSE41
|
||||
#define HAS_SUMSQUAREERROR_SSE2
|
||||
#define HAS_HAMMINGDISTANCE_SSE42
|
||||
#endif
|
||||
|
||||
// The following are available for Visual C and clangcl 32 bit:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && \
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \
|
||||
(defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2))
|
||||
#define HAS_HASHDJB2_AVX2
|
||||
#define HAS_SUMSQUAREERROR_AVX2
|
||||
#endif
|
||||
|
||||
// The following are available for GCC and clangcl 64 bit:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
||||
#define HAS_HAMMINGDISTANCE_SSSE3
|
||||
#endif
|
||||
|
||||
// The following are available for GCC and clangcl 64 bit:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(CLANG_HAS_AVX2) && \
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
||||
#define HAS_HAMMINGDISTANCE_AVX2
|
||||
#endif
|
||||
|
||||
// The following are available for Neon:
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && \
|
||||
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
|
||||
#define HAS_SUMSQUAREERROR_NEON
|
||||
#define HAS_HAMMINGDISTANCE_NEON
|
||||
#endif
|
||||
|
||||
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count);
|
||||
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count);
|
||||
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count);
|
||||
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count);
|
||||
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
#define HAS_HAMMINGDISTANCE_MSA
|
||||
#define HAS_SUMSQUAREERROR_MSA
|
||||
#endif
|
||||
|
||||
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed);
|
||||
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed);
|
||||
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed);
|
||||
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
|
||||
#define HAS_HAMMINGDISTANCE_MMI
|
||||
#define HAS_SUMSQUAREERROR_MMI
|
||||
#endif
|
||||
|
||||
uint32_t HammingDistance_C(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
uint32_t HammingDistance_AVX2(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
uint32_t HammingDistance_NEON(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
uint32_t HammingDistance_MSA(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
uint32_t HammingDistance_MMI(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
uint32_t SumSquareError_C(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
uint32_t SumSquareError_SSE2(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
uint32_t SumSquareError_AVX2(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
uint32_t SumSquareError_NEON(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
uint32_t SumSquareError_MSA(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
uint32_t SumSquareError_MMI(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count);
|
||||
|
||||
uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed);
|
||||
uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed);
|
||||
uint32_t HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -16,8 +16,8 @@
|
||||
#include "libyuv/rotate.h" // For enum RotationMode.
|
||||
|
||||
// TODO(fbarchard): fix WebRTC source to include following libyuv headers:
|
||||
#include "libyuv/convert_argb.h" // For WebRTC I420ToARGB. b/620
|
||||
#include "libyuv/convert_from.h" // For WebRTC ConvertFromI420. b/620
|
||||
#include "libyuv/convert_argb.h" // For WebRTC I420ToARGB. b/620
|
||||
#include "libyuv/convert_from.h" // For WebRTC ConvertFromI420. b/620
|
||||
#include "libyuv/planar_functions.h" // For WebRTC I420Rect, CopyPlane. b/618
|
||||
|
||||
#ifdef __cplusplus
|
||||
@@ -27,195 +27,391 @@ extern "C" {
|
||||
|
||||
// Convert I444 to I420.
|
||||
LIBYUV_API
|
||||
int I444ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I444ToI420(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I444 to NV21.
|
||||
LIBYUV_API
|
||||
int I444ToNV21(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_vu,
|
||||
int dst_stride_vu,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I422 to I420.
|
||||
LIBYUV_API
|
||||
int I422ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I422ToI420(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I411 to I420.
|
||||
// Convert I422 to NV21.
|
||||
LIBYUV_API
|
||||
int I411ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I422ToNV21(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_vu,
|
||||
int dst_stride_vu,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Copy I420 to I420.
|
||||
#define I420ToI420 I420Copy
|
||||
LIBYUV_API
|
||||
int I420Copy(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I420Copy(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Copy I010 to I010
|
||||
#define I010ToI010 I010Copy
|
||||
#define H010ToH010 I010Copy
|
||||
LIBYUV_API
|
||||
int I010Copy(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert 10 bit YUV to 8 bit
|
||||
#define H010ToH420 I010ToI420
|
||||
LIBYUV_API
|
||||
int I010ToI420(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I400 (grey) to I420.
|
||||
LIBYUV_API
|
||||
int I400ToI420(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I400ToI420(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I400 (grey) to NV21.
|
||||
LIBYUV_API
|
||||
int I400ToNV21(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_vu,
|
||||
int dst_stride_vu,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#define J400ToJ420 I400ToI420
|
||||
|
||||
// Convert NV12 to I420.
|
||||
LIBYUV_API
|
||||
int NV12ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int NV12ToI420(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert NV21 to I420.
|
||||
LIBYUV_API
|
||||
int NV21ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_vu, int src_stride_vu,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int NV21ToI420(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_vu,
|
||||
int src_stride_vu,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert YUY2 to I420.
|
||||
LIBYUV_API
|
||||
int YUY2ToI420(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int YUY2ToI420(const uint8_t* src_yuy2,
|
||||
int src_stride_yuy2,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert UYVY to I420.
|
||||
LIBYUV_API
|
||||
int UYVYToI420(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int UYVYToI420(const uint8_t* src_uyvy,
|
||||
int src_stride_uyvy,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert M420 to I420.
|
||||
LIBYUV_API
|
||||
int M420ToI420(const uint8* src_m420, int src_stride_m420,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int M420ToI420(const uint8_t* src_m420,
|
||||
int src_stride_m420,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert Android420 to I420.
|
||||
LIBYUV_API
|
||||
int Android420ToI420(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
int pixel_stride_uv,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int Android420ToI420(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
int src_pixel_stride_uv,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// ARGB little endian (bgra in memory) to I420.
|
||||
LIBYUV_API
|
||||
int ARGBToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGBToI420(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// BGRA little endian (argb in memory) to I420.
|
||||
LIBYUV_API
|
||||
int BGRAToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int BGRAToI420(const uint8_t* src_bgra,
|
||||
int src_stride_bgra,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// ABGR little endian (rgba in memory) to I420.
|
||||
LIBYUV_API
|
||||
int ABGRToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ABGRToI420(const uint8_t* src_abgr,
|
||||
int src_stride_abgr,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGBA little endian (abgr in memory) to I420.
|
||||
LIBYUV_API
|
||||
int RGBAToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int RGBAToI420(const uint8_t* src_rgba,
|
||||
int src_stride_rgba,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB little endian (bgr in memory) to I420.
|
||||
LIBYUV_API
|
||||
int RGB24ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int RGB24ToI420(const uint8_t* src_rgb24,
|
||||
int src_stride_rgb24,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB big endian (rgb in memory) to I420.
|
||||
LIBYUV_API
|
||||
int RAWToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int RAWToI420(const uint8_t* src_raw,
|
||||
int src_stride_raw,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB16 (RGBP fourcc) little endian to I420.
|
||||
LIBYUV_API
|
||||
int RGB565ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int RGB565ToI420(const uint8_t* src_rgb565,
|
||||
int src_stride_rgb565,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB15 (RGBO fourcc) little endian to I420.
|
||||
LIBYUV_API
|
||||
int ARGB1555ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGB1555ToI420(const uint8_t* src_argb1555,
|
||||
int src_stride_argb1555,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB12 (R444 fourcc) little endian to I420.
|
||||
LIBYUV_API
|
||||
int ARGB4444ToI420(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGB4444ToI420(const uint8_t* src_argb4444,
|
||||
int src_stride_argb4444,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#ifdef HAVE_JPEG
|
||||
// src_mjpg is pointer to raw jpeg bytes in memory
|
||||
// src_size_mjpg is size of jpeg in bytes
|
||||
// src_width/height provided by capture.
|
||||
// dst_width/height for clipping determine final size.
|
||||
LIBYUV_API
|
||||
int MJPGToI420(const uint8* sample, size_t sample_size,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int src_width, int src_height,
|
||||
int dst_width, int dst_height);
|
||||
int MJPGToI420(const uint8_t* src_mjpg,
|
||||
size_t src_size_mjpg,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height);
|
||||
|
||||
// JPEG to NV21
|
||||
LIBYUV_API
|
||||
int MJPGToNV21(const uint8_t* src_mjpg,
|
||||
size_t src_size_mjpg,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_vu,
|
||||
int dst_stride_vu,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height);
|
||||
|
||||
// Query size of MJPG in pixels.
|
||||
LIBYUV_API
|
||||
int MJPGSize(const uint8* sample, size_t sample_size,
|
||||
int* width, int* height);
|
||||
int MJPGSize(const uint8_t* src_mjpg,
|
||||
size_t src_size_mjpg,
|
||||
int* width,
|
||||
int* height);
|
||||
#endif
|
||||
|
||||
// Convert camera sample to I420 with cropping, rotation and vertical flip.
|
||||
@@ -238,18 +434,25 @@ int MJPGSize(const uint8* sample, size_t sample_size,
|
||||
// Must be less than or equal to src_width/src_height
|
||||
// Cropping parameters are pre-rotation.
|
||||
// "rotation" can be 0, 90, 180 or 270.
|
||||
// "format" is a fourcc. ie 'I420', 'YUY2'
|
||||
// "fourcc" is a fourcc. ie 'I420', 'YUY2'
|
||||
// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
|
||||
LIBYUV_API
|
||||
int ConvertToI420(const uint8* src_frame, size_t src_size,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int crop_x, int crop_y,
|
||||
int src_width, int src_height,
|
||||
int crop_width, int crop_height,
|
||||
int ConvertToI420(const uint8_t* sample,
|
||||
size_t sample_size,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int crop_x,
|
||||
int crop_y,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int crop_width,
|
||||
int crop_height,
|
||||
enum RotationMode rotation,
|
||||
uint32 format);
|
||||
uint32_t fourcc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -30,258 +30,643 @@ extern "C" {
|
||||
|
||||
// Copy ARGB to ARGB.
|
||||
LIBYUV_API
|
||||
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBCopy(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I420 to ARGB.
|
||||
LIBYUV_API
|
||||
int I420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I420ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Duplicate prototype for function in convert_from.h for remoting.
|
||||
LIBYUV_API
|
||||
int I420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I420ToABGR(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I010 to ARGB.
|
||||
LIBYUV_API
|
||||
int I010ToARGB(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I010 to ARGB.
|
||||
LIBYUV_API
|
||||
int I010ToARGB(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I010 to ABGR.
|
||||
LIBYUV_API
|
||||
int I010ToABGR(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H010 to ARGB.
|
||||
LIBYUV_API
|
||||
int H010ToARGB(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H010 to ABGR.
|
||||
LIBYUV_API
|
||||
int H010ToABGR(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I422 to ARGB.
|
||||
LIBYUV_API
|
||||
int I422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I422ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I444 to ARGB.
|
||||
LIBYUV_API
|
||||
int I444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I444ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert J444 to ARGB.
|
||||
LIBYUV_API
|
||||
int J444ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int J444ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I444 to ABGR.
|
||||
LIBYUV_API
|
||||
int I444ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
|
||||
// Convert I411 to ARGB.
|
||||
LIBYUV_API
|
||||
int I411ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I444ToABGR(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I420 with Alpha to preattenuated ARGB.
|
||||
LIBYUV_API
|
||||
int I420AlphaToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
const uint8* src_a, int src_stride_a,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height, int attenuate);
|
||||
int I420AlphaToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
const uint8_t* src_a,
|
||||
int src_stride_a,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height,
|
||||
int attenuate);
|
||||
|
||||
// Convert I420 with Alpha to preattenuated ABGR.
|
||||
LIBYUV_API
|
||||
int I420AlphaToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
const uint8* src_a, int src_stride_a,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height, int attenuate);
|
||||
int I420AlphaToABGR(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
const uint8_t* src_a,
|
||||
int src_stride_a,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height,
|
||||
int attenuate);
|
||||
|
||||
// Convert I400 (grey) to ARGB. Reverse of ARGBToI400.
|
||||
LIBYUV_API
|
||||
int I400ToARGB(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I400ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert J400 (jpeg grey) to ARGB.
|
||||
LIBYUV_API
|
||||
int J400ToARGB(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int J400ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Alias.
|
||||
#define YToARGB I400ToARGB
|
||||
|
||||
// Convert NV12 to ARGB.
|
||||
LIBYUV_API
|
||||
int NV12ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int NV12ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert NV21 to ARGB.
|
||||
LIBYUV_API
|
||||
int NV21ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_vu, int src_stride_vu,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int NV21ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_vu,
|
||||
int src_stride_vu,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert NV12 to ABGR.
|
||||
int NV12ToABGR(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert NV21 to ABGR.
|
||||
LIBYUV_API
|
||||
int NV21ToABGR(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_vu,
|
||||
int src_stride_vu,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert NV12 to RGB24.
|
||||
LIBYUV_API
|
||||
int NV12ToRGB24(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint8_t* dst_rgb24,
|
||||
int dst_stride_rgb24,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert NV21 to RGB24.
|
||||
LIBYUV_API
|
||||
int NV21ToRGB24(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_vu,
|
||||
int src_stride_vu,
|
||||
uint8_t* dst_rgb24,
|
||||
int dst_stride_rgb24,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert NV12 to RAW.
|
||||
LIBYUV_API
|
||||
int NV12ToRAW(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint8_t* dst_raw,
|
||||
int dst_stride_raw,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert NV21 to RAW.
|
||||
LIBYUV_API
|
||||
int NV21ToRAW(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_vu,
|
||||
int src_stride_vu,
|
||||
uint8_t* dst_raw,
|
||||
int dst_stride_raw,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert M420 to ARGB.
|
||||
LIBYUV_API
|
||||
int M420ToARGB(const uint8* src_m420, int src_stride_m420,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int M420ToARGB(const uint8_t* src_m420,
|
||||
int src_stride_m420,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert YUY2 to ARGB.
|
||||
LIBYUV_API
|
||||
int YUY2ToARGB(const uint8* src_yuy2, int src_stride_yuy2,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int YUY2ToARGB(const uint8_t* src_yuy2,
|
||||
int src_stride_yuy2,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert UYVY to ARGB.
|
||||
LIBYUV_API
|
||||
int UYVYToARGB(const uint8* src_uyvy, int src_stride_uyvy,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int UYVYToARGB(const uint8_t* src_uyvy,
|
||||
int src_stride_uyvy,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert J420 to ARGB.
|
||||
LIBYUV_API
|
||||
int J420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int J420ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert J422 to ARGB.
|
||||
LIBYUV_API
|
||||
int J422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int J422ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert J420 to ABGR.
|
||||
LIBYUV_API
|
||||
int J420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
int J420ToABGR(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert J422 to ABGR.
|
||||
LIBYUV_API
|
||||
int J422ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
int J422ToABGR(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H420 to ARGB.
|
||||
LIBYUV_API
|
||||
int H420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int H420ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H422 to ARGB.
|
||||
LIBYUV_API
|
||||
int H422ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int H422ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H420 to ABGR.
|
||||
LIBYUV_API
|
||||
int H420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
int H420ToABGR(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H422 to ABGR.
|
||||
LIBYUV_API
|
||||
int H422ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
int H422ToABGR(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H010 to ARGB.
|
||||
LIBYUV_API
|
||||
int H010ToARGB(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I010 to AR30.
|
||||
LIBYUV_API
|
||||
int I010ToAR30(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H010 to AR30.
|
||||
LIBYUV_API
|
||||
int H010ToAR30(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I010 to AB30.
|
||||
LIBYUV_API
|
||||
int I010ToAB30(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_ab30,
|
||||
int dst_stride_ab30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H010 to AB30.
|
||||
LIBYUV_API
|
||||
int H010ToAB30(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_ab30,
|
||||
int dst_stride_ab30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// BGRA little endian (argb in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int BGRAToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int BGRAToARGB(const uint8_t* src_bgra,
|
||||
int src_stride_bgra,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// ABGR little endian (rgba in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int ABGRToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ABGRToARGB(const uint8_t* src_abgr,
|
||||
int src_stride_abgr,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGBA little endian (abgr in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int RGBAToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int RGBAToARGB(const uint8_t* src_rgba,
|
||||
int src_stride_rgba,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Deprecated function name.
|
||||
#define BG24ToARGB RGB24ToARGB
|
||||
|
||||
// RGB little endian (bgr in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int RGB24ToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int RGB24ToARGB(const uint8_t* src_rgb24,
|
||||
int src_stride_rgb24,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB big endian (rgb in memory) to ARGB.
|
||||
LIBYUV_API
|
||||
int RAWToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int RAWToARGB(const uint8_t* src_raw,
|
||||
int src_stride_raw,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB16 (RGBP fourcc) little endian to ARGB.
|
||||
LIBYUV_API
|
||||
int RGB565ToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int RGB565ToARGB(const uint8_t* src_rgb565,
|
||||
int src_stride_rgb565,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB15 (RGBO fourcc) little endian to ARGB.
|
||||
LIBYUV_API
|
||||
int ARGB1555ToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGB1555ToARGB(const uint8_t* src_argb1555,
|
||||
int src_stride_argb1555,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// RGB12 (R444 fourcc) little endian to ARGB.
|
||||
LIBYUV_API
|
||||
int ARGB4444ToARGB(const uint8* src_frame, int src_stride_frame,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGB4444ToARGB(const uint8_t* src_argb4444,
|
||||
int src_stride_argb4444,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Aliases
|
||||
#define AB30ToARGB AR30ToABGR
|
||||
#define AB30ToABGR AR30ToARGB
|
||||
#define AB30ToAR30 AR30ToAB30
|
||||
|
||||
// Convert AR30 To ARGB.
|
||||
LIBYUV_API
|
||||
int AR30ToARGB(const uint8_t* src_ar30,
|
||||
int src_stride_ar30,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert AR30 To ABGR.
|
||||
LIBYUV_API
|
||||
int AR30ToABGR(const uint8_t* src_ar30,
|
||||
int src_stride_ar30,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert AR30 To AB30.
|
||||
LIBYUV_API
|
||||
int AR30ToAB30(const uint8_t* src_ar30,
|
||||
int src_stride_ar30,
|
||||
uint8_t* dst_ab30,
|
||||
int dst_stride_ab30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#ifdef HAVE_JPEG
|
||||
// src_width/height provided by capture
|
||||
// dst_width/height for clipping determine final size.
|
||||
LIBYUV_API
|
||||
int MJPGToARGB(const uint8* sample, size_t sample_size,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int src_width, int src_height,
|
||||
int dst_width, int dst_height);
|
||||
int MJPGToARGB(const uint8_t* src_mjpg,
|
||||
size_t src_size_mjpg,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height);
|
||||
#endif
|
||||
|
||||
// Convert Android420 to ARGB.
|
||||
LIBYUV_API
|
||||
int Android420ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
int src_pixel_stride_uv,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert Android420 to ABGR.
|
||||
LIBYUV_API
|
||||
int Android420ToABGR(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
int src_pixel_stride_uv,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert camera sample to ARGB with cropping, rotation and vertical flip.
|
||||
// "src_size" is needed to parse MJPG.
|
||||
// "sample_size" is needed to parse MJPG.
|
||||
// "dst_stride_argb" number of bytes in a row of the dst_argb plane.
|
||||
// Normally this would be the same as dst_width, with recommended alignment
|
||||
// to 16 bytes for better efficiency.
|
||||
@@ -300,16 +685,21 @@ int MJPGToARGB(const uint8* sample, size_t sample_size,
|
||||
// Must be less than or equal to src_width/src_height
|
||||
// Cropping parameters are pre-rotation.
|
||||
// "rotation" can be 0, 90, 180 or 270.
|
||||
// "format" is a fourcc. ie 'I420', 'YUY2'
|
||||
// "fourcc" is a fourcc. ie 'I420', 'YUY2'
|
||||
// Returns 0 for successful; -1 for invalid parameter. Non-zero for failure.
|
||||
LIBYUV_API
|
||||
int ConvertToARGB(const uint8* src_frame, size_t src_size,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int crop_x, int crop_y,
|
||||
int src_width, int src_height,
|
||||
int crop_width, int crop_height,
|
||||
int ConvertToARGB(const uint8_t* sample,
|
||||
size_t sample_size,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int crop_x,
|
||||
int crop_y,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int crop_width,
|
||||
int crop_height,
|
||||
enum RotationMode rotation,
|
||||
uint32 format);
|
||||
uint32_t fourcc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -21,155 +21,342 @@ extern "C" {
|
||||
|
||||
// See Also convert.h for conversions from formats to I420.
|
||||
|
||||
// I420Copy in convert to I420ToI420.
|
||||
// Convert 8 bit YUV to 10 bit.
|
||||
#define H420ToH010 I420ToI010
|
||||
int I420ToI010(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToI422(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I420ToI422(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToI444(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToI411(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int I420ToI444(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21.
|
||||
LIBYUV_API
|
||||
int I400Copy(const uint8* src_y, int src_stride_y,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
int I400Copy(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToNV12(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_uv, int dst_stride_uv,
|
||||
int width, int height);
|
||||
int I420ToNV12(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToNV21(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_vu, int dst_stride_vu,
|
||||
int width, int height);
|
||||
int I420ToNV21(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_vu,
|
||||
int dst_stride_vu,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToYUY2(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I420ToYUY2(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_yuy2,
|
||||
int dst_stride_yuy2,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToUYVY(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I420ToUYVY(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_uyvy,
|
||||
int dst_stride_uyvy,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToARGB(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I420ToARGB(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToBGRA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I420ToBGRA(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_bgra,
|
||||
int dst_stride_bgra,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToABGR(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int I420ToABGR(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRGBA(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
int width, int height);
|
||||
int I420ToRGBA(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_rgba,
|
||||
int dst_stride_rgba,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRGB24(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I420ToRGB24(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_rgb24,
|
||||
int dst_stride_rgb24,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRAW(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I420ToRAW(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_raw,
|
||||
int dst_stride_raw,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRGB565(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int H420ToRGB24(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_rgb24,
|
||||
int dst_stride_rgb24,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int H420ToRAW(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_raw,
|
||||
int dst_stride_raw,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRGB565(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int J420ToRGB565(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int H420ToRGB565(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I422ToRGB565(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I420 To RGB565 with 4x4 dither matrix (16 bytes).
|
||||
// Values in dither matrix from 0 to 7 recommended.
|
||||
// The order of the dither matrix is first byte is upper left.
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToRGB565Dither(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
const uint8* dither4x4, int width, int height);
|
||||
int I420ToRGB565Dither(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
const uint8_t* dither4x4,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToARGB1555(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I420ToARGB1555(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb1555,
|
||||
int dst_stride_argb1555,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
int I420ToARGB4444(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_frame, int dst_stride_frame,
|
||||
int width, int height);
|
||||
int I420ToARGB4444(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_argb4444,
|
||||
int dst_stride_argb4444,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I420 to AR30.
|
||||
LIBYUV_API
|
||||
int I420ToAR30(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert H420 to AR30.
|
||||
LIBYUV_API
|
||||
int H420ToAR30(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert I420 to specified format.
|
||||
// "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the
|
||||
// buffer has contiguous rows. Can be negative. A multiple of 16 is optimal.
|
||||
LIBYUV_API
|
||||
int ConvertFromI420(const uint8* y, int y_stride,
|
||||
const uint8* u, int u_stride,
|
||||
const uint8* v, int v_stride,
|
||||
uint8* dst_sample, int dst_sample_stride,
|
||||
int width, int height,
|
||||
uint32 format);
|
||||
int ConvertFromI420(const uint8_t* y,
|
||||
int y_stride,
|
||||
const uint8_t* u,
|
||||
int u_stride,
|
||||
const uint8_t* v,
|
||||
int v_stride,
|
||||
uint8_t* dst_sample,
|
||||
int dst_sample_stride,
|
||||
int width,
|
||||
int height,
|
||||
uint32_t fourcc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -21,166 +21,263 @@ extern "C" {
|
||||
// Copy ARGB to ARGB.
|
||||
#define ARGBToARGB ARGBCopy
|
||||
LIBYUV_API
|
||||
int ARGBCopy(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int width, int height);
|
||||
int ARGBCopy(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To BGRA.
|
||||
LIBYUV_API
|
||||
int ARGBToBGRA(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_bgra, int dst_stride_bgra,
|
||||
int width, int height);
|
||||
int ARGBToBGRA(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_bgra,
|
||||
int dst_stride_bgra,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To ABGR.
|
||||
LIBYUV_API
|
||||
int ARGBToABGR(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_abgr, int dst_stride_abgr,
|
||||
int width, int height);
|
||||
int ARGBToABGR(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_abgr,
|
||||
int dst_stride_abgr,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RGBA.
|
||||
LIBYUV_API
|
||||
int ARGBToRGBA(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgba, int dst_stride_rgba,
|
||||
int width, int height);
|
||||
int ARGBToRGBA(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_rgba,
|
||||
int dst_stride_rgba,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Aliases
|
||||
#define ARGBToAB30 ABGRToAR30
|
||||
#define ABGRToAB30 ARGBToAR30
|
||||
|
||||
// Convert ABGR To AR30.
|
||||
LIBYUV_API
|
||||
int ABGRToAR30(const uint8_t* src_abgr,
|
||||
int src_stride_abgr,
|
||||
uint8_t* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To AR30.
|
||||
LIBYUV_API
|
||||
int ARGBToAR30(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_ar30,
|
||||
int dst_stride_ar30,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RGB24.
|
||||
LIBYUV_API
|
||||
int ARGBToRGB24(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb24, int dst_stride_rgb24,
|
||||
int width, int height);
|
||||
int ARGBToRGB24(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_rgb24,
|
||||
int dst_stride_rgb24,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RAW.
|
||||
LIBYUV_API
|
||||
int ARGBToRAW(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb, int dst_stride_rgb,
|
||||
int width, int height);
|
||||
int ARGBToRAW(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_raw,
|
||||
int dst_stride_raw,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RGB565.
|
||||
LIBYUV_API
|
||||
int ARGBToRGB565(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
int width, int height);
|
||||
int ARGBToRGB565(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes).
|
||||
// Values in dither matrix from 0 to 7 recommended.
|
||||
// In the dither matrix, the first byte is the upper-left entry.
|
||||
// TODO(fbarchard): Consider pointer to 2d array for dither4x4.
|
||||
// const uint8(*dither)[4][4];
|
||||
// const uint8_t(*dither)[4][4];
|
||||
LIBYUV_API
|
||||
int ARGBToRGB565Dither(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_rgb565, int dst_stride_rgb565,
|
||||
const uint8* dither4x4, int width, int height);
|
||||
int ARGBToRGB565Dither(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_rgb565,
|
||||
int dst_stride_rgb565,
|
||||
const uint8_t* dither4x4,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To ARGB1555.
|
||||
LIBYUV_API
|
||||
int ARGBToARGB1555(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb1555, int dst_stride_argb1555,
|
||||
int width, int height);
|
||||
int ARGBToARGB1555(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_argb1555,
|
||||
int dst_stride_argb1555,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To ARGB4444.
|
||||
LIBYUV_API
|
||||
int ARGBToARGB4444(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb4444, int dst_stride_argb4444,
|
||||
int width, int height);
|
||||
int ARGBToARGB4444(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_argb4444,
|
||||
int dst_stride_argb4444,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To I444.
|
||||
LIBYUV_API
|
||||
int ARGBToI444(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGBToI444(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To I422.
|
||||
LIBYUV_API
|
||||
int ARGBToI422(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGBToI422(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To I420. (also in convert.h)
|
||||
LIBYUV_API
|
||||
int ARGBToI420(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGBToI420(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB to J420 (JPEG full-range I420).
|
||||
LIBYUV_API
|
||||
int ARGBToJ420(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yj, int dst_stride_yj,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGBToJ420(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_yj,
|
||||
int dst_stride_yj,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB to J422.
|
||||
LIBYUV_API
|
||||
int ARGBToJ422(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yj, int dst_stride_yj,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
|
||||
// Convert ARGB To I411.
|
||||
LIBYUV_API
|
||||
int ARGBToI411(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height);
|
||||
int ARGBToJ422(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_yj,
|
||||
int dst_stride_yj,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB to J400 (JPEG full-range).
|
||||
LIBYUV_API
|
||||
int ARGBToJ400(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yj, int dst_stride_yj,
|
||||
int width, int height);
|
||||
int ARGBToJ400(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_yj,
|
||||
int dst_stride_yj,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB to I400.
|
||||
LIBYUV_API
|
||||
int ARGBToI400(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
int width, int height);
|
||||
int ARGBToI400(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB to G. (Reverse of J400ToARGB, which replicates G back to ARGB.)
|
||||
LIBYUV_API
|
||||
int ARGBToG(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_g, int dst_stride_g,
|
||||
int width, int height);
|
||||
int ARGBToG(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_g,
|
||||
int dst_stride_g,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To NV12.
|
||||
LIBYUV_API
|
||||
int ARGBToNV12(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_uv, int dst_stride_uv,
|
||||
int width, int height);
|
||||
int ARGBToNV12(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_uv,
|
||||
int dst_stride_uv,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To NV21.
|
||||
LIBYUV_API
|
||||
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_vu, int dst_stride_vu,
|
||||
int width, int height);
|
||||
int ARGBToNV21(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_vu,
|
||||
int dst_stride_vu,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To NV21.
|
||||
LIBYUV_API
|
||||
int ARGBToNV21(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_vu, int dst_stride_vu,
|
||||
int width, int height);
|
||||
int ARGBToNV21(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_vu,
|
||||
int dst_stride_vu,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To YUY2.
|
||||
LIBYUV_API
|
||||
int ARGBToYUY2(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_yuy2, int dst_stride_yuy2,
|
||||
int width, int height);
|
||||
int ARGBToYUY2(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_yuy2,
|
||||
int dst_stride_yuy2,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Convert ARGB To UYVY.
|
||||
LIBYUV_API
|
||||
int ARGBToUYVY(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_uyvy, int dst_stride_uyvy,
|
||||
int width, int height);
|
||||
int ARGBToUYVY(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_uyvy,
|
||||
int dst_stride_uyvy,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -31,47 +31,86 @@ static const int kCpuHasX86 = 0x10;
|
||||
static const int kCpuHasSSE2 = 0x20;
|
||||
static const int kCpuHasSSSE3 = 0x40;
|
||||
static const int kCpuHasSSE41 = 0x80;
|
||||
static const int kCpuHasSSE42 = 0x100;
|
||||
static const int kCpuHasSSE42 = 0x100; // unused at this time.
|
||||
static const int kCpuHasAVX = 0x200;
|
||||
static const int kCpuHasAVX2 = 0x400;
|
||||
static const int kCpuHasERMS = 0x800;
|
||||
static const int kCpuHasFMA3 = 0x1000;
|
||||
static const int kCpuHasAVX3 = 0x2000;
|
||||
// 0x2000, 0x4000, 0x8000 reserved for future X86 flags.
|
||||
static const int kCpuHasF16C = 0x2000;
|
||||
static const int kCpuHasGFNI = 0x4000;
|
||||
static const int kCpuHasAVX512BW = 0x8000;
|
||||
static const int kCpuHasAVX512VL = 0x10000;
|
||||
static const int kCpuHasAVX512VBMI = 0x20000;
|
||||
static const int kCpuHasAVX512VBMI2 = 0x40000;
|
||||
static const int kCpuHasAVX512VBITALG = 0x80000;
|
||||
static const int kCpuHasAVX512VPOPCNTDQ = 0x100000;
|
||||
|
||||
// These flags are only valid on MIPS processors.
|
||||
static const int kCpuHasMIPS = 0x10000;
|
||||
static const int kCpuHasDSPR2 = 0x20000;
|
||||
static const int kCpuHasMSA = 0x40000;
|
||||
static const int kCpuHasMIPS = 0x200000;
|
||||
static const int kCpuHasMSA = 0x400000;
|
||||
static const int kCpuHasMMI = 0x800000;
|
||||
|
||||
// Internal function used to auto-init.
|
||||
// Optional init function. TestCpuFlag does an auto-init.
|
||||
// Returns cpu_info flags.
|
||||
LIBYUV_API
|
||||
int InitCpuFlags(void);
|
||||
|
||||
// Detect CPU has SSE2 etc.
|
||||
// Test_flag parameter should be one of kCpuHas constants above.
|
||||
// Returns non-zero if instruction set is detected
|
||||
static __inline int TestCpuFlag(int test_flag) {
|
||||
LIBYUV_API extern int cpu_info_;
|
||||
#ifdef __ATOMIC_RELAXED
|
||||
int cpu_info = __atomic_load_n(&cpu_info_, __ATOMIC_RELAXED);
|
||||
#else
|
||||
int cpu_info = cpu_info_;
|
||||
#endif
|
||||
return (!cpu_info ? InitCpuFlags() : cpu_info) & test_flag;
|
||||
}
|
||||
|
||||
// Internal function for parsing /proc/cpuinfo.
|
||||
LIBYUV_API
|
||||
int ArmCpuCaps(const char* cpuinfo_name);
|
||||
|
||||
// Detect CPU has SSE2 etc.
|
||||
// Test_flag parameter should be one of kCpuHas constants above.
|
||||
// returns non-zero if instruction set is detected
|
||||
static __inline int TestCpuFlag(int test_flag) {
|
||||
LIBYUV_API extern int cpu_info_;
|
||||
return (!cpu_info_ ? InitCpuFlags() : cpu_info_) & test_flag;
|
||||
}
|
||||
|
||||
// For testing, allow CPU flags to be disabled.
|
||||
// e.g. MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3.
|
||||
// MaskCpuFlags(-1) to enable all cpu specific optimizations.
|
||||
// MaskCpuFlags(1) to disable all cpu specific optimizations.
|
||||
// MaskCpuFlags(0) to reset state so next call will auto init.
|
||||
// Returns cpu_info flags.
|
||||
LIBYUV_API
|
||||
void MaskCpuFlags(int enable_flags);
|
||||
int MaskCpuFlags(int enable_flags);
|
||||
|
||||
// Sets the CPU flags to |cpu_flags|, bypassing the detection code. |cpu_flags|
|
||||
// should be a valid combination of the kCpuHas constants above and include
|
||||
// kCpuInitialized. Use this method when running in a sandboxed process where
|
||||
// the detection code might fail (as it might access /proc/cpuinfo). In such
|
||||
// cases the cpu_info can be obtained from a non sandboxed process by calling
|
||||
// InitCpuFlags() and passed to the sandboxed process (via command line
|
||||
// parameters, IPC...) which can then call this method to initialize the CPU
|
||||
// flags.
|
||||
// Notes:
|
||||
// - when specifying 0 for |cpu_flags|, the auto initialization is enabled
|
||||
// again.
|
||||
// - enabling CPU features that are not supported by the CPU will result in
|
||||
// undefined behavior.
|
||||
// TODO(fbarchard): consider writing a helper function that translates from
|
||||
// other library CPU info to libyuv CPU info and add a .md doc that explains
|
||||
// CPU detection.
|
||||
static __inline void SetCpuFlags(int cpu_flags) {
|
||||
LIBYUV_API extern int cpu_info_;
|
||||
#ifdef __ATOMIC_RELAXED
|
||||
__atomic_store_n(&cpu_info_, cpu_flags, __ATOMIC_RELAXED);
|
||||
#else
|
||||
cpu_info_ = cpu_flags;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Low level cpuid for X86. Returns zeros on other CPUs.
|
||||
// eax is the info type that you want.
|
||||
// ecx is typically the cpu number, and should normally be zero.
|
||||
LIBYUV_API
|
||||
void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info);
|
||||
void CpuId(int info_eax, int info_ecx, int* cpu_info);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -12,15 +12,140 @@
|
||||
#define INCLUDE_LIBYUV_MACROS_MSA_H_
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
#include <stdint.h>
|
||||
#include <msa.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
|
||||
#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
|
||||
#if (__mips_isa_rev >= 6)
|
||||
#define LW(psrc) \
|
||||
({ \
|
||||
const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \
|
||||
uint32_t val_m; \
|
||||
asm volatile("lw %[val_m], %[psrc_lw_m] \n" \
|
||||
: [val_m] "=r"(val_m) \
|
||||
: [psrc_lw_m] "m"(*psrc_lw_m)); \
|
||||
val_m; \
|
||||
})
|
||||
|
||||
#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
|
||||
#if (__mips == 64)
|
||||
#define LD(psrc) \
|
||||
({ \
|
||||
const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
|
||||
uint64_t val_m = 0; \
|
||||
asm volatile("ld %[val_m], %[psrc_ld_m] \n" \
|
||||
: [val_m] "=r"(val_m) \
|
||||
: [psrc_ld_m] "m"(*psrc_ld_m)); \
|
||||
val_m; \
|
||||
})
|
||||
#else // !(__mips == 64)
|
||||
#define LD(psrc) \
|
||||
({ \
|
||||
const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
|
||||
uint32_t val0_m, val1_m; \
|
||||
uint64_t val_m = 0; \
|
||||
val0_m = LW(psrc_ld_m); \
|
||||
val1_m = LW(psrc_ld_m + 4); \
|
||||
val_m = (uint64_t)(val1_m); /* NOLINT */ \
|
||||
val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
|
||||
val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \
|
||||
val_m; \
|
||||
})
|
||||
#endif // (__mips == 64)
|
||||
|
||||
#define SW(val, pdst) \
|
||||
({ \
|
||||
uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
|
||||
uint32_t val_m = (val); \
|
||||
asm volatile("sw %[val_m], %[pdst_sw_m] \n" \
|
||||
: [pdst_sw_m] "=m"(*pdst_sw_m) \
|
||||
: [val_m] "r"(val_m)); \
|
||||
})
|
||||
|
||||
#if (__mips == 64)
|
||||
#define SD(val, pdst) \
|
||||
({ \
|
||||
uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
|
||||
uint64_t val_m = (val); \
|
||||
asm volatile("sd %[val_m], %[pdst_sd_m] \n" \
|
||||
: [pdst_sd_m] "=m"(*pdst_sd_m) \
|
||||
: [val_m] "r"(val_m)); \
|
||||
})
|
||||
#else // !(__mips == 64)
|
||||
#define SD(val, pdst) \
|
||||
({ \
|
||||
uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
|
||||
uint32_t val0_m, val1_m; \
|
||||
val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
|
||||
val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
|
||||
SW(val0_m, pdst_sd_m); \
|
||||
SW(val1_m, pdst_sd_m + 4); \
|
||||
})
|
||||
#endif // !(__mips == 64)
|
||||
#else // !(__mips_isa_rev >= 6)
|
||||
#define LW(psrc) \
|
||||
({ \
|
||||
const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \
|
||||
uint32_t val_m; \
|
||||
asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \
|
||||
: [val_m] "=r"(val_m) \
|
||||
: [psrc_lw_m] "m"(*psrc_lw_m)); \
|
||||
val_m; \
|
||||
})
|
||||
|
||||
#if (__mips == 64)
|
||||
#define LD(psrc) \
|
||||
({ \
|
||||
const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
|
||||
uint64_t val_m = 0; \
|
||||
asm volatile("uld %[val_m], %[psrc_ld_m] \n" \
|
||||
: [val_m] "=r"(val_m) \
|
||||
: [psrc_ld_m] "m"(*psrc_ld_m)); \
|
||||
val_m; \
|
||||
})
|
||||
#else // !(__mips == 64)
|
||||
#define LD(psrc) \
|
||||
({ \
|
||||
const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \
|
||||
uint32_t val0_m, val1_m; \
|
||||
uint64_t val_m = 0; \
|
||||
val0_m = LW(psrc_ld_m); \
|
||||
val1_m = LW(psrc_ld_m + 4); \
|
||||
val_m = (uint64_t)(val1_m); /* NOLINT */ \
|
||||
val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \
|
||||
val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \
|
||||
val_m; \
|
||||
})
|
||||
#endif // (__mips == 64)
|
||||
|
||||
#define SW(val, pdst) \
|
||||
({ \
|
||||
uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \
|
||||
uint32_t val_m = (val); \
|
||||
asm volatile("usw %[val_m], %[pdst_sw_m] \n" \
|
||||
: [pdst_sw_m] "=m"(*pdst_sw_m) \
|
||||
: [val_m] "r"(val_m)); \
|
||||
})
|
||||
|
||||
#define SD(val, pdst) \
|
||||
({ \
|
||||
uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \
|
||||
uint32_t val0_m, val1_m; \
|
||||
val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \
|
||||
val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \
|
||||
SW(val0_m, pdst_sd_m); \
|
||||
SW(val1_m, pdst_sd_m + 4); \
|
||||
})
|
||||
#endif // (__mips_isa_rev >= 6)
|
||||
|
||||
// TODO(fbarchard): Consider removing __VA_ARGS__ versions.
|
||||
#define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */
|
||||
#define LD_UB(...) LD_B(const v16u8, __VA_ARGS__)
|
||||
|
||||
#define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
|
||||
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
|
||||
|
||||
#define ST_H(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */
|
||||
#define ST_UH(...) ST_H(v8u16, __VA_ARGS__)
|
||||
|
||||
/* Description : Load two vectors with 16 'byte' sized elements
|
||||
Arguments : Inputs - psrc, stride
|
||||
Outputs - out0, out1
|
||||
@@ -28,17 +153,19 @@
|
||||
Details : Load 16 byte elements in 'out0' from (psrc)
|
||||
Load 16 byte elements in 'out1' from (psrc + stride)
|
||||
*/
|
||||
#define LD_B2(RTYPE, psrc, stride, out0, out1) { \
|
||||
out0 = LD_B(RTYPE, (psrc)); \
|
||||
out1 = LD_B(RTYPE, (psrc) + stride); \
|
||||
}
|
||||
#define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__)
|
||||
#define LD_B2(RTYPE, psrc, stride, out0, out1) \
|
||||
{ \
|
||||
out0 = LD_B(RTYPE, (psrc)); \
|
||||
out1 = LD_B(RTYPE, (psrc) + stride); \
|
||||
}
|
||||
#define LD_UB2(...) LD_B2(const v16u8, __VA_ARGS__)
|
||||
|
||||
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) { \
|
||||
LD_B2(RTYPE, (psrc), stride, out0, out1); \
|
||||
LD_B2(RTYPE, (psrc) + 2 * stride , stride, out2, out3); \
|
||||
}
|
||||
#define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__)
|
||||
#define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \
|
||||
{ \
|
||||
LD_B2(RTYPE, (psrc), stride, out0, out1); \
|
||||
LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \
|
||||
}
|
||||
#define LD_UB4(...) LD_B4(const v16u8, __VA_ARGS__)
|
||||
|
||||
/* Description : Store two vectors with stride each having 16 'byte' sized
|
||||
elements
|
||||
@@ -46,18 +173,33 @@
|
||||
Details : Store 16 byte elements from 'in0' to (pdst)
|
||||
Store 16 byte elements from 'in1' to (pdst + stride)
|
||||
*/
|
||||
#define ST_B2(RTYPE, in0, in1, pdst, stride) { \
|
||||
ST_B(RTYPE, in0, (pdst)); \
|
||||
ST_B(RTYPE, in1, (pdst) + stride); \
|
||||
}
|
||||
#define ST_B2(RTYPE, in0, in1, pdst, stride) \
|
||||
{ \
|
||||
ST_B(RTYPE, in0, (pdst)); \
|
||||
ST_B(RTYPE, in1, (pdst) + stride); \
|
||||
}
|
||||
#define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__)
|
||||
#
|
||||
#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) { \
|
||||
ST_B2(RTYPE, in0, in1, (pdst), stride); \
|
||||
ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
|
||||
}
|
||||
|
||||
#define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \
|
||||
{ \
|
||||
ST_B2(RTYPE, in0, in1, (pdst), stride); \
|
||||
ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \
|
||||
}
|
||||
#define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__)
|
||||
#
|
||||
|
||||
/* Description : Store vectors of 8 halfword elements with stride
|
||||
Arguments : Inputs - in0, in1, pdst, stride
|
||||
Details : Store 8 halfword elements from 'in0' to (pdst)
|
||||
Store 8 halfword elements from 'in1' to (pdst + stride)
|
||||
*/
|
||||
#define ST_H2(RTYPE, in0, in1, pdst, stride) \
|
||||
{ \
|
||||
ST_H(RTYPE, in0, (pdst)); \
|
||||
ST_H(RTYPE, in1, (pdst) + stride); \
|
||||
}
|
||||
#define ST_UH2(...) ST_H2(v8u16, __VA_ARGS__)
|
||||
|
||||
// TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly.
|
||||
/* Description : Shuffle byte vector elements as per mask vector
|
||||
Arguments : Inputs - in0, in1, in2, in3, mask0, mask1
|
||||
Outputs - out0, out1
|
||||
@@ -65,12 +207,27 @@
|
||||
Details : Byte elements from 'in0' & 'in1' are copied selectively to
|
||||
'out0' as per control vector 'mask0'; 'in2' & 'in3' likewise to 'out1' as per 'mask1'
|
||||
*/
|
||||
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) { \
|
||||
out0 = (RTYPE) __msa_vshf_b((v16i8) mask0, (v16i8) in1, (v16i8) in0); \
|
||||
out1 = (RTYPE) __msa_vshf_b((v16i8) mask1, (v16i8) in3, (v16i8) in2); \
|
||||
}
|
||||
#define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \
|
||||
{ \
|
||||
out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \
|
||||
out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \
|
||||
}
|
||||
#define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__)
|
||||
|
||||
#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
|
||||
/* Description : Interleave both left and right half of input vectors
|
||||
Arguments : Inputs - in0, in1
|
||||
Outputs - out0, out1
|
||||
Return Type - as per RTYPE
|
||||
Details : Right half of byte elements from 'in0' and 'in1' are
|
||||
interleaved and written to 'out0'; the left halves are interleaved and written to 'out1'
|
||||
*/
|
||||
#define ILVRL_B2(RTYPE, in0, in1, out0, out1) \
|
||||
{ \
|
||||
out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \
|
||||
out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \
|
||||
}
|
||||
#define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__)
|
||||
|
||||
#endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */
|
||||
|
||||
#endif // INCLUDE_LIBYUV_MACROS_MSA_H_
|
||||
|
||||
@@ -26,25 +26,24 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size);
|
||||
LIBYUV_BOOL ValidateJpeg(const uint8_t* src_mjpg, size_t src_size_mjpg_size);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
#endif
|
||||
|
||||
static const uint32 kUnknownDataSize = 0xFFFFFFFF;
|
||||
static const uint32_t kUnknownDataSize = 0xFFFFFFFF;
|
||||
|
||||
enum JpegSubsamplingType {
|
||||
kJpegYuv420,
|
||||
kJpegYuv422,
|
||||
kJpegYuv411,
|
||||
kJpegYuv444,
|
||||
kJpegYuv400,
|
||||
kJpegUnknown
|
||||
};
|
||||
|
||||
struct Buffer {
|
||||
const uint8* data;
|
||||
const uint8_t* data;
|
||||
int len;
|
||||
};
|
||||
|
||||
@@ -66,7 +65,7 @@ struct SetJmpErrorMgr;
|
||||
class LIBYUV_API MJpegDecoder {
|
||||
public:
|
||||
typedef void (*CallbackFunction)(void* opaque,
|
||||
const uint8* const* data,
|
||||
const uint8_t* const* data,
|
||||
const int* strides,
|
||||
int rows);
|
||||
|
||||
@@ -86,7 +85,7 @@ class LIBYUV_API MJpegDecoder {
|
||||
// If return value is LIBYUV_TRUE, then the values for all the following
|
||||
// getters are populated.
|
||||
// src_len is the size of the compressed mjpeg frame in bytes.
|
||||
LIBYUV_BOOL LoadFrame(const uint8* src, size_t src_len);
|
||||
LIBYUV_BOOL LoadFrame(const uint8_t* src, size_t src_len);
|
||||
|
||||
// Returns width of the last loaded frame in pixels.
|
||||
int GetWidth();
|
||||
@@ -139,18 +138,22 @@ class LIBYUV_API MJpegDecoder {
|
||||
// at least GetComponentSize(i). The pointers in planes are incremented
|
||||
// to point to after the end of the written data.
|
||||
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
|
||||
LIBYUV_BOOL DecodeToBuffers(uint8** planes, int dst_width, int dst_height);
|
||||
LIBYUV_BOOL DecodeToBuffers(uint8_t** planes, int dst_width, int dst_height);
|
||||
|
||||
// Decodes the entire image and passes the data via repeated calls to a
|
||||
// callback function. Each call will get the data for a whole number of
|
||||
// image scanlines.
|
||||
// TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded.
|
||||
LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque,
|
||||
int dst_width, int dst_height);
|
||||
LIBYUV_BOOL DecodeToCallback(CallbackFunction fn,
|
||||
void* opaque,
|
||||
int dst_width,
|
||||
int dst_height);
|
||||
|
||||
// The helper function which recognizes the jpeg sub-sampling type.
|
||||
static JpegSubsamplingType JpegSubsamplingTypeHelper(
|
||||
int* subsample_x, int* subsample_y, int number_of_components);
|
||||
int* subsample_x,
|
||||
int* subsample_y,
|
||||
int number_of_components);
|
||||
|
||||
private:
|
||||
void AllocOutputBuffers(int num_outbufs);
|
||||
@@ -159,7 +162,7 @@ class LIBYUV_API MJpegDecoder {
|
||||
LIBYUV_BOOL StartDecode();
|
||||
LIBYUV_BOOL FinishDecode();
|
||||
|
||||
void SetScanlinePointers(uint8** data);
|
||||
void SetScanlinePointers(uint8_t** data);
|
||||
LIBYUV_BOOL DecodeImcuRow();
|
||||
|
||||
int GetComponentScanlinePadding(int component);
|
||||
@@ -178,11 +181,11 @@ class LIBYUV_API MJpegDecoder {
|
||||
|
||||
// Temporaries used to point to scanline outputs.
|
||||
int num_outbufs_; // Outermost size of all arrays below.
|
||||
uint8*** scanlines_;
|
||||
uint8_t*** scanlines_;
|
||||
int* scanlines_sizes_;
|
||||
// Temporary buffer used for decoding when we can't decode directly to the
|
||||
// output buffers. Large enough for just one iMCU row.
|
||||
uint8** databuf_;
|
||||
uint8_t** databuf_;
|
||||
int* databuf_strides_;
|
||||
};
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -20,8 +20,8 @@ extern "C" {
|
||||
|
||||
// Supported rotation.
|
||||
typedef enum RotationMode {
|
||||
kRotate0 = 0, // No rotation.
|
||||
kRotate90 = 90, // Rotate 90 degrees clockwise.
|
||||
kRotate0 = 0, // No rotation.
|
||||
kRotate90 = 90, // Rotate 90 degrees clockwise.
|
||||
kRotate180 = 180, // Rotate 180 degrees.
|
||||
kRotate270 = 270, // Rotate 270 degrees clockwise.
|
||||
|
||||
@@ -33,81 +33,128 @@ typedef enum RotationMode {
|
||||
|
||||
// Rotate I420 frame.
|
||||
LIBYUV_API
|
||||
int I420Rotate(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int src_width, int src_height, enum RotationMode mode);
|
||||
int I420Rotate(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode);
|
||||
|
||||
// Rotate NV12 input and store in I420.
|
||||
LIBYUV_API
|
||||
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int src_width, int src_height, enum RotationMode mode);
|
||||
int NV12ToI420Rotate(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode);
|
||||
|
||||
// Rotate a plane by 0, 90, 180, or 270.
|
||||
LIBYUV_API
|
||||
int RotatePlane(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int src_width, int src_height, enum RotationMode mode);
|
||||
int RotatePlane(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode);
|
||||
|
||||
// Rotate planes by 90, 180, 270. Deprecated.
|
||||
LIBYUV_API
|
||||
void RotatePlane90(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height);
|
||||
void RotatePlane90(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
void RotatePlane180(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height);
|
||||
void RotatePlane180(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
void RotatePlane270(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height);
|
||||
void RotatePlane270(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
void RotateUV90(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
void RotateUV90(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// Rotations for when U and V are interleaved.
|
||||
// These functions take one input pointer and
|
||||
// split the data into two buffers while
|
||||
// rotating them. Deprecated.
|
||||
LIBYUV_API
|
||||
void RotateUV180(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
void RotateUV180(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
void RotateUV270(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
void RotateUV270(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
// The 90 and 270 functions are based on transposes.
|
||||
// Doing a transpose with reversing the read/write
|
||||
// order will result in a rotation by +- 90 degrees.
|
||||
// Deprecated.
|
||||
LIBYUV_API
|
||||
void TransposePlane(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height);
|
||||
void TransposePlane(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
LIBYUV_API
|
||||
void TransposeUV(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
void TransposeUV(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -21,9 +21,13 @@ extern "C" {
|
||||
|
||||
// Rotate ARGB frame
|
||||
LIBYUV_API
|
||||
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int src_width, int src_height, enum RotationMode mode);
|
||||
int ARGBRotate(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
enum RotationMode mode);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -18,10 +18,14 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if defined(__pnacl__) || defined(__CLR_VER) || \
|
||||
(defined(__i386__) && !defined(__SSE2__))
|
||||
#if defined(__pnacl__) || defined(__CLR_VER) || \
|
||||
(defined(__native_client__) && defined(__x86_64__)) || \
|
||||
(defined(__i386__) && !defined(__SSE__) && !defined(__clang__))
|
||||
#define LIBYUV_DISABLE_X86
|
||||
#endif
|
||||
#if defined(__native_client__)
|
||||
#define LIBYUV_DISABLE_NEON
|
||||
#endif
|
||||
// MemorySanitizer does not support assembly code yet. http://crbug.com/344505
|
||||
#if defined(__has_feature)
|
||||
#if __has_feature(memory_sanitizer)
|
||||
@@ -29,89 +33,187 @@ extern "C" {
|
||||
#endif
|
||||
#endif
|
||||
// The following are available for Visual C and clangcl 32 bit:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||
#define HAS_TRANSPOSEWX8_SSSE3
|
||||
#define HAS_TRANSPOSEUVWX8_SSE2
|
||||
#endif
|
||||
|
||||
// The following are available for GCC 32 or 64 bit but not NaCL for 64 bit:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(__i386__) || (defined(__x86_64__) && !defined(__native_client__)))
|
||||
// The following are available for GCC 32 or 64 bit:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__))
|
||||
#define HAS_TRANSPOSEWX8_SSSE3
|
||||
#endif
|
||||
|
||||
// The following are available for 64 bit GCC but not NaCL:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && !defined(__native_client__) && \
|
||||
defined(__x86_64__)
|
||||
// The following are available for 64 bit GCC:
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__)
|
||||
#define HAS_TRANSPOSEWX8_FAST_SSSE3
|
||||
#define HAS_TRANSPOSEUVWX8_SSE2
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && \
|
||||
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
|
||||
#define HAS_TRANSPOSEWX8_NEON
|
||||
#define HAS_TRANSPOSEUVWX8_NEON
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && !defined(__native_client__) && \
|
||||
defined(__mips__) && \
|
||||
defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
||||
#define HAS_TRANSPOSEWX8_DSPR2
|
||||
#define HAS_TRANSPOSEUVWX8_DSPR2
|
||||
#endif // defined(__mips__)
|
||||
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
#define HAS_TRANSPOSEWX16_MSA
|
||||
#define HAS_TRANSPOSEUVWX16_MSA
|
||||
#endif
|
||||
|
||||
void TransposeWxH_C(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width, int height);
|
||||
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
|
||||
#define HAS_TRANSPOSEWX8_MMI
|
||||
#define HAS_TRANSPOSEUVWX8_MMI
|
||||
#endif
|
||||
|
||||
void TransposeWx8_C(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWxH_C(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
void TransposeWx8_Any_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_Any_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_Fast_Any_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_Any_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width);
|
||||
void TransposeWx8_C(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx16_C(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_NEON(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_SSSE3(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_MMI(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_Fast_SSSE3(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx16_MSA(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
|
||||
void TransposeUVWxH_C(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height);
|
||||
void TransposeWx8_Any_NEON(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_Any_SSSE3(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_Any_MMI(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
void TransposeWx16_Any_MSA(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width);
|
||||
|
||||
void TransposeUVWx8_C(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWxH_C(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height);
|
||||
|
||||
void TransposeUVWx8_Any_SSE2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWx8_Any_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWx8_Any_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width);
|
||||
void TransposeUVWx8_C(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeUVWx16_C(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeUVWx8_SSE2(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeUVWx8_NEON(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeUVWx8_MMI(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeUVWx16_MSA(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
|
||||
void TransposeUVWx8_Any_SSE2(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeUVWx8_Any_NEON(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeUVWx8_Any_MMI(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
void TransposeUVWx16_Any_MSA(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
+3285
-1263
File diff suppressed because it is too large
Load Diff
@@ -20,25 +20,33 @@ extern "C" {
|
||||
|
||||
// Supported filtering.
|
||||
typedef enum FilterMode {
|
||||
kFilterNone = 0, // Point sample; Fastest.
|
||||
kFilterLinear = 1, // Filter horizontally only.
|
||||
kFilterNone = 0, // Point sample; Fastest.
|
||||
kFilterLinear = 1, // Filter horizontally only.
|
||||
kFilterBilinear = 2, // Faster than box, but lower quality scaling down.
|
||||
kFilterBox = 3 // Highest quality.
|
||||
kFilterBox = 3 // Highest quality.
|
||||
} FilterModeEnum;
|
||||
|
||||
// Scale a YUV plane.
|
||||
LIBYUV_API
|
||||
void ScalePlane(const uint8* src, int src_stride,
|
||||
int src_width, int src_height,
|
||||
uint8* dst, int dst_stride,
|
||||
int dst_width, int dst_height,
|
||||
void ScalePlane(const uint8_t* src,
|
||||
int src_stride,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
LIBYUV_API
|
||||
void ScalePlane_16(const uint16* src, int src_stride,
|
||||
int src_width, int src_height,
|
||||
uint16* dst, int dst_stride,
|
||||
int dst_width, int dst_height,
|
||||
void ScalePlane_16(const uint16_t* src,
|
||||
int src_stride,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint16_t* dst,
|
||||
int dst_stride,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
// Scales a YUV 4:2:0 image from the src width and height to the
|
||||
@@ -52,44 +60,64 @@ void ScalePlane_16(const uint16* src, int src_stride,
|
||||
// Returns 0 if successful.
|
||||
|
||||
LIBYUV_API
|
||||
int I420Scale(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int dst_width, int dst_height,
|
||||
int I420Scale(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
LIBYUV_API
|
||||
int I420Scale_16(const uint16* src_y, int src_stride_y,
|
||||
const uint16* src_u, int src_stride_u,
|
||||
const uint16* src_v, int src_stride_v,
|
||||
int src_width, int src_height,
|
||||
uint16* dst_y, int dst_stride_y,
|
||||
uint16* dst_u, int dst_stride_u,
|
||||
uint16* dst_v, int dst_stride_v,
|
||||
int dst_width, int dst_height,
|
||||
int I420Scale_16(const uint16_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint16_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint16_t* src_v,
|
||||
int src_stride_v,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint16_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint16_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint16_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
#ifdef __cplusplus
|
||||
// Legacy API. Deprecated.
|
||||
LIBYUV_API
|
||||
int Scale(const uint8* src_y, const uint8* src_u, const uint8* src_v,
|
||||
int src_stride_y, int src_stride_u, int src_stride_v,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_y, uint8* dst_u, uint8* dst_v,
|
||||
int dst_stride_y, int dst_stride_u, int dst_stride_v,
|
||||
int dst_width, int dst_height,
|
||||
int Scale(const uint8_t* src_y,
|
||||
const uint8_t* src_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_y,
|
||||
int src_stride_u,
|
||||
int src_stride_v,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8_t* dst_y,
|
||||
uint8_t* dst_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_y,
|
||||
int dst_stride_u,
|
||||
int dst_stride_v,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
LIBYUV_BOOL interpolate);
|
||||
|
||||
// Legacy API. Deprecated.
|
||||
LIBYUV_API
|
||||
int ScaleOffset(const uint8* src_i420, int src_width, int src_height,
|
||||
uint8* dst_i420, int dst_width, int dst_height, int dst_yoffset,
|
||||
LIBYUV_BOOL interpolate);
|
||||
|
||||
// For testing, allow disabling of specialized scalers.
|
||||
LIBYUV_API
|
||||
void SetUseReferenceImpl(LIBYUV_BOOL use);
|
||||
|
||||
@@ -20,32 +20,52 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBScale(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
int ARGBScale(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
// Clipped scale takes destination rectangle coordinates for clip values.
|
||||
LIBYUV_API
|
||||
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
int ARGBScaleClip(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int clip_x,
|
||||
int clip_y,
|
||||
int clip_width,
|
||||
int clip_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
// Scale with YUV conversion to ARGB and clipping.
|
||||
LIBYUV_API
|
||||
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint32 src_fourcc,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
uint32 dst_fourcc,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
int YUVToARGBScaleClip(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint32_t src_fourcc,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
uint32_t dst_fourcc,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int clip_x,
|
||||
int clip_y,
|
||||
int clip_width,
|
||||
int clip_height,
|
||||
enum FilterMode filtering);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -11,6 +11,6 @@
|
||||
#ifndef INCLUDE_LIBYUV_VERSION_H_
|
||||
#define INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
#define LIBYUV_VERSION 1620
|
||||
#define LIBYUV_VERSION 1724
|
||||
|
||||
#endif // INCLUDE_LIBYUV_VERSION_H_
|
||||
|
||||
@@ -28,13 +28,14 @@ extern "C" {
|
||||
// Needs to be a macro otherwise the OS X compiler complains when the kFormat*
|
||||
// constants are used in a switch.
|
||||
#ifdef __cplusplus
|
||||
#define FOURCC(a, b, c, d) ( \
|
||||
(static_cast<uint32>(a)) | (static_cast<uint32>(b) << 8) | \
|
||||
(static_cast<uint32>(c) << 16) | (static_cast<uint32>(d) << 24))
|
||||
#define FOURCC(a, b, c, d) \
|
||||
((static_cast<uint32_t>(a)) | (static_cast<uint32_t>(b) << 8) | \
|
||||
(static_cast<uint32_t>(c) << 16) | /* NOLINT */ \
|
||||
(static_cast<uint32_t>(d) << 24)) /* NOLINT */
|
||||
#else
|
||||
#define FOURCC(a, b, c, d) ( \
|
||||
((uint32)(a)) | ((uint32)(b) << 8) | /* NOLINT */ \
|
||||
((uint32)(c) << 16) | ((uint32)(d) << 24)) /* NOLINT */
|
||||
#define FOURCC(a, b, c, d) \
|
||||
(((uint32_t)(a)) | ((uint32_t)(b) << 8) | /* NOLINT */ \
|
||||
((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) /* NOLINT */
|
||||
#endif
|
||||
|
||||
// Some pages discussing FourCC codes:
|
||||
@@ -53,38 +54,33 @@ enum FourCC {
|
||||
FOURCC_I420 = FOURCC('I', '4', '2', '0'),
|
||||
FOURCC_I422 = FOURCC('I', '4', '2', '2'),
|
||||
FOURCC_I444 = FOURCC('I', '4', '4', '4'),
|
||||
FOURCC_I411 = FOURCC('I', '4', '1', '1'),
|
||||
FOURCC_I400 = FOURCC('I', '4', '0', '0'),
|
||||
FOURCC_NV21 = FOURCC('N', 'V', '2', '1'),
|
||||
FOURCC_NV12 = FOURCC('N', 'V', '1', '2'),
|
||||
FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'),
|
||||
FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'),
|
||||
FOURCC_H010 = FOURCC('H', '0', '1', '0'), // unofficial fourcc. 10 bit lsb
|
||||
|
||||
// 2 Secondary YUV formats: row biplanar.
|
||||
// 1 Secondary YUV format: row biplanar.
|
||||
FOURCC_M420 = FOURCC('M', '4', '2', '0'),
|
||||
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), // deprecated.
|
||||
|
||||
// 9 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp.
|
||||
// 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc
|
||||
FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'),
|
||||
FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'),
|
||||
FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'),
|
||||
FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010.
|
||||
FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit
|
||||
FOURCC_24BG = FOURCC('2', '4', 'B', 'G'),
|
||||
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
|
||||
FOURCC_RAW = FOURCC('r', 'a', 'w', ' '),
|
||||
FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'),
|
||||
FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE.
|
||||
FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE.
|
||||
FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE.
|
||||
|
||||
// 4 Secondary RGB formats: 4 Bayer Patterns. deprecated.
|
||||
FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
|
||||
FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
|
||||
FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
|
||||
FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
|
||||
|
||||
// 1 Primary Compressed YUV format.
|
||||
FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'),
|
||||
|
||||
// 5 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
|
||||
// 8 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias.
|
||||
FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'),
|
||||
FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'),
|
||||
FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'),
|
||||
@@ -92,6 +88,7 @@ enum FourCC {
|
||||
FOURCC_J420 = FOURCC('J', '4', '2', '0'),
|
||||
FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc
|
||||
FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc
|
||||
FOURCC_H422 = FOURCC('H', '4', '2', '2'), // unofficial fourcc
|
||||
|
||||
// 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc.
|
||||
FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420.
|
||||
@@ -112,7 +109,13 @@ enum FourCC {
|
||||
FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP.
|
||||
FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO.
|
||||
|
||||
// 1 Auxiliary compressed YUV format set aside for capturer.
|
||||
// deprecated formats. Not supported, but defined for backward compatibility.
|
||||
FOURCC_I411 = FOURCC('I', '4', '1', '1'),
|
||||
FOURCC_Q420 = FOURCC('Q', '4', '2', '0'),
|
||||
FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'),
|
||||
FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'),
|
||||
FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'),
|
||||
FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'),
|
||||
FOURCC_H264 = FOURCC('H', '2', '6', '4'),
|
||||
|
||||
// Match any fourcc.
|
||||
@@ -136,8 +139,10 @@ enum FourCCBpp {
|
||||
FOURCC_BPP_BGRA = 32,
|
||||
FOURCC_BPP_ABGR = 32,
|
||||
FOURCC_BPP_RGBA = 32,
|
||||
FOURCC_BPP_AR30 = 32,
|
||||
FOURCC_BPP_AB30 = 32,
|
||||
FOURCC_BPP_24BG = 24,
|
||||
FOURCC_BPP_RAW = 24,
|
||||
FOURCC_BPP_RAW = 24,
|
||||
FOURCC_BPP_RGBP = 16,
|
||||
FOURCC_BPP_RGBO = 16,
|
||||
FOURCC_BPP_R444 = 16,
|
||||
@@ -152,6 +157,8 @@ enum FourCCBpp {
|
||||
FOURCC_BPP_J420 = 12,
|
||||
FOURCC_BPP_J400 = 8,
|
||||
FOURCC_BPP_H420 = 12,
|
||||
FOURCC_BPP_H422 = 16,
|
||||
FOURCC_BPP_H010 = 24,
|
||||
FOURCC_BPP_MJPG = 0, // 0 means unknown.
|
||||
FOURCC_BPP_H264 = 0,
|
||||
FOURCC_BPP_IYUV = 12,
|
||||
@@ -170,11 +177,11 @@ enum FourCCBpp {
|
||||
FOURCC_BPP_CM24 = 24,
|
||||
|
||||
// Match any fourcc.
|
||||
FOURCC_BPP_ANY = 0, // 0 means unknown.
|
||||
FOURCC_BPP_ANY = 0, // 0 means unknown.
|
||||
};
|
||||
|
||||
// Converts fourcc aliases into canonical ones.
|
||||
LIBYUV_API uint32 CanonicalFourCC(uint32 fourcc);
|
||||
LIBYUV_API uint32_t CanonicalFourCC(uint32_t fourcc);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -0,0 +1,3 @@
|
||||
set noparent
|
||||
agable@chromium.org
|
||||
phoglund@chromium.org
|
||||
@@ -1,21 +1,15 @@
|
||||
# Copyright 2013 The LibYuv Project Authors. All rights reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
# all.gyp and All target are for benefit of android gyp build.
|
||||
{
|
||||
'targets': [
|
||||
{
|
||||
'target_name': 'All',
|
||||
'type': 'none',
|
||||
'dependencies': [
|
||||
'libyuv.gyp:*',
|
||||
'libyuv_test.gyp:*',
|
||||
],
|
||||
},
|
||||
],
|
||||
}
|
||||
# Copyright 2018 The LibYuv Project Authors. All rights reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
|
||||
def CheckChangeOnUpload(input_api, output_api):
|
||||
return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api)
|
||||
|
||||
|
||||
def CheckChangeOnCommit(input_api, output_api):
|
||||
return input_api.canned_checks.CheckChangedLUCIConfigs(input_api, output_api)
|
||||
@@ -0,0 +1 @@
|
||||
This directory contains configuration files for infra services.
|
||||
@@ -0,0 +1,50 @@
|
||||
# Commit Queue configuration file. The documentation of the format can be found
|
||||
# at http://luci-config.appspot.com/schemas/projects/refs:cq.cfg.
|
||||
|
||||
version: 1
|
||||
cq_status_url: "https://chromium-cq-status.appspot.com"
|
||||
git_repo_url: "https://chromium.googlesource.com/libyuv/libyuv.git"
|
||||
|
||||
gerrit {}
|
||||
|
||||
verifiers {
|
||||
gerrit_cq_ability {
|
||||
committer_list: "project-libyuv-committers"
|
||||
dry_run_access_list: "project-libyuv-tryjob-access"
|
||||
}
|
||||
|
||||
try_job {
|
||||
buckets {
|
||||
name: "luci.libyuv.try"
|
||||
builders { name: "win" }
|
||||
builders { name: "win_rel" }
|
||||
builders { name: "win_x64_rel" }
|
||||
builders { name: "win_clang" }
|
||||
builders { name: "win_clang_rel" }
|
||||
builders { name: "win_x64_clang_rel" }
|
||||
builders { name: "mac" }
|
||||
builders { name: "mac_rel" }
|
||||
builders { name: "mac_asan" }
|
||||
builders { name: "ios" }
|
||||
builders { name: "ios_rel" }
|
||||
builders { name: "ios_arm64" }
|
||||
builders { name: "ios_arm64_rel" }
|
||||
builders { name: "linux" }
|
||||
builders { name: "linux_rel" }
|
||||
builders {
|
||||
name: "linux_gcc"
|
||||
experiment_percentage: 100
|
||||
}
|
||||
builders { name: "linux_tsan2" }
|
||||
builders { name: "linux_asan" }
|
||||
builders { name: "linux_msan" }
|
||||
builders { name: "linux_ubsan" }
|
||||
builders { name: "linux_ubsan_vptr" }
|
||||
builders { name: "android" }
|
||||
builders { name: "android_rel" }
|
||||
builders { name: "android_arm64" }
|
||||
builders { name: "android_x86" }
|
||||
builders { name: "android_x64" }
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -13,8 +13,11 @@ import("//build/config/mips.gni")
|
||||
declare_args() {
|
||||
libyuv_include_tests = !build_with_chromium
|
||||
libyuv_disable_jpeg = false
|
||||
libyuv_use_neon = (current_cpu == "arm64" ||
|
||||
(current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon)))
|
||||
libyuv_use_msa = (current_cpu == "mips64el" || current_cpu == "mipsel") &&
|
||||
mips_use_msa
|
||||
libyuv_use_neon =
|
||||
current_cpu == "arm64" ||
|
||||
(current_cpu == "arm" && (arm_use_neon || arm_optionally_use_neon))
|
||||
libyuv_use_msa =
|
||||
(current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_msa
|
||||
libyuv_use_mmi =
|
||||
(current_cpu == "mips64el" || current_cpu == "mipsel") && mips_use_mmi
|
||||
}
|
||||
|
||||
@@ -1,161 +0,0 @@
|
||||
# Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
{
|
||||
'includes': [
|
||||
'libyuv.gypi',
|
||||
],
|
||||
# Make sure that if we are being compiled to an xcodeproj, nothing tries to
|
||||
# include a .pch.
|
||||
'xcode_settings': {
|
||||
'GCC_PREFIX_HEADER': '',
|
||||
'GCC_PRECOMPILE_PREFIX_HEADER': 'NO',
|
||||
},
|
||||
'variables': {
|
||||
'use_system_libjpeg%': 0,
|
||||
'libyuv_disable_jpeg%': 0,
|
||||
# 'chromium_code' treats libyuv as internal and increases warning level.
|
||||
'chromium_code': 1,
|
||||
# clang compiler default variable usable by other apps that include libyuv.
|
||||
'clang%': 0,
|
||||
# Link-Time Optimizations.
|
||||
'use_lto%': 0,
|
||||
'mips_msa%': 0, # Default to msa off.
|
||||
'build_neon': 0,
|
||||
'build_msa': 0,
|
||||
'conditions': [
|
||||
['(target_arch == "armv7" or target_arch == "armv7s" or \
|
||||
(target_arch == "arm" and arm_version >= 7) or target_arch == "arm64")\
|
||||
and (arm_neon == 1 or arm_neon_optional == 1)', {
|
||||
'build_neon': 1,
|
||||
}],
|
||||
['(target_arch == "mipsel" or target_arch == "mips64el")\
|
||||
and (mips_msa == 1)',
|
||||
{
|
||||
'build_msa': 1,
|
||||
}],
|
||||
],
|
||||
},
|
||||
|
||||
'targets': [
|
||||
{
|
||||
'target_name': 'libyuv',
|
||||
# Change type to 'shared_library' to build .so or .dll files.
|
||||
'type': 'static_library',
|
||||
'variables': {
|
||||
'optimize': 'max', # enable O2 and ltcg.
|
||||
},
|
||||
# Allows libyuv.a redistributable library without external dependencies.
|
||||
'standalone_static_library': 1,
|
||||
'conditions': [
|
||||
# Disable -Wunused-parameter
|
||||
['clang == 1', {
|
||||
'cflags': [
|
||||
'-Wno-unused-parameter',
|
||||
],
|
||||
}],
|
||||
['build_neon != 0', {
|
||||
'defines': [
|
||||
'LIBYUV_NEON',
|
||||
],
|
||||
'cflags!': [
|
||||
'-mfpu=vfp',
|
||||
'-mfpu=vfpv3',
|
||||
'-mfpu=vfpv3-d16',
|
||||
# '-mthumb', # arm32 not thumb
|
||||
],
|
||||
'conditions': [
|
||||
# Disable LTO in libyuv_neon target due to gcc 4.9 compiler bug.
|
||||
['clang == 0 and use_lto == 1', {
|
||||
'cflags!': [
|
||||
'-flto',
|
||||
'-ffat-lto-objects',
|
||||
],
|
||||
}],
|
||||
# arm64 does not need -mfpu=neon option as neon is not optional
|
||||
['target_arch != "arm64"', {
|
||||
'cflags': [
|
||||
'-mfpu=neon',
|
||||
# '-marm', # arm32 not thumb
|
||||
],
|
||||
}],
|
||||
],
|
||||
}],
|
||||
['build_msa != 0', {
|
||||
'defines': [
|
||||
'LIBYUV_MSA',
|
||||
],
|
||||
}],
|
||||
['OS != "ios" and libyuv_disable_jpeg != 1', {
|
||||
'defines': [
|
||||
'HAVE_JPEG'
|
||||
],
|
||||
'conditions': [
|
||||
# Caveat system jpeg support may not support motion jpeg
|
||||
[ 'use_system_libjpeg == 1', {
|
||||
'dependencies': [
|
||||
'<(DEPTH)/third_party/libjpeg/libjpeg.gyp:libjpeg',
|
||||
],
|
||||
}, {
|
||||
'dependencies': [
|
||||
'<(DEPTH)/third_party/libjpeg_turbo/libjpeg.gyp:libjpeg',
|
||||
],
|
||||
}],
|
||||
[ 'use_system_libjpeg == 1', {
|
||||
'link_settings': {
|
||||
'libraries': [
|
||||
'-ljpeg',
|
||||
],
|
||||
}
|
||||
}],
|
||||
],
|
||||
}],
|
||||
], #conditions
|
||||
'defines': [
|
||||
# Enable the following 3 macros to turn off assembly for specified CPU.
|
||||
# 'LIBYUV_DISABLE_X86',
|
||||
# 'LIBYUV_DISABLE_NEON',
|
||||
# 'LIBYUV_DISABLE_MIPS',
|
||||
# Enable the following macro to build libyuv as a shared library (dll).
|
||||
# 'LIBYUV_USING_SHARED_LIBRARY',
|
||||
# TODO(fbarchard): Make these into gyp defines.
|
||||
],
|
||||
'include_dirs': [
|
||||
'include',
|
||||
'.',
|
||||
],
|
||||
'direct_dependent_settings': {
|
||||
'include_dirs': [
|
||||
'include',
|
||||
'.',
|
||||
],
|
||||
'conditions': [
|
||||
['OS == "android" and target_arch == "arm64"', {
|
||||
'ldflags': [
|
||||
'-Wl,--dynamic-linker,/system/bin/linker64',
|
||||
],
|
||||
}],
|
||||
['OS == "android" and target_arch != "arm64"', {
|
||||
'ldflags': [
|
||||
'-Wl,--dynamic-linker,/system/bin/linker',
|
||||
],
|
||||
}],
|
||||
], #conditions
|
||||
},
|
||||
'sources': [
|
||||
'<@(libyuv_sources)',
|
||||
],
|
||||
},
|
||||
], # targets.
|
||||
}
|
||||
|
||||
# Local Variables:
|
||||
# tab-width:2
|
||||
# indent-tabs-mode:nil
|
||||
# End:
|
||||
# vim: set expandtab tabstop=2 shiftwidth=2:
|
||||
@@ -1,81 +0,0 @@
|
||||
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
{
|
||||
'variables': {
|
||||
'libyuv_sources': [
|
||||
# includes.
|
||||
'include/libyuv.h',
|
||||
'include/libyuv/basic_types.h',
|
||||
'include/libyuv/compare.h',
|
||||
'include/libyuv/convert.h',
|
||||
'include/libyuv/convert_argb.h',
|
||||
'include/libyuv/convert_from.h',
|
||||
'include/libyuv/convert_from_argb.h',
|
||||
'include/libyuv/cpu_id.h',
|
||||
'include/libyuv/macros_msa.h',
|
||||
'include/libyuv/mjpeg_decoder.h',
|
||||
'include/libyuv/planar_functions.h',
|
||||
'include/libyuv/rotate.h',
|
||||
'include/libyuv/rotate_argb.h',
|
||||
'include/libyuv/rotate_row.h',
|
||||
'include/libyuv/row.h',
|
||||
'include/libyuv/scale.h',
|
||||
'include/libyuv/scale_argb.h',
|
||||
'include/libyuv/scale_row.h',
|
||||
'include/libyuv/version.h',
|
||||
'include/libyuv/video_common.h',
|
||||
|
||||
# sources.
|
||||
'source/compare.cc',
|
||||
'source/compare_common.cc',
|
||||
'source/compare_gcc.cc',
|
||||
'source/compare_neon.cc',
|
||||
'source/compare_neon64.cc',
|
||||
'source/compare_win.cc',
|
||||
'source/convert.cc',
|
||||
'source/convert_argb.cc',
|
||||
'source/convert_from.cc',
|
||||
'source/convert_from_argb.cc',
|
||||
'source/convert_jpeg.cc',
|
||||
'source/convert_to_argb.cc',
|
||||
'source/convert_to_i420.cc',
|
||||
'source/cpu_id.cc',
|
||||
'source/mjpeg_decoder.cc',
|
||||
'source/mjpeg_validate.cc',
|
||||
'source/planar_functions.cc',
|
||||
'source/rotate.cc',
|
||||
'source/rotate_any.cc',
|
||||
'source/rotate_argb.cc',
|
||||
'source/rotate_common.cc',
|
||||
'source/rotate_gcc.cc',
|
||||
'source/rotate_mips.cc',
|
||||
'source/rotate_neon.cc',
|
||||
'source/rotate_neon64.cc',
|
||||
'source/rotate_win.cc',
|
||||
'source/row_any.cc',
|
||||
'source/row_common.cc',
|
||||
'source/row_gcc.cc',
|
||||
'source/row_mips.cc',
|
||||
'source/row_msa.cc',
|
||||
'source/row_neon.cc',
|
||||
'source/row_neon64.cc',
|
||||
'source/row_win.cc',
|
||||
'source/scale.cc',
|
||||
'source/scale_any.cc',
|
||||
'source/scale_argb.cc',
|
||||
'source/scale_common.cc',
|
||||
'source/scale_gcc.cc',
|
||||
'source/scale_mips.cc',
|
||||
'source/scale_neon.cc',
|
||||
'source/scale_neon64.cc',
|
||||
'source/scale_win.cc',
|
||||
'source/video_common.cc',
|
||||
],
|
||||
}
|
||||
}
|
||||
@@ -1,37 +0,0 @@
|
||||
# Copyright 2014 The LibYuv Project Authors. All rights reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
{
|
||||
'includes': [
|
||||
'libyuv.gypi',
|
||||
'../../native_client/build/untrusted.gypi',
|
||||
],
|
||||
'targets': [
|
||||
{
|
||||
'target_name': 'libyuv_nacl',
|
||||
'type': 'none',
|
||||
'variables': {
|
||||
'nlib_target': 'libyuv_nacl.a',
|
||||
'build_glibc': 0,
|
||||
'build_newlib': 0,
|
||||
'build_pnacl_newlib': 1,
|
||||
},
|
||||
'include_dirs': [
|
||||
'include',
|
||||
],
|
||||
'direct_dependent_settings': {
|
||||
'include_dirs': [
|
||||
'include',
|
||||
],
|
||||
},
|
||||
'sources': [
|
||||
'<@(libyuv_sources)',
|
||||
],
|
||||
}, # target libyuv_nacl
|
||||
]
|
||||
}
|
||||
@@ -1,202 +0,0 @@
|
||||
# Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
{
|
||||
'variables': {
|
||||
'libyuv_disable_jpeg%': 0,
|
||||
'mips_msa%': 0, # Default to msa off.
|
||||
},
|
||||
'targets': [
|
||||
{
|
||||
'target_name': 'libyuv_unittest',
|
||||
'type': '<(gtest_target_type)',
|
||||
'dependencies': [
|
||||
'libyuv.gyp:libyuv',
|
||||
'testing/gtest.gyp:gtest',
|
||||
'third_party/gflags/gflags.gyp:gflags',
|
||||
],
|
||||
'direct_dependent_settings': {
|
||||
'defines': [
|
||||
'GTEST_RELATIVE_PATH',
|
||||
],
|
||||
},
|
||||
'export_dependent_settings': [
|
||||
'<(DEPTH)/testing/gtest.gyp:gtest',
|
||||
],
|
||||
'sources': [
|
||||
# headers
|
||||
'unit_test/unit_test.h',
|
||||
|
||||
# sources
|
||||
'unit_test/basictypes_test.cc',
|
||||
'unit_test/compare_test.cc',
|
||||
'unit_test/color_test.cc',
|
||||
'unit_test/convert_test.cc',
|
||||
'unit_test/cpu_test.cc',
|
||||
'unit_test/math_test.cc',
|
||||
'unit_test/planar_test.cc',
|
||||
'unit_test/rotate_argb_test.cc',
|
||||
'unit_test/rotate_test.cc',
|
||||
'unit_test/scale_argb_test.cc',
|
||||
'unit_test/scale_test.cc',
|
||||
'unit_test/unit_test.cc',
|
||||
'unit_test/video_common_test.cc',
|
||||
],
|
||||
'conditions': [
|
||||
['OS=="linux"', {
|
||||
'cflags': [
|
||||
'-fexceptions',
|
||||
],
|
||||
}],
|
||||
[ 'OS == "ios"', {
|
||||
'xcode_settings': {
|
||||
'DEBUGGING_SYMBOLS': 'YES',
|
||||
'DEBUG_INFORMATION_FORMAT' : 'dwarf-with-dsym',
|
||||
# Work around compile issue with isosim.mm, see
|
||||
# https://code.google.com/p/libyuv/issues/detail?id=548 for details.
|
||||
'WARNING_CFLAGS': [
|
||||
'-Wno-sometimes-uninitialized',
|
||||
],
|
||||
},
|
||||
'cflags': [
|
||||
'-Wno-sometimes-uninitialized',
|
||||
],
|
||||
}],
|
||||
[ 'OS != "ios" and libyuv_disable_jpeg != 1', {
|
||||
'defines': [
|
||||
'HAVE_JPEG',
|
||||
],
|
||||
}],
|
||||
['OS=="android"', {
|
||||
'dependencies': [
|
||||
'<(DEPTH)/testing/android/native_test.gyp:native_test_native_code',
|
||||
],
|
||||
}],
|
||||
# TODO(YangZhang): These lines can be removed when high accuracy
|
||||
# YUV to RGB to Neon is ported.
|
||||
[ '(target_arch == "armv7" or target_arch == "armv7s" \
|
||||
or (target_arch == "arm" and arm_version >= 7) \
|
||||
or target_arch == "arm64") \
|
||||
and (arm_neon == 1 or arm_neon_optional == 1)', {
|
||||
'defines': [
|
||||
'LIBYUV_NEON'
|
||||
],
|
||||
}],
|
||||
[ '(target_arch == "mipsel" or target_arch == "mips64el") \
|
||||
and (mips_msa == 1)', {
|
||||
'defines': [
|
||||
'LIBYUV_MSA'
|
||||
],
|
||||
}],
|
||||
], # conditions
|
||||
'defines': [
|
||||
# Enable the following 3 macros to turn off assembly for specified CPU.
|
||||
# 'LIBYUV_DISABLE_X86',
|
||||
# 'LIBYUV_DISABLE_NEON',
|
||||
# 'LIBYUV_DISABLE_MIPS',
|
||||
# Enable the following macro to build libyuv as a shared library (dll).
|
||||
# 'LIBYUV_USING_SHARED_LIBRARY',
|
||||
],
|
||||
},
|
||||
{
|
||||
'target_name': 'compare',
|
||||
'type': 'executable',
|
||||
'dependencies': [
|
||||
'libyuv.gyp:libyuv',
|
||||
],
|
||||
'sources': [
|
||||
# sources
|
||||
'util/compare.cc',
|
||||
],
|
||||
'conditions': [
|
||||
['OS=="linux"', {
|
||||
'cflags': [
|
||||
'-fexceptions',
|
||||
],
|
||||
}],
|
||||
], # conditions
|
||||
},
|
||||
{
|
||||
'target_name': 'convert',
|
||||
'type': 'executable',
|
||||
'dependencies': [
|
||||
'libyuv.gyp:libyuv',
|
||||
],
|
||||
'sources': [
|
||||
# sources
|
||||
'util/convert.cc',
|
||||
],
|
||||
'conditions': [
|
||||
['OS=="linux"', {
|
||||
'cflags': [
|
||||
'-fexceptions',
|
||||
],
|
||||
}],
|
||||
], # conditions
|
||||
},
|
||||
# TODO(fbarchard): Enable SSE2 and OpenMP for better performance.
|
||||
{
|
||||
'target_name': 'psnr',
|
||||
'type': 'executable',
|
||||
'sources': [
|
||||
# sources
|
||||
'util/psnr_main.cc',
|
||||
'util/psnr.cc',
|
||||
'util/ssim.cc',
|
||||
],
|
||||
'dependencies': [
|
||||
'libyuv.gyp:libyuv',
|
||||
],
|
||||
'conditions': [
|
||||
[ 'OS != "ios" and libyuv_disable_jpeg != 1', {
|
||||
'defines': [
|
||||
'HAVE_JPEG',
|
||||
],
|
||||
}],
|
||||
], # conditions
|
||||
},
|
||||
|
||||
{
|
||||
'target_name': 'cpuid',
|
||||
'type': 'executable',
|
||||
'sources': [
|
||||
# sources
|
||||
'util/cpuid.c',
|
||||
],
|
||||
'dependencies': [
|
||||
'libyuv.gyp:libyuv',
|
||||
],
|
||||
},
|
||||
], # targets
|
||||
'conditions': [
|
||||
['OS=="android"', {
|
||||
'targets': [
|
||||
{
|
||||
'target_name': 'yuv_unittest_apk',
|
||||
'type': 'none',
|
||||
'variables': {
|
||||
'test_suite_name': 'yuv_unittest',
|
||||
'input_shlib_path': '<(SHARED_LIB_DIR)/(SHARED_LIB_PREFIX)libyuv_unittest<(SHARED_LIB_SUFFIX)',
|
||||
},
|
||||
'includes': [
|
||||
'build/apk_test.gypi',
|
||||
],
|
||||
'dependencies': [
|
||||
'libyuv_unittest',
|
||||
],
|
||||
},
|
||||
],
|
||||
}],
|
||||
],
|
||||
}
|
||||
|
||||
# Local Variables:
|
||||
# tab-width:2
|
||||
# indent-tabs-mode:nil
|
||||
# End:
|
||||
# vim: set expandtab tabstop=2 shiftwidth=2:
|
||||
+12
-7
@@ -13,6 +13,8 @@ LOCAL_OBJ_FILES := \
|
||||
source/compare.o \
|
||||
source/compare_common.o \
|
||||
source/compare_gcc.o \
|
||||
source/compare_mmi.o \
|
||||
source/compare_msa.o \
|
||||
source/compare_neon64.o \
|
||||
source/compare_neon.o \
|
||||
source/compare_win.o \
|
||||
@@ -32,14 +34,16 @@ LOCAL_OBJ_FILES := \
|
||||
source/rotate.o \
|
||||
source/rotate_common.o \
|
||||
source/rotate_gcc.o \
|
||||
source/rotate_mips.o \
|
||||
source/rotate_mmi.o \
|
||||
source/rotate_msa.o \
|
||||
source/rotate_neon64.o \
|
||||
source/rotate_neon.o \
|
||||
source/rotate_win.o \
|
||||
source/row_any.o \
|
||||
source/row_common.o \
|
||||
source/row_gcc.o \
|
||||
source/row_mips.o \
|
||||
source/row_mmi.o \
|
||||
source/row_msa.o \
|
||||
source/row_neon64.o \
|
||||
source/row_neon.o \
|
||||
source/row_win.o \
|
||||
@@ -48,7 +52,8 @@ LOCAL_OBJ_FILES := \
|
||||
source/scale.o \
|
||||
source/scale_common.o \
|
||||
source/scale_gcc.o \
|
||||
source/scale_mips.o \
|
||||
source/scale_mmi.o \
|
||||
source/scale_msa.o \
|
||||
source/scale_neon64.o \
|
||||
source/scale_neon.o \
|
||||
source/scale_win.o \
|
||||
@@ -60,14 +65,14 @@ LOCAL_OBJ_FILES := \
|
||||
.c.o:
|
||||
$(CC) -c $(CFLAGS) $*.c -o $*.o
|
||||
|
||||
all: libyuv.a convert cpuid psnr
|
||||
all: libyuv.a yuvconvert cpuid psnr
|
||||
|
||||
libyuv.a: $(LOCAL_OBJ_FILES)
|
||||
$(AR) $(ARFLAGS) $@ $(LOCAL_OBJ_FILES)
|
||||
|
||||
# A C++ test utility that uses libyuv conversion.
|
||||
convert: util/convert.cc libyuv.a
|
||||
$(CXX) $(CXXFLAGS) -Iutil/ -o $@ util/convert.cc libyuv.a
|
||||
yuvconvert: util/yuvconvert.cc libyuv.a
|
||||
$(CXX) $(CXXFLAGS) -Iutil/ -o $@ util/yuvconvert.cc libyuv.a
|
||||
|
||||
# A standalone test utility
|
||||
psnr: util/psnr.cc
|
||||
@@ -80,4 +85,4 @@ cpuid: util/cpuid.c libyuv.a
|
||||
$(CC) $(CFLAGS) -o $@ util/cpuid.c libyuv.a
|
||||
|
||||
clean:
|
||||
/bin/rm -f source/*.o *.ii *.s libyuv.a convert cpuid psnr
|
||||
/bin/rm -f source/*.o *.ii *.s libyuv.a yuvconvert cpuid psnr
|
||||
|
||||
+13
-13
@@ -1,13 +1,13 @@
|
||||
# This file contains all the common make variables which are useful for
|
||||
# anyone depending on this library.
|
||||
# Note that dependencies on NDK are not directly listed since NDK auto adds
|
||||
# them.
|
||||
|
||||
LIBYUV_INCLUDES := $(LIBYUV_PATH)/include
|
||||
|
||||
LIBYUV_C_FLAGS :=
|
||||
|
||||
LIBYUV_CPP_FLAGS :=
|
||||
|
||||
LIBYUV_LDLIBS :=
|
||||
LIBYUV_DEP_MODULES :=
|
||||
# This file contains all the common make variables which are useful for
|
||||
# anyone depending on this library.
|
||||
# Note that dependencies on NDK are not directly listed since NDK auto adds
|
||||
# them.
|
||||
|
||||
LIBYUV_INCLUDES := $(LIBYUV_PATH)/include
|
||||
|
||||
LIBYUV_C_FLAGS :=
|
||||
|
||||
LIBYUV_CPP_FLAGS :=
|
||||
|
||||
LIBYUV_LDLIBS :=
|
||||
LIBYUV_DEP_MODULES :=
|
||||
|
||||
@@ -0,0 +1,17 @@
|
||||
[MESSAGES CONTROL]
|
||||
|
||||
# Disable the message, report, category or checker with the given id(s).
|
||||
# TODO(kjellander): Reduce this list to as small as possible.
|
||||
disable=I0010,I0011,bad-continuation,broad-except,duplicate-code,eval-used,exec-used,fixme,invalid-name,missing-docstring,no-init,no-member,too-few-public-methods,too-many-ancestors,too-many-arguments,too-many-branches,too-many-function-args,too-many-instance-attributes,too-many-lines,too-many-locals,too-many-public-methods,too-many-return-statements,too-many-statements
|
||||
|
||||
|
||||
[REPORTS]
|
||||
|
||||
# Don't write out full reports, just messages.
|
||||
reports=no
|
||||
|
||||
|
||||
[FORMAT]
|
||||
|
||||
# We use two spaces for indents, instead of the usual four spaces or tab.
|
||||
indent-string=' '
|
||||
@@ -1,499 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
"""Setup links to a Chromium checkout for WebRTC.
|
||||
|
||||
WebRTC standalone shares a lot of dependencies and build tools with Chromium.
|
||||
To do this, many of the paths of a Chromium checkout are emulated by creating
|
||||
symlinks to files and directories. This script handles the setup of symlinks to
|
||||
achieve this.
|
||||
"""
|
||||
|
||||
|
||||
import ctypes
|
||||
import errno
|
||||
import logging
|
||||
import optparse
|
||||
import os
|
||||
import shelve
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
import textwrap
|
||||
|
||||
|
||||
DIRECTORIES = [
|
||||
'build',
|
||||
'buildtools',
|
||||
'mojo', # TODO(kjellander): Remove, see webrtc:5629.
|
||||
'native_client',
|
||||
'net',
|
||||
'testing',
|
||||
'third_party/binutils',
|
||||
'third_party/drmemory',
|
||||
'third_party/instrumented_libraries',
|
||||
'third_party/libjpeg',
|
||||
'third_party/libjpeg_turbo',
|
||||
'third_party/llvm-build',
|
||||
'third_party/lss',
|
||||
'third_party/proguard',
|
||||
'third_party/tcmalloc',
|
||||
'third_party/yasm',
|
||||
'third_party/WebKit', # TODO(kjellander): Remove, see webrtc:5629.
|
||||
'tools/clang',
|
||||
'tools/gn',
|
||||
'tools/gyp',
|
||||
'tools/memory',
|
||||
'tools/python',
|
||||
'tools/swarming_client',
|
||||
'tools/valgrind',
|
||||
'tools/vim',
|
||||
'tools/win',
|
||||
]
|
||||
|
||||
from sync_chromium import get_target_os_list
|
||||
target_os = get_target_os_list()
|
||||
if 'android' in target_os:
|
||||
DIRECTORIES += [
|
||||
'base',
|
||||
'third_party/accessibility_test_framework',
|
||||
'third_party/android_platform',
|
||||
'third_party/android_tools',
|
||||
'third_party/apache_velocity',
|
||||
'third_party/appurify-python',
|
||||
'third_party/ashmem',
|
||||
'third_party/bouncycastle',
|
||||
'third_party/catapult',
|
||||
'third_party/ced',
|
||||
'third_party/closure_compiler',
|
||||
'third_party/guava',
|
||||
'third_party/hamcrest',
|
||||
'third_party/icu',
|
||||
'third_party/icu4j',
|
||||
'third_party/ijar',
|
||||
'third_party/intellij',
|
||||
'third_party/jsr-305',
|
||||
'third_party/junit',
|
||||
'third_party/libxml',
|
||||
'third_party/mockito',
|
||||
'third_party/modp_b64',
|
||||
'third_party/ow2_asm',
|
||||
'third_party/protobuf',
|
||||
'third_party/requests',
|
||||
'third_party/robolectric',
|
||||
'third_party/sqlite4java',
|
||||
'third_party/zlib',
|
||||
'tools/android',
|
||||
'tools/grit',
|
||||
]
|
||||
if 'ios' in target_os:
|
||||
DIRECTORIES.append('third_party/class-dump')
|
||||
|
||||
FILES = {
|
||||
'tools/isolate_driver.py': None,
|
||||
'third_party/BUILD.gn': None,
|
||||
}
|
||||
|
||||
ROOT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
CHROMIUM_CHECKOUT = os.path.join('chromium', 'src')
|
||||
LINKS_DB = 'links'
|
||||
|
||||
# Version management to make future upgrades/downgrades easier to support.
|
||||
SCHEMA_VERSION = 1
|
||||
|
||||
|
||||
def query_yes_no(question, default=False):
  """Ask a yes/no question on stdin/stdout and return the answer as a bool.

  Modified from http://stackoverflow.com/a/3041990.

  Args:
    question: Text written to stdout before the "[y/n]" prompt.
    default: Value returned when the user just presses enter. True and False
        select the corresponding answer (shown capitalized in the prompt);
        None means there is no default and the question is asked again.
  Returns:
    True for any non-empty prefix of 'yes', False for any non-empty prefix of
    'no' (case-insensitive), or `default` on empty input when one is set.
  """
  prompt = " [%s/%%s]: "
  prompt = prompt % ('Y' if default is True else 'y')
  prompt = prompt % ('N' if default is False else 'n')

  if default is None:
    default = 'INVALID'

  # raw_input() only exists on Python 2; fall back to input() elsewhere.
  try:
    read_line = raw_input
  except NameError:
    read_line = input

  while True:
    sys.stdout.write(question + prompt)
    choice = read_line().lower()
    if choice == '' and default != 'INVALID':
      return default

    # An empty string is a prefix of every string, so it must be rejected
    # explicitly; otherwise empty input without a default would mean "yes".
    if choice and 'yes'.startswith(choice):
      return True
    elif choice and 'no'.startswith(choice):
      return False

    sys.stdout.write("Please respond with 'yes' or 'no' (or 'y' or 'n').\n")
|
||||
|
||||
|
||||
# Actions
|
||||
class Action(object):
  """Base class for one planned filesystem operation.

  Attributes:
    dangerous: Flag supplied by the creator of the action; stored unchanged.
  """

  def __init__(self, dangerous):
    self.dangerous = dangerous

  def announce(self, planning):
    """Log a description of this action.

    Args:
      planning - True iff we're in the planning stage, False if we're in the
                 doit stage.
    """

  def doit(self, links_db):
    """Execute the action, recording what we did to links_db, if necessary."""
|
||||
|
||||
|
||||
class Remove(Action):
  """Action that deletes a single path with os.remove()."""

  def __init__(self, path, dangerous):
    super(Remove, self).__init__(dangerous)
    self._priority = 0
    self._path = path

  def announce(self, planning):
    """Log the (planned) removal.

    Dangerous removals are described as a 'file' at warning level; the others
    are described as a 'link' at info level.
    """
    # logging.warn is a deprecated alias of logging.warning.
    log = logging.warning
    filesystem_type = 'file'
    if not self.dangerous:
      log = logging.info
      filesystem_type = 'link'
    if planning:
      log('Planning to remove %s: %s', filesystem_type, self._path)
    else:
      log('Removing %s: %s', filesystem_type, self._path)

  def doit(self, _):
    """Delete the path; the links database argument is not used."""
    os.remove(self._path)
|
||||
|
||||
|
||||
class Rmtree(Action):
  """Action that recursively deletes a directory; always marked dangerous."""

  def __init__(self, path):
    super(Rmtree, self).__init__(dangerous=True)
    self._priority = 0
    self._path = path

  def announce(self, planning):
    """Log the (planned) directory removal at warning level."""
    # logging.warn is a deprecated alias of logging.warning.
    if planning:
      logging.warning('Planning to remove directory: %s', self._path)
    else:
      logging.warning('Removing directory: %s', self._path)

  def doit(self, _):
    """Remove the directory tree; the links database argument is not used."""
    if sys.platform.startswith('win'):
      # shutil.rmtree() doesn't work on Windows if any of the directories are
      # read-only.
      subprocess.check_call(['rd', '/q', '/s', self._path], shell=True)
    else:
      shutil.rmtree(self._path)
|
||||
|
||||
|
||||
class Makedirs(Action):
  """Non-dangerous action that creates a directory and its missing parents."""

  def __init__(self, path):
    super(Makedirs, self).__init__(dangerous=False)
    self._path = path
    self._priority = 1

  def doit(self, _):
    """Create the directory, tolerating the case where it already exists."""
    try:
      os.makedirs(self._path)
    except OSError as err:
      if err.errno == errno.EEXIST:
        return
      # Any other failure is propagated to the caller unchanged.
      raise
|
||||
|
||||
|
||||
class Symlink(Action):
  """Non-dangerous action that creates a link at link_path to source_path."""

  def __init__(self, source_path, link_path):
    super(Symlink, self).__init__(dangerous=False)
    self._priority = 2
    self._source_path = source_path
    self._link_path = link_path

  def announce(self, planning):
    """Log the link creation: info level while planning, debug when doing."""
    if planning:
      logging.info(
          'Planning to create link from %s to %s', self._link_path,
          self._source_path)
    else:
      logging.debug(
          'Linking from %s to %s', self._link_path, self._source_path)

  def doit(self, links_db):
    """Create the link and record it as links_db[source_path] = link_path."""
    # Files not in the root directory need relative path calculation.
    # On Windows, use absolute paths instead since NTFS doesn't seem to support
    # relative paths for symlinks.
    if sys.platform.startswith('win'):
      source_path = os.path.abspath(self._source_path)
    else:
      link_dir = os.path.dirname(self._link_path)
      if link_dir != self._link_path:
        source_path = os.path.relpath(self._source_path, link_dir)
      else:
        # dirname() returned its own argument (e.g. '' or the filesystem
        # root), so no relative target can be derived; use an absolute one
        # instead of leaving source_path unassigned.
        source_path = os.path.abspath(self._source_path)

    os.symlink(source_path, os.path.abspath(self._link_path))
    links_db[self._source_path] = self._link_path
|
||||
|
||||
|
||||
class LinkError(IOError):
  """Raised when a link cannot be planned or created."""
|
||||
|
||||
|
||||
# Use junctions instead of symlinks on the Windows platform.
|
||||
if sys.platform.startswith('win'):
  def symlink(source_path, link_path):
    """Windows replacement for os.symlink().

    Directories become junctions created through `mklink /J`; anything else
    is copied to link_path.
    """
    if not os.path.isdir(source_path):
      # Don't create symlinks to files on Windows, just copy the file instead
      # (there's no way to create a link without administrator's privileges).
      shutil.copy(source_path, link_path)
      return
    subprocess.check_call(
        ['cmd.exe', '/c', 'mklink', '/J', link_path, source_path])
  # Installed over os.symlink, so every later os.symlink() call uses it.
  os.symlink = symlink
|
||||
|
||||
|
||||
class WebRTCLinkSetup(object):
  """Plans, executes and cleans up the links into the Chromium checkout.

  Args:
    links_db: Mutable mapping of source path -> link path; updated by the
        executed actions and consumed by CleanupLinks().
    force: When False, CreateLinks() exits instead of running dangerous
        actions.
    dry_run: When True, nothing is modified on disk.
    prompt: When True (and force is set), ask for confirmation before
        running a plan containing dangerous actions.
  """

  def __init__(self, links_db, force=False, dry_run=False, prompt=False):
    self._force = force
    self._dry_run = dry_run
    self._prompt = prompt
    self._links_db = links_db

  def CreateLinks(self, on_bot):
    """Build the plan for FILES and DIRECTORIES and execute it.

    Exits the process with status 0 after announcing the plan in dry-run
    mode, and with status 1 when dangerous actions are planned but either
    --force is missing or the user declines the prompt.
    """
    logging.debug('CreateLinks')
    # First, make a plan of action
    actions = []

    for source_path, link_path in FILES.items():
      actions += self._ActionForPath(
          source_path, link_path, check_fn=os.path.isfile, check_msg='files')
    for source_dir in DIRECTORIES:
      actions += self._ActionForPath(
          source_dir, None, check_fn=os.path.isdir,
          check_msg='directories')

    if not on_bot and self._force:
      # When making the manual switch from legacy SVN checkouts to the new
      # Git-based Chromium DEPS, the .gclient_entries file that contains cached
      # URLs for all DEPS entries must be removed to avoid future sync problems.
      entries_file = os.path.join(os.path.dirname(ROOT_DIR), '.gclient_entries')
      if os.path.exists(entries_file):
        actions.append(Remove(entries_file, dangerous=True))

    # Order by the priority each action declares (removals, then directory
    # creation, then links). A bare sort() never looked at _priority and
    # raises TypeError on Python 3. The sort is stable, so actions with equal
    # priority keep their planning order.
    actions.sort(key=lambda action: action._priority)

    if self._dry_run:
      for action in actions:
        action.announce(planning=True)
      logging.info('Not doing anything because dry-run was specified.')
      sys.exit(0)

    if any(a.dangerous for a in actions):
      logging.warning('Dangerous actions:')
      for action in (a for a in actions if a.dangerous):
        action.announce(planning=True)
      print('')

      if not self._force:
        logging.error(textwrap.dedent("""\
        @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
        A C T I O N R E Q U I R E D
        @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

        Setting up the checkout requires creating symlinks to directories in the
        Chromium checkout inside chromium/src.
        To avoid disrupting developers, we've chosen to not delete directories
        forcibly, in case you have some work in progress in one of them :)

        ACTION REQUIRED:
        Before running `gclient sync|runhooks` again, you must run:
        %s%s --force

        Which will replace all directories which now must be symlinks, after
        prompting with a summary of the work-to-be-done.
        """), 'python ' if sys.platform.startswith('win') else '', __file__)
        sys.exit(1)
      elif self._prompt:
        if not query_yes_no('Would you like to perform the above plan?'):
          sys.exit(1)

    for action in actions:
      action.announce(planning=False)
      action.doit(self._links_db)

    if not on_bot and self._force:
      logging.info('Completed!\n\nNow run `gclient sync|runhooks` again to '
                   'let the remaining hooks (that probably were interrupted) '
                   'execute.')

  def CleanupLinks(self):
    """Remove every link recorded in the links database and forget it."""
    logging.debug('CleanupLinks')
    # Iterate over a snapshot: entries are deleted from the mapping inside the
    # loop, which is not safe while iterating the mapping itself.
    for source, link_path in list(self._links_db.items()):
      if source == 'SCHEMA_VERSION':
        continue
      if os.path.islink(link_path) or sys.platform.startswith('win'):
        # os.path.islink() always returns false on Windows
        # See http://bugs.python.org/issue13143.
        logging.debug('Removing link to %s at %s', source, link_path)
        if not self._dry_run:
          if os.path.exists(link_path):
            if sys.platform.startswith('win') and os.path.isdir(link_path):
              subprocess.check_call(['rmdir', '/q', '/s', link_path],
                                    shell=True)
            else:
              os.remove(link_path)
          del self._links_db[source]

  @staticmethod
  def _ActionForPath(source_path, link_path=None, check_fn=None,
                     check_msg=None):
    """Create zero or more Actions to link to a file or directory.

    This will be a symlink on POSIX platforms. On Windows it will result in:
    * a junction for directories
    * a copied file for single files.

    Args:
      source_path: Path relative to the Chromium checkout root.
        For readability, the path may contain slashes, which will
        automatically be converted to the right path delimiter on Windows.
      link_path: The location for the link to create. If omitted it will be the
        same path as source_path.
      check_fn: A function returning true if the type of filesystem object is
        correct for the attempted call. Otherwise a LinkError mentioning
        check_msg is raised.
      check_msg: String used to inform the user of an invalid attempt to create
        a file.
    Returns:
      A list of Action objects.
    Raises:
      LinkError: The source exists but check_fn rejects it, or the existing
        object at link_path is of a kind that cannot be planned for.
    """
    def fix_separators(path):
      if sys.platform.startswith('win'):
        return path.replace(os.altsep, os.sep)
      else:
        return path

    assert check_fn
    assert check_msg
    link_path = link_path or source_path
    link_path = fix_separators(link_path)

    source_path = fix_separators(source_path)
    source_path = os.path.join(CHROMIUM_CHECKOUT, source_path)
    # check_fn must be called on the path; testing the function object itself
    # is always true, which silently disabled this check.
    if os.path.exists(source_path) and not check_fn(source_path):
      raise LinkError('Can only to link to %s: tried to link to: %s' %
                      (check_msg, source_path))

    if not os.path.exists(source_path):
      logging.debug('Silently ignoring missing source: %s. This is to avoid '
                    'errors on platform-specific dependencies.', source_path)
      return []

    actions = []

    if os.path.exists(link_path) or os.path.islink(link_path):
      if os.path.islink(link_path):
        actions.append(Remove(link_path, dangerous=False))
      elif os.path.isfile(link_path):
        actions.append(Remove(link_path, dangerous=True))
      elif os.path.isdir(link_path):
        actions.append(Rmtree(link_path))
      else:
        raise LinkError('Don\'t know how to plan: %s' % link_path)

    # Create parent directories to the target link if needed.
    target_parent_dirs = os.path.dirname(link_path)
    if (target_parent_dirs and
        target_parent_dirs != link_path and
        not os.path.exists(target_parent_dirs)):
      actions.append(Makedirs(target_parent_dirs))

    actions.append(Symlink(source_path, link_path))

    return actions
|
||||
|
||||
def _initialize_database(filename):
  """Opens the shelve file at `filename` and stamps it with SCHEMA_VERSION.

  If the file already records a schema version that differs from the one this
  script supports, all existing entries are discarded before the current
  version is written.

  Args:
    filename: Path handed to shelve.open().

  Returns:
    The opened shelve object; the caller is responsible for closing it.
  """
  db = shelve.open(filename)

  # A stored version that is present but different from ours means the
  # database was written by another revision of this script (possibly a newer
  # one), so its contents cannot be trusted.
  stored_version = db.get('SCHEMA_VERSION')
  needs_wipe = bool(stored_version) and stored_version != SCHEMA_VERSION
  if needs_wipe:
    logging.info('Found database with schema version %s while this script only '
                 'supports %s. Wiping previous database contents.',
                 stored_version, SCHEMA_VERSION)
    db.clear()

  db['SCHEMA_VERSION'] = SCHEMA_VERSION
  return db
|
||||
|
||||
|
||||
def main():
  """Command-line entry point: cleans up and (re)creates the Chromium links.

  Parses the command-line flags, changes the working directory to the one
  containing this script, then removes previously created links and, unless
  --clean-only was given, creates new ones.

  Returns:
    0 on success, 2 if CHROMIUM_CHECKOUT does not exist, 3 if a LinkError was
    raised while cleaning up or creating links.
  """
  # Bots get --force and --no-prompt behavior by default.
  on_bot = os.environ.get('CHROME_HEADLESS') == '1'

  parser = optparse.OptionParser()
  parser.add_option('-d', '--dry-run', action='store_true', default=False,
                    help='Print what would be done, but don\'t perform any '
                         'operations. This will automatically set logging to '
                         'verbose.')
  parser.add_option('-c', '--clean-only', action='store_true', default=False,
                    help='Only clean previously created links, don\'t create '
                         'new ones. This will automatically set logging to '
                         'verbose.')
  parser.add_option('-f', '--force', action='store_true', default=on_bot,
                    help='Force link creation. CAUTION: This deletes existing '
                         'folders and files in the locations where links are '
                         'about to be created.')
  parser.add_option('-n', '--no-prompt', action='store_false', dest='prompt',
                    default=(not on_bot),
                    help='Prompt if we\'re planning to do a dangerous action')
  parser.add_option('-v', '--verbose', action='store_const',
                    const=logging.DEBUG, default=logging.INFO,
                    help='Print verbose output for debugging.')
  options, _ = parser.parse_args()

  # options.verbose holds a logging level, not a boolean.
  if options.dry_run or options.force or options.clean_only:
    options.verbose = logging.DEBUG
  logging.basicConfig(format='%(message)s', level=options.verbose)

  # Work from the root directory of the checkout.
  script_dir = os.path.dirname(os.path.abspath(__file__))
  os.chdir(script_dir)

  if sys.platform.startswith('win'):
    def is_admin():
      try:
        return os.getuid() == 0
      except AttributeError:
        # os.getuid is not available here; ask the Windows shell instead.
        return ctypes.windll.shell32.IsUserAnAdmin() != 0
    if is_admin():
      logging.warning('WARNING: On Windows, you no longer need run as '
                      'administrator. Please run with user account privileges.')

  if not os.path.exists(CHROMIUM_CHECKOUT):
    logging.error('Cannot find a Chromium checkout at %s. Did you run "gclient '
                  'sync" before running this script?', CHROMIUM_CHECKOUT)
    return 2

  links_database = _initialize_database(LINKS_DB)
  try:
    symlink_creator = WebRTCLinkSetup(links_database, options.force,
                                      options.dry_run, options.prompt)
    symlink_creator.CleanupLinks()
    if not options.clean_only:
      symlink_creator.CreateLinks(on_bot)
  except LinkError as e:
    # Written without the Python-2-only "print >>" statement and without the
    # deprecated e.message attribute, so this line behaves the same on
    # Python 2 and also works on Python 3.
    sys.stderr.write('%s\n' % e)
    return 3
  finally:
    # Always close the shelve so recorded links are flushed to disk.
    links_database.close()
  return 0
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
sys.exit(main())
|
||||
+189
-89
@@ -29,10 +29,10 @@ extern "C" {
|
||||
|
||||
// hash seed of 5381 recommended.
|
||||
LIBYUV_API
|
||||
uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
|
||||
uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) {
|
||||
const int kBlockSize = 1 << 15; // 32768;
|
||||
int remainder;
|
||||
uint32 (*HashDjb2_SSE)(const uint8* src, int count, uint32 seed) =
|
||||
uint32_t (*HashDjb2_SSE)(const uint8_t* src, int count, uint32_t seed) =
|
||||
HashDjb2_C;
|
||||
#if defined(HAS_HASHDJB2_SSE41)
|
||||
if (TestCpuFlag(kCpuHasSSE41)) {
|
||||
@@ -45,25 +45,25 @@ uint32 HashDjb2(const uint8* src, uint64 count, uint32 seed) {
|
||||
}
|
||||
#endif
|
||||
|
||||
while (count >= (uint64)(kBlockSize)) {
|
||||
while (count >= (uint64_t)(kBlockSize)) {
|
||||
seed = HashDjb2_SSE(src, kBlockSize, seed);
|
||||
src += kBlockSize;
|
||||
count -= kBlockSize;
|
||||
}
|
||||
remainder = (int)(count) & ~15;
|
||||
remainder = (int)count & ~15;
|
||||
if (remainder) {
|
||||
seed = HashDjb2_SSE(src, remainder, seed);
|
||||
src += remainder;
|
||||
count -= remainder;
|
||||
}
|
||||
remainder = (int)(count) & 15;
|
||||
remainder = (int)count & 15;
|
||||
if (remainder) {
|
||||
seed = HashDjb2_C(src, remainder, seed);
|
||||
}
|
||||
return seed;
|
||||
}
|
||||
|
||||
static uint32 ARGBDetectRow_C(const uint8* argb, int width) {
|
||||
static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) {
|
||||
int x;
|
||||
for (x = 0; x < width - 1; x += 2) {
|
||||
if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB.
|
||||
@@ -94,8 +94,11 @@ static uint32 ARGBDetectRow_C(const uint8* argb, int width) {
|
||||
// Scan an opaque argb image and return fourcc based on alpha offset.
|
||||
// Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown.
|
||||
LIBYUV_API
|
||||
uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
|
||||
uint32 fourcc = 0;
|
||||
uint32_t ARGBDetect(const uint8_t* argb,
|
||||
int stride_argb,
|
||||
int width,
|
||||
int height) {
|
||||
uint32_t fourcc = 0;
|
||||
int h;
|
||||
|
||||
// Coalesce rows.
|
||||
@@ -111,19 +114,86 @@ uint32 ARGBDetect(const uint8* argb, int stride_argb, int width, int height) {
|
||||
return fourcc;
|
||||
}
|
||||
|
||||
// NEON version accumulates in 16 bit shorts which overflow at 65536 bytes.
|
||||
// So actual maximum is 1 less loop, which is 65536 - 32 bytes.
|
||||
|
||||
LIBYUV_API
uint64_t ComputeHammingDistance(const uint8_t* src_a,
                                const uint8_t* src_b,
                                int count) {
  // Returns the number of differing bits between the first count bytes of
  // src_a and src_b. The work is split into three parts: whole 32 KiB blocks,
  // the 64-byte-multiple portion of what is left, and a final tail of fewer
  // than 64 bytes that always goes through the C implementation.
  const int kBlockSize = 1 << 15;  // 32768;
  const int kSimdSize = 64;
  const int block_bytes = count & ~(kBlockSize - 1);
  const int simd_bytes = count & (kBlockSize - 1) & ~(kSimdSize - 1);
  const int tail_bytes = count & (kSimdSize - 1);
  uint64_t total = 0;
  int offset;
  // Row function used for the block and SIMD portions; later checks override
  // earlier ones when several CPU features are available.
  uint32_t (*HammingDistance)(const uint8_t* src_a, const uint8_t* src_b,
                              int count) = HammingDistance_C;
#if defined(HAS_HAMMINGDISTANCE_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    HammingDistance = HammingDistance_NEON;
  }
#endif
#if defined(HAS_HAMMINGDISTANCE_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    HammingDistance = HammingDistance_SSSE3;
  }
#endif
#if defined(HAS_HAMMINGDISTANCE_SSE42)
  if (TestCpuFlag(kCpuHasSSE42)) {
    HammingDistance = HammingDistance_SSE42;
  }
#endif
#if defined(HAS_HAMMINGDISTANCE_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    HammingDistance = HammingDistance_AVX2;
  }
#endif
#if defined(HAS_HAMMINGDISTANCE_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    HammingDistance = HammingDistance_MSA;
  }
#endif
#if defined(HAS_HAMMINGDISTANCE_MMI)
  if (TestCpuFlag(kCpuHasMMI)) {
    HammingDistance = HammingDistance_MMI;
  }
#endif

  // Whole blocks; each call returns a 32-bit partial that is widened into the
  // 64-bit total.
#ifdef _OPENMP
#pragma omp parallel for reduction(+ : total)
#endif
  for (offset = 0; offset < block_bytes; offset += kBlockSize) {
    total += HammingDistance(src_a + offset, src_b + offset, kBlockSize);
  }
  src_a += block_bytes;
  src_b += block_bytes;

  // Multiple-of-64 portion of the remainder.
  if (simd_bytes) {
    total += HammingDistance(src_a, src_b, simd_bytes);
    src_a += simd_bytes;
    src_b += simd_bytes;
  }

  // Last few bytes.
  if (tail_bytes) {
    total += HammingDistance_C(src_a, src_b, tail_bytes);
  }
  return total;
}
|
||||
|
||||
// TODO(fbarchard): Refactor into row function.
|
||||
LIBYUV_API
|
||||
uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
|
||||
int count) {
|
||||
uint64_t ComputeSumSquareError(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
// SumSquareError returns values 0 to 65535 for each squared difference.
|
||||
// Up to 65536 of those can be summed and remain within a uint32.
|
||||
// After each block of 65536 pixels, accumulate into a uint64.
|
||||
// Up to 65536 of those can be summed and remain within a uint32_t.
|
||||
// After each block of 65536 pixels, accumulate into a uint64_t.
|
||||
const int kBlockSize = 65536;
|
||||
int remainder = count & (kBlockSize - 1) & ~31;
|
||||
uint64 sse = 0;
|
||||
uint64_t sse = 0;
|
||||
int i;
|
||||
uint32 (*SumSquareError)(const uint8* src_a, const uint8* src_b, int count) =
|
||||
SumSquareError_C;
|
||||
uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b,
|
||||
int count) = SumSquareError_C;
|
||||
#if defined(HAS_SUMSQUAREERROR_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
SumSquareError = SumSquareError_NEON;
|
||||
@@ -141,8 +211,18 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
|
||||
SumSquareError = SumSquareError_AVX2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SUMSQUAREERROR_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
SumSquareError = SumSquareError_MSA;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SUMSQUAREERROR_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
SumSquareError = SumSquareError_MMI;
|
||||
}
|
||||
#endif
|
||||
#ifdef _OPENMP
|
||||
#pragma omp parallel for reduction(+: sse)
|
||||
#pragma omp parallel for reduction(+ : sse)
|
||||
#endif
|
||||
for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) {
|
||||
sse += SumSquareError(src_a + i, src_b + i, kBlockSize);
|
||||
@@ -162,14 +242,16 @@ uint64 ComputeSumSquareError(const uint8* src_a, const uint8* src_b,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height) {
|
||||
uint64 sse = 0;
|
||||
uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a,
|
||||
int stride_a,
|
||||
const uint8_t* src_b,
|
||||
int stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
uint64_t sse = 0;
|
||||
int h;
|
||||
// Coalesce rows.
|
||||
if (stride_a == width &&
|
||||
stride_b == width) {
|
||||
if (stride_a == width && stride_b == width) {
|
||||
width *= height;
|
||||
height = 1;
|
||||
stride_a = stride_b = 0;
|
||||
@@ -183,66 +265,76 @@ uint64 ComputeSumSquareErrorPlane(const uint8* src_a, int stride_a,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
double SumSquareErrorToPsnr(uint64 sse, uint64 count) {
|
||||
double SumSquareErrorToPsnr(uint64_t sse, uint64_t count) {
|
||||
double psnr;
|
||||
if (sse > 0) {
|
||||
double mse = (double)(count) / (double)(sse);
|
||||
double mse = (double)count / (double)sse;
|
||||
psnr = 10.0 * log10(255.0 * 255.0 * mse);
|
||||
} else {
|
||||
psnr = kMaxPsnr; // Limit to prevent divide by 0
|
||||
psnr = kMaxPsnr; // Limit to prevent divide by 0
|
||||
}
|
||||
|
||||
if (psnr > kMaxPsnr)
|
||||
if (psnr > kMaxPsnr) {
|
||||
psnr = kMaxPsnr;
|
||||
}
|
||||
|
||||
return psnr;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
double CalcFramePsnr(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height) {
|
||||
const uint64 samples = width * height;
|
||||
const uint64 sse = ComputeSumSquareErrorPlane(src_a, stride_a,
|
||||
src_b, stride_b,
|
||||
width, height);
|
||||
double CalcFramePsnr(const uint8_t* src_a,
|
||||
int stride_a,
|
||||
const uint8_t* src_b,
|
||||
int stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
const uint64_t samples = (uint64_t)width * (uint64_t)height;
|
||||
const uint64_t sse = ComputeSumSquareErrorPlane(src_a, stride_a, src_b,
|
||||
stride_b, width, height);
|
||||
return SumSquareErrorToPsnr(sse, samples);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
double I420Psnr(const uint8* src_y_a, int stride_y_a,
|
||||
const uint8* src_u_a, int stride_u_a,
|
||||
const uint8* src_v_a, int stride_v_a,
|
||||
const uint8* src_y_b, int stride_y_b,
|
||||
const uint8* src_u_b, int stride_u_b,
|
||||
const uint8* src_v_b, int stride_v_b,
|
||||
int width, int height) {
|
||||
const uint64 sse_y = ComputeSumSquareErrorPlane(src_y_a, stride_y_a,
|
||||
src_y_b, stride_y_b,
|
||||
width, height);
|
||||
double I420Psnr(const uint8_t* src_y_a,
|
||||
int stride_y_a,
|
||||
const uint8_t* src_u_a,
|
||||
int stride_u_a,
|
||||
const uint8_t* src_v_a,
|
||||
int stride_v_a,
|
||||
const uint8_t* src_y_b,
|
||||
int stride_y_b,
|
||||
const uint8_t* src_u_b,
|
||||
int stride_u_b,
|
||||
const uint8_t* src_v_b,
|
||||
int stride_v_b,
|
||||
int width,
|
||||
int height) {
|
||||
const uint64_t sse_y = ComputeSumSquareErrorPlane(
|
||||
src_y_a, stride_y_a, src_y_b, stride_y_b, width, height);
|
||||
const int width_uv = (width + 1) >> 1;
|
||||
const int height_uv = (height + 1) >> 1;
|
||||
const uint64 sse_u = ComputeSumSquareErrorPlane(src_u_a, stride_u_a,
|
||||
src_u_b, stride_u_b,
|
||||
width_uv, height_uv);
|
||||
const uint64 sse_v = ComputeSumSquareErrorPlane(src_v_a, stride_v_a,
|
||||
src_v_b, stride_v_b,
|
||||
width_uv, height_uv);
|
||||
const uint64 samples = width * height + 2 * (width_uv * height_uv);
|
||||
const uint64 sse = sse_y + sse_u + sse_v;
|
||||
const uint64_t sse_u = ComputeSumSquareErrorPlane(
|
||||
src_u_a, stride_u_a, src_u_b, stride_u_b, width_uv, height_uv);
|
||||
const uint64_t sse_v = ComputeSumSquareErrorPlane(
|
||||
src_v_a, stride_v_a, src_v_b, stride_v_b, width_uv, height_uv);
|
||||
const uint64_t samples = (uint64_t)width * (uint64_t)height +
|
||||
2 * ((uint64_t)width_uv * (uint64_t)height_uv);
|
||||
const uint64_t sse = sse_y + sse_u + sse_v;
|
||||
return SumSquareErrorToPsnr(sse, samples);
|
||||
}
|
||||
|
||||
static const int64 cc1 = 26634; // (64^2*(.01*255)^2
|
||||
static const int64 cc2 = 239708; // (64^2*(.03*255)^2
|
||||
static const int64_t cc1 = 26634; // (64^2*(.01*255)^2
|
||||
static const int64_t cc2 = 239708; // (64^2*(.03*255)^2
|
||||
|
||||
static double Ssim8x8_C(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b) {
|
||||
int64 sum_a = 0;
|
||||
int64 sum_b = 0;
|
||||
int64 sum_sq_a = 0;
|
||||
int64 sum_sq_b = 0;
|
||||
int64 sum_axb = 0;
|
||||
static double Ssim8x8_C(const uint8_t* src_a,
|
||||
int stride_a,
|
||||
const uint8_t* src_b,
|
||||
int stride_b) {
|
||||
int64_t sum_a = 0;
|
||||
int64_t sum_b = 0;
|
||||
int64_t sum_sq_a = 0;
|
||||
int64_t sum_sq_b = 0;
|
||||
int64_t sum_axb = 0;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < 8; ++i) {
|
||||
@@ -260,22 +352,22 @@ static double Ssim8x8_C(const uint8* src_a, int stride_a,
|
||||
}
|
||||
|
||||
{
|
||||
const int64 count = 64;
|
||||
const int64_t count = 64;
|
||||
// scale the constants by number of pixels
|
||||
const int64 c1 = (cc1 * count * count) >> 12;
|
||||
const int64 c2 = (cc2 * count * count) >> 12;
|
||||
const int64_t c1 = (cc1 * count * count) >> 12;
|
||||
const int64_t c2 = (cc2 * count * count) >> 12;
|
||||
|
||||
const int64 sum_a_x_sum_b = sum_a * sum_b;
|
||||
const int64_t sum_a_x_sum_b = sum_a * sum_b;
|
||||
|
||||
const int64 ssim_n = (2 * sum_a_x_sum_b + c1) *
|
||||
(2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
|
||||
const int64_t ssim_n = (2 * sum_a_x_sum_b + c1) *
|
||||
(2 * count * sum_axb - 2 * sum_a_x_sum_b + c2);
|
||||
|
||||
const int64 sum_a_sq = sum_a*sum_a;
|
||||
const int64 sum_b_sq = sum_b*sum_b;
|
||||
const int64_t sum_a_sq = sum_a * sum_a;
|
||||
const int64_t sum_b_sq = sum_b * sum_b;
|
||||
|
||||
const int64 ssim_d = (sum_a_sq + sum_b_sq + c1) *
|
||||
(count * sum_sq_a - sum_a_sq +
|
||||
count * sum_sq_b - sum_b_sq + c2);
|
||||
const int64_t ssim_d =
|
||||
(sum_a_sq + sum_b_sq + c1) *
|
||||
(count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2);
|
||||
|
||||
if (ssim_d == 0.0) {
|
||||
return DBL_MAX;
|
||||
@@ -288,13 +380,16 @@ static double Ssim8x8_C(const uint8* src_a, int stride_a,
|
||||
// on the 4x4 pixel grid. Such arrangement allows the windows to overlap
|
||||
// block boundaries to penalize blocking artifacts.
|
||||
LIBYUV_API
|
||||
double CalcFrameSsim(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b,
|
||||
int width, int height) {
|
||||
double CalcFrameSsim(const uint8_t* src_a,
|
||||
int stride_a,
|
||||
const uint8_t* src_b,
|
||||
int stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
int samples = 0;
|
||||
double ssim_total = 0;
|
||||
double (*Ssim8x8)(const uint8* src_a, int stride_a,
|
||||
const uint8* src_b, int stride_b) = Ssim8x8_C;
|
||||
double (*Ssim8x8)(const uint8_t* src_a, int stride_a, const uint8_t* src_b,
|
||||
int stride_b) = Ssim8x8_C;
|
||||
|
||||
// sample point start with each 4x4 location
|
||||
int i;
|
||||
@@ -314,22 +409,27 @@ double CalcFrameSsim(const uint8* src_a, int stride_a,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
double I420Ssim(const uint8* src_y_a, int stride_y_a,
|
||||
const uint8* src_u_a, int stride_u_a,
|
||||
const uint8* src_v_a, int stride_v_a,
|
||||
const uint8* src_y_b, int stride_y_b,
|
||||
const uint8* src_u_b, int stride_u_b,
|
||||
const uint8* src_v_b, int stride_v_b,
|
||||
int width, int height) {
|
||||
const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a,
|
||||
src_y_b, stride_y_b, width, height);
|
||||
double I420Ssim(const uint8_t* src_y_a,
|
||||
int stride_y_a,
|
||||
const uint8_t* src_u_a,
|
||||
int stride_u_a,
|
||||
const uint8_t* src_v_a,
|
||||
int stride_v_a,
|
||||
const uint8_t* src_y_b,
|
||||
int stride_y_b,
|
||||
const uint8_t* src_u_b,
|
||||
int stride_u_b,
|
||||
const uint8_t* src_v_b,
|
||||
int stride_v_b,
|
||||
int width,
|
||||
int height) {
|
||||
const double ssim_y =
|
||||
CalcFrameSsim(src_y_a, stride_y_a, src_y_b, stride_y_b, width, height);
|
||||
const int width_uv = (width + 1) >> 1;
|
||||
const int height_uv = (height + 1) >> 1;
|
||||
const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a,
|
||||
src_u_b, stride_u_b,
|
||||
const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a, src_u_b, stride_u_b,
|
||||
width_uv, height_uv);
|
||||
const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a,
|
||||
src_v_b, stride_v_b,
|
||||
const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a, src_v_b, stride_v_b,
|
||||
width_uv, height_uv);
|
||||
return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v);
|
||||
}
|
||||
|
||||
@@ -17,20 +17,80 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
uint32 SumSquareError_C(const uint8* src_a, const uint8* src_b, int count) {
|
||||
uint32 sse = 0u;
|
||||
#if ORIGINAL_OPT
|
||||
// Reference bit counter: returns how many bit positions differ between the
// first count bytes of src_a and src_b, testing one bit at a time.
uint32_t HammingDistance_C1(const uint8_t* src_a,
                            const uint8_t* src_b,
                            int count) {
  uint32_t total = 0u;
  int pos;

  for (pos = 0; pos < count; ++pos) {
    const int mismatch = src_a[pos] ^ src_b[pos];
    int mask;
    // Walk the eight bit positions 1, 2, 4, ... 128 of the XOR.
    for (mask = 1; mask <= 128; mask <<= 1) {
      if (mismatch & mask) {
        ++total;
      }
    }
  }
  return total;
}
|
||||
#endif
|
||||
|
||||
// Hakmem method for hamming distance.
// Returns the number of bit positions that differ between the first count
// bytes of src_a and src_b. Four bytes are processed per iteration of the
// main loop; any remaining 1-3 bytes are handled one at a time.
// The pointers may have any alignment.
uint32_t HammingDistance_C(const uint8_t* src_a,
                           const uint8_t* src_b,
                           int count) {
  uint32_t diff = 0u;

  int i;
  for (i = 0; i < count - 3; i += 4) {
    // Build the 32-bit XOR word from individual bytes rather than reading
    // through a uint32_t pointer: that avoids a misaligned load and an
    // aliasing violation. The bit count does not depend on byte order.
    uint32_t x = (uint32_t)(src_a[0] ^ src_b[0]) |
                 ((uint32_t)(src_a[1] ^ src_b[1]) << 8) |
                 ((uint32_t)(src_a[2] ^ src_b[2]) << 16) |
                 ((uint32_t)(src_a[3] ^ src_b[3]) << 24);
    // Parallel bit count: 2-bit sums, then 4-bit sums, then byte sums that
    // the multiply accumulates into the top byte.
    uint32_t u = x - ((x >> 1) & 0x55555555);
    u = ((u >> 2) & 0x33333333) + (u & 0x33333333);
    diff += ((((u + (u >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24);
    src_a += 4;
    src_b += 4;
  }

  // Same reduction on a single byte for the tail.
  for (; i < count; ++i) {
    uint32_t x = *src_a ^ *src_b;
    uint32_t u = x - ((x >> 1) & 0x55);
    u = ((u >> 2) & 0x33) + (u & 0x33);
    diff += (u + (u >> 4)) & 0x0f;
    src_a += 1;
    src_b += 1;
  }

  return diff;
}
|
||||
|
||||
uint32_t SumSquareError_C(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t sse = 0u;
|
||||
int i;
|
||||
for (i = 0; i < count; ++i) {
|
||||
int diff = src_a[i] - src_b[i];
|
||||
sse += (uint32)(diff * diff);
|
||||
sse += (uint32_t)(diff * diff);
|
||||
}
|
||||
return sse;
|
||||
}
|
||||
|
||||
// hash seed of 5381 recommended.
|
||||
// Internal C version of HashDjb2 with int sized count for efficiency.
|
||||
uint32 HashDjb2_C(const uint8* src, int count, uint32 seed) {
|
||||
uint32 hash = seed;
|
||||
uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed) {
|
||||
uint32_t hash = seed;
|
||||
int i;
|
||||
for (i = 0; i < count; ++i) {
|
||||
hash += (hash << 5) + src[i];
|
||||
|
||||
+318
-109
@@ -22,124 +22,334 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))
|
||||
|
||||
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
uint32 sse;
|
||||
asm volatile (
|
||||
"pxor %%xmm0,%%xmm0 \n"
|
||||
"pxor %%xmm5,%%xmm5 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x10, 0) ",%0 \n"
|
||||
"movdqu " MEMACCESS(1) ",%%xmm2 \n"
|
||||
"lea " MEMLEA(0x10, 1) ",%1 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"psubusb %%xmm2,%%xmm1 \n"
|
||||
"psubusb %%xmm3,%%xmm2 \n"
|
||||
"por %%xmm2,%%xmm1 \n"
|
||||
"movdqa %%xmm1,%%xmm2 \n"
|
||||
"punpcklbw %%xmm5,%%xmm1 \n"
|
||||
"punpckhbw %%xmm5,%%xmm2 \n"
|
||||
"pmaddwd %%xmm1,%%xmm1 \n"
|
||||
"pmaddwd %%xmm2,%%xmm2 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"paddd %%xmm2,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"jg 1b \n"
|
||||
#if defined(__x86_64__)
|
||||
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint64_t diff = 0u;
|
||||
|
||||
"pshufd $0xee,%%xmm0,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"pshufd $0x1,%%xmm0,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"movd %%xmm0,%3 \n"
|
||||
asm volatile(
|
||||
"xor %3,%3 \n"
|
||||
"xor %%r8,%%r8 \n"
|
||||
"xor %%r9,%%r9 \n"
|
||||
"xor %%r10,%%r10 \n"
|
||||
|
||||
: "+r"(src_a), // %0
|
||||
"+r"(src_b), // %1
|
||||
"+r"(count), // %2
|
||||
"=g"(sse) // %3
|
||||
:: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"
|
||||
);
|
||||
// Process 32 bytes per loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"mov (%0),%%rcx \n"
|
||||
"mov 0x8(%0),%%rdx \n"
|
||||
"xor (%1),%%rcx \n"
|
||||
"xor 0x8(%1),%%rdx \n"
|
||||
"popcnt %%rcx,%%rcx \n"
|
||||
"popcnt %%rdx,%%rdx \n"
|
||||
"mov 0x10(%0),%%rsi \n"
|
||||
"mov 0x18(%0),%%rdi \n"
|
||||
"xor 0x10(%1),%%rsi \n"
|
||||
"xor 0x18(%1),%%rdi \n"
|
||||
"popcnt %%rsi,%%rsi \n"
|
||||
"popcnt %%rdi,%%rdi \n"
|
||||
"add $0x20,%0 \n"
|
||||
"add $0x20,%1 \n"
|
||||
"add %%rcx,%3 \n"
|
||||
"add %%rdx,%%r8 \n"
|
||||
"add %%rsi,%%r9 \n"
|
||||
"add %%rdi,%%r10 \n"
|
||||
"sub $0x20,%2 \n"
|
||||
"jg 1b \n"
|
||||
|
||||
"add %%r8, %3 \n"
|
||||
"add %%r9, %3 \n"
|
||||
"add %%r10, %3 \n"
|
||||
: "+r"(src_a), // %0
|
||||
"+r"(src_b), // %1
|
||||
"+r"(count), // %2
|
||||
"=r"(diff) // %3
|
||||
:
|
||||
: "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10");
|
||||
|
||||
return static_cast<uint32_t>(diff);
|
||||
}
|
||||
#else
|
||||
// 32-bit x86 variant (the #else branch of the __x86_64__ check above).
// Adds up the number of set bits in src_a XOR src_b using popcnt on four
// 32-bit words per loop iteration and returns the total.
// The loop body runs before count is tested (sub/jg at the bottom), so one
// iteration always executes and bytes are consumed 16 at a time; count is
// presumably expected to be a positive multiple of 16 -- TODO confirm against
// the callers.
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t diff = 0u;
|
||||
|
||||
asm volatile(
|
||||
// Process 16 bytes per loop.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"mov (%0),%%ecx \n"
|
||||
"mov 0x4(%0),%%edx \n"
|
||||
"xor (%1),%%ecx \n"
|
||||
"xor 0x4(%1),%%edx \n"
|
||||
"popcnt %%ecx,%%ecx \n"
|
||||
"add %%ecx,%3 \n"
|
||||
"popcnt %%edx,%%edx \n"
|
||||
"add %%edx,%3 \n"
|
||||
"mov 0x8(%0),%%ecx \n"
|
||||
"mov 0xc(%0),%%edx \n"
|
||||
"xor 0x8(%1),%%ecx \n"
|
||||
"xor 0xc(%1),%%edx \n"
|
||||
"popcnt %%ecx,%%ecx \n"
|
||||
"add %%ecx,%3 \n"
|
||||
"popcnt %%edx,%%edx \n"
|
||||
"add %%edx,%3 \n"
|
||||
"add $0x10,%0 \n"
|
||||
"add $0x10,%1 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"jg 1b \n"
|
||||
// diff is a read-write operand: it enters the asm as 0 and accumulates.
: "+r"(src_a), // %0
|
||||
"+r"(src_b), // %1
|
||||
"+r"(count), // %2
|
||||
"+r"(diff) // %3
|
||||
:
|
||||
: "memory", "cc", "ecx", "edx");
|
||||
|
||||
return diff;
|
||||
}
|
||||
#endif
|
||||
|
||||
static const vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15,
|
||||
15, 15, 15, 15, 15, 15, 15, 15};
|
||||
static const vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4};
|
||||
|
||||
uint32_t HammingDistance_SSSE3(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t diff = 0u;
|
||||
|
||||
asm volatile(
|
||||
"movdqa %4,%%xmm2 \n"
|
||||
"movdqa %5,%%xmm3 \n"
|
||||
"pxor %%xmm0,%%xmm0 \n"
|
||||
"pxor %%xmm1,%%xmm1 \n"
|
||||
"sub %0,%1 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqa (%0),%%xmm4 \n"
|
||||
"movdqa 0x10(%0), %%xmm5 \n"
|
||||
"pxor (%0,%1), %%xmm4 \n"
|
||||
"movdqa %%xmm4,%%xmm6 \n"
|
||||
"pand %%xmm2,%%xmm6 \n"
|
||||
"psrlw $0x4,%%xmm4 \n"
|
||||
"movdqa %%xmm3,%%xmm7 \n"
|
||||
"pshufb %%xmm6,%%xmm7 \n"
|
||||
"pand %%xmm2,%%xmm4 \n"
|
||||
"movdqa %%xmm3,%%xmm6 \n"
|
||||
"pshufb %%xmm4,%%xmm6 \n"
|
||||
"paddb %%xmm7,%%xmm6 \n"
|
||||
"pxor 0x10(%0,%1),%%xmm5 \n"
|
||||
"add $0x20,%0 \n"
|
||||
"movdqa %%xmm5,%%xmm4 \n"
|
||||
"pand %%xmm2,%%xmm5 \n"
|
||||
"psrlw $0x4,%%xmm4 \n"
|
||||
"movdqa %%xmm3,%%xmm7 \n"
|
||||
"pshufb %%xmm5,%%xmm7 \n"
|
||||
"pand %%xmm2,%%xmm4 \n"
|
||||
"movdqa %%xmm3,%%xmm5 \n"
|
||||
"pshufb %%xmm4,%%xmm5 \n"
|
||||
"paddb %%xmm7,%%xmm5 \n"
|
||||
"paddb %%xmm5,%%xmm6 \n"
|
||||
"psadbw %%xmm1,%%xmm6 \n"
|
||||
"paddd %%xmm6,%%xmm0 \n"
|
||||
"sub $0x20,%2 \n"
|
||||
"jg 1b \n"
|
||||
|
||||
"pshufd $0xaa,%%xmm0,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"movd %%xmm0, %3 \n"
|
||||
: "+r"(src_a), // %0
|
||||
"+r"(src_b), // %1
|
||||
"+r"(count), // %2
|
||||
"=r"(diff) // %3
|
||||
: "m"(kNibbleMask), // %4
|
||||
"m"(kBitCount) // %5
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
|
||||
return diff;
|
||||
}
|
||||
|
||||
#ifdef HAS_HAMMINGDISTANCE_AVX2
// Returns the number of differing bits between src_a and src_b, 64 bytes per
// loop iteration. Each XORed byte is split into nibbles that are looked up in
// the kBitCount table with vpshufb; vpsadbw against zero sums the per-byte
// counts into ymm0.
// The loop body runs before count is tested (sub/jg), so one iteration always
// executes and bytes are consumed 64 at a time.
// Source data is loaded with vmovdqu, so src_a and src_b may have any
// alignment.
uint32_t HammingDistance_AVX2(const uint8_t* src_a,
                              const uint8_t* src_b,
                              int count) {
  uint32_t diff = 0u;

  asm volatile(
      "vbroadcastf128 %4,%%ymm2                  \n"  // nibble mask
      "vbroadcastf128 %5,%%ymm3                  \n"  // bit-count table
      "vpxor     %%ymm0,%%ymm0,%%ymm0            \n"  // accumulator
      "vpxor     %%ymm1,%%ymm1,%%ymm1            \n"  // zero for vpsadbw
      "sub       %0,%1                           \n"  // %1 = src_b - src_a

      LABELALIGN
      "1:                                        \n"
      // Unaligned loads: vmovdqa would fault unless src_a is 32-byte aligned.
      "vmovdqu   (%0),%%ymm4                     \n"
      "vmovdqu   0x20(%0), %%ymm5                \n"
      "vpxor     (%0,%1), %%ymm4, %%ymm4         \n"
      "vpand     %%ymm2,%%ymm4,%%ymm6            \n"
      "vpsrlw    $0x4,%%ymm4,%%ymm4              \n"
      "vpshufb   %%ymm6,%%ymm3,%%ymm6            \n"
      "vpand     %%ymm2,%%ymm4,%%ymm4            \n"
      "vpshufb   %%ymm4,%%ymm3,%%ymm4            \n"
      "vpaddb    %%ymm4,%%ymm6,%%ymm6            \n"
      "vpxor     0x20(%0,%1),%%ymm5,%%ymm4       \n"
      "add       $0x40,%0                        \n"
      "vpand     %%ymm2,%%ymm4,%%ymm5            \n"
      "vpsrlw    $0x4,%%ymm4,%%ymm4              \n"
      "vpshufb   %%ymm5,%%ymm3,%%ymm5            \n"
      "vpand     %%ymm2,%%ymm4,%%ymm4            \n"
      "vpshufb   %%ymm4,%%ymm3,%%ymm4            \n"
      "vpaddb    %%ymm5,%%ymm4,%%ymm4            \n"
      "vpaddb    %%ymm6,%%ymm4,%%ymm4            \n"
      "vpsadbw   %%ymm1,%%ymm4,%%ymm4            \n"
      "vpaddd    %%ymm0,%%ymm4,%%ymm0            \n"
      "sub       $0x40,%2                        \n"
      "jg        1b                              \n"

      // Fold the four 64-bit partial sums down into the lowest lane.
      "vpermq    $0xb1,%%ymm0,%%ymm1             \n"
      "vpaddd    %%ymm1,%%ymm0,%%ymm0            \n"
      "vpermq    $0xaa,%%ymm0,%%ymm1             \n"
      "vpaddd    %%ymm1,%%ymm0,%%ymm0            \n"
      "vmovd     %%xmm0, %3                      \n"
      "vzeroupper                                \n"
      : "+r"(src_a),       // %0
        "+r"(src_b),       // %1
        "+r"(count),       // %2
        "=r"(diff)         // %3
      : "m"(kNibbleMask),  // %4
        "m"(kBitCount)     // %5
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6");

  return diff;
}
#endif  // HAS_HAMMINGDISTANCE_AVX2
|
||||
|
||||
// Returns the sum of squared differences between corresponding bytes of
// src_a and src_b, processing 16 bytes per loop iteration.
// The loop body runs before count is tested (sub/jg at the bottom), so one
// iteration always executes and bytes are consumed 16 at a time; count is
// presumably expected to be a positive multiple of 16 -- TODO confirm against
// the callers.
uint32_t SumSquareError_SSE2(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t sse;
|
||||
asm volatile(
|
||||
// xmm0 accumulates four 32-bit partial sums; xmm5 stays zero and is used
// to widen bytes to 16-bit words.
"pxor %%xmm0,%%xmm0 \n"
|
||||
"pxor %%xmm5,%%xmm5 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm1 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"movdqu (%1),%%xmm2 \n"
|
||||
"lea 0x10(%1),%1 \n"
|
||||
// Absolute difference of unsigned bytes: saturating a-b OR saturating b-a.
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"psubusb %%xmm2,%%xmm1 \n"
|
||||
"psubusb %%xmm3,%%xmm2 \n"
|
||||
"por %%xmm2,%%xmm1 \n"
|
||||
"movdqa %%xmm1,%%xmm2 \n"
|
||||
"punpcklbw %%xmm5,%%xmm1 \n"
|
||||
"punpckhbw %%xmm5,%%xmm2 \n"
|
||||
// pmaddwd of a register with itself squares each word and adds adjacent
// pairs into 32-bit lanes.
"pmaddwd %%xmm1,%%xmm1 \n"
|
||||
"pmaddwd %%xmm2,%%xmm2 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"paddd %%xmm2,%%xmm0 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"jg 1b \n"
|
||||
|
||||
// Horizontal add of the four 32-bit lanes into the lowest lane.
"pshufd $0xee,%%xmm0,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"pshufd $0x1,%%xmm0,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"movd %%xmm0,%3 \n"
|
||||
|
||||
: "+r"(src_a), // %0
|
||||
"+r"(src_b), // %1
|
||||
"+r"(count), // %2
|
||||
"=g"(sse) // %3
|
||||
::"memory",
|
||||
"cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5");
|
||||
return sse;
|
||||
}
|
||||
|
||||
static uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
|
||||
static uvec32 kHashMul0 = {
|
||||
0x0c3525e1, // 33 ^ 15
|
||||
0xa3476dc1, // 33 ^ 14
|
||||
0x3b4039a1, // 33 ^ 13
|
||||
0x4f5f0981, // 33 ^ 12
|
||||
static const uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16
|
||||
static const uvec32 kHashMul0 = {
|
||||
0x0c3525e1, // 33 ^ 15
|
||||
0xa3476dc1, // 33 ^ 14
|
||||
0x3b4039a1, // 33 ^ 13
|
||||
0x4f5f0981, // 33 ^ 12
|
||||
};
|
||||
static uvec32 kHashMul1 = {
|
||||
0x30f35d61, // 33 ^ 11
|
||||
0x855cb541, // 33 ^ 10
|
||||
0x040a9121, // 33 ^ 9
|
||||
0x747c7101, // 33 ^ 8
|
||||
static const uvec32 kHashMul1 = {
|
||||
0x30f35d61, // 33 ^ 11
|
||||
0x855cb541, // 33 ^ 10
|
||||
0x040a9121, // 33 ^ 9
|
||||
0x747c7101, // 33 ^ 8
|
||||
};
|
||||
static uvec32 kHashMul2 = {
|
||||
0xec41d4e1, // 33 ^ 7
|
||||
0x4cfa3cc1, // 33 ^ 6
|
||||
0x025528a1, // 33 ^ 5
|
||||
0x00121881, // 33 ^ 4
|
||||
static const uvec32 kHashMul2 = {
|
||||
0xec41d4e1, // 33 ^ 7
|
||||
0x4cfa3cc1, // 33 ^ 6
|
||||
0x025528a1, // 33 ^ 5
|
||||
0x00121881, // 33 ^ 4
|
||||
};
|
||||
static uvec32 kHashMul3 = {
|
||||
0x00008c61, // 33 ^ 3
|
||||
0x00000441, // 33 ^ 2
|
||||
0x00000021, // 33 ^ 1
|
||||
0x00000001, // 33 ^ 0
|
||||
static const uvec32 kHashMul3 = {
|
||||
0x00008c61, // 33 ^ 3
|
||||
0x00000441, // 33 ^ 2
|
||||
0x00000021, // 33 ^ 1
|
||||
0x00000001, // 33 ^ 0
|
||||
};
|
||||
|
||||
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
uint32 hash;
|
||||
asm volatile (
|
||||
"movd %2,%%xmm0 \n"
|
||||
"pxor %%xmm7,%%xmm7 \n"
|
||||
"movdqa %4,%%xmm6 \n"
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu " MEMACCESS(0) ",%%xmm1 \n"
|
||||
"lea " MEMLEA(0x10, 0) ",%0 \n"
|
||||
"pmulld %%xmm6,%%xmm0 \n"
|
||||
"movdqa %5,%%xmm5 \n"
|
||||
"movdqa %%xmm1,%%xmm2 \n"
|
||||
"punpcklbw %%xmm7,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"punpcklwd %%xmm7,%%xmm3 \n"
|
||||
"pmulld %%xmm5,%%xmm3 \n"
|
||||
"movdqa %6,%%xmm5 \n"
|
||||
"movdqa %%xmm2,%%xmm4 \n"
|
||||
"punpckhwd %%xmm7,%%xmm4 \n"
|
||||
"pmulld %%xmm5,%%xmm4 \n"
|
||||
"movdqa %7,%%xmm5 \n"
|
||||
"punpckhbw %%xmm7,%%xmm1 \n"
|
||||
"movdqa %%xmm1,%%xmm2 \n"
|
||||
"punpcklwd %%xmm7,%%xmm2 \n"
|
||||
"pmulld %%xmm5,%%xmm2 \n"
|
||||
"movdqa %8,%%xmm5 \n"
|
||||
"punpckhwd %%xmm7,%%xmm1 \n"
|
||||
"pmulld %%xmm5,%%xmm1 \n"
|
||||
"paddd %%xmm4,%%xmm3 \n"
|
||||
"paddd %%xmm2,%%xmm1 \n"
|
||||
"paddd %%xmm3,%%xmm1 \n"
|
||||
"pshufd $0xe,%%xmm1,%%xmm2 \n"
|
||||
"paddd %%xmm2,%%xmm1 \n"
|
||||
"pshufd $0x1,%%xmm1,%%xmm2 \n"
|
||||
"paddd %%xmm2,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"sub $0x10,%1 \n"
|
||||
"jg 1b \n"
|
||||
"movd %%xmm0,%3 \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(count), // %1
|
||||
"+rm"(seed), // %2
|
||||
"=g"(hash) // %3
|
||||
: "m"(kHash16x33), // %4
|
||||
"m"(kHashMul0), // %5
|
||||
"m"(kHashMul1), // %6
|
||||
"m"(kHashMul2), // %7
|
||||
"m"(kHashMul3) // %8
|
||||
: "memory", "cc"
|
||||
, "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
);
|
||||
uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
|
||||
uint32_t hash;
|
||||
asm volatile(
|
||||
"movd %2,%%xmm0 \n"
|
||||
"pxor %%xmm7,%%xmm7 \n"
|
||||
"movdqa %4,%%xmm6 \n"
|
||||
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm1 \n"
|
||||
"lea 0x10(%0),%0 \n"
|
||||
"pmulld %%xmm6,%%xmm0 \n"
|
||||
"movdqa %5,%%xmm5 \n"
|
||||
"movdqa %%xmm1,%%xmm2 \n"
|
||||
"punpcklbw %%xmm7,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"punpcklwd %%xmm7,%%xmm3 \n"
|
||||
"pmulld %%xmm5,%%xmm3 \n"
|
||||
"movdqa %6,%%xmm5 \n"
|
||||
"movdqa %%xmm2,%%xmm4 \n"
|
||||
"punpckhwd %%xmm7,%%xmm4 \n"
|
||||
"pmulld %%xmm5,%%xmm4 \n"
|
||||
"movdqa %7,%%xmm5 \n"
|
||||
"punpckhbw %%xmm7,%%xmm1 \n"
|
||||
"movdqa %%xmm1,%%xmm2 \n"
|
||||
"punpcklwd %%xmm7,%%xmm2 \n"
|
||||
"pmulld %%xmm5,%%xmm2 \n"
|
||||
"movdqa %8,%%xmm5 \n"
|
||||
"punpckhwd %%xmm7,%%xmm1 \n"
|
||||
"pmulld %%xmm5,%%xmm1 \n"
|
||||
"paddd %%xmm4,%%xmm3 \n"
|
||||
"paddd %%xmm2,%%xmm1 \n"
|
||||
"paddd %%xmm3,%%xmm1 \n"
|
||||
"pshufd $0xe,%%xmm1,%%xmm2 \n"
|
||||
"paddd %%xmm2,%%xmm1 \n"
|
||||
"pshufd $0x1,%%xmm1,%%xmm2 \n"
|
||||
"paddd %%xmm2,%%xmm1 \n"
|
||||
"paddd %%xmm1,%%xmm0 \n"
|
||||
"sub $0x10,%1 \n"
|
||||
"jg 1b \n"
|
||||
"movd %%xmm0,%3 \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(count), // %1
|
||||
"+rm"(seed), // %2
|
||||
"=g"(hash) // %3
|
||||
: "m"(kHash16x33), // %4
|
||||
"m"(kHashMul0), // %5
|
||||
"m"(kHashMul1), // %6
|
||||
"m"(kHashMul2), // %7
|
||||
"m"(kHashMul3) // %8
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
return hash;
|
||||
}
|
||||
#endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__)))
|
||||
@@ -148,4 +358,3 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
|
||||
@@ -0,0 +1,123 @@
|
||||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#include "libyuv/compare_row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// This module is for Mips MMI.
|
||||
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
|
||||
|
||||
// Hakmem method for hamming distance.
|
||||
uint32_t HammingDistance_MMI(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t diff = 0u;
|
||||
|
||||
uint64_t temp = 0, temp1 = 0, ta = 0, tb = 0;
|
||||
uint64_t c1 = 0x5555555555555555;
|
||||
uint64_t c2 = 0x3333333333333333;
|
||||
uint64_t c3 = 0x0f0f0f0f0f0f0f0f;
|
||||
uint32_t c4 = 0x01010101;
|
||||
uint64_t s1 = 1, s2 = 2, s3 = 4;
|
||||
__asm__ volatile(
|
||||
"1: \n\t"
|
||||
"ldc1 %[ta], 0(%[src_a]) \n\t"
|
||||
"ldc1 %[tb], 0(%[src_b]) \n\t"
|
||||
"xor %[temp], %[ta], %[tb] \n\t"
|
||||
"psrlw %[temp1], %[temp], %[s1] \n\t" // temp1=x>>1
|
||||
"and %[temp1], %[temp1], %[c1] \n\t" // temp1&=c1
|
||||
"psubw %[temp1], %[temp], %[temp1] \n\t" // x-temp1
|
||||
"and %[temp], %[temp1], %[c2] \n\t" // t = (u&c2)
|
||||
"psrlw %[temp1], %[temp1], %[s2] \n\t" // u>>2
|
||||
"and %[temp1], %[temp1], %[c2] \n\t" // u>>2 & c2
|
||||
"paddw %[temp1], %[temp1], %[temp] \n\t" // t1 = t1+t
|
||||
"psrlw %[temp], %[temp1], %[s3] \n\t" // u>>4
|
||||
"paddw %[temp1], %[temp1], %[temp] \n\t" // u+(u>>4)
|
||||
"and %[temp1], %[temp1], %[c3] \n\t" //&c3
|
||||
"dmfc1 $t0, %[temp1] \n\t"
|
||||
"dsrl32 $t0, $t0, 0 \n\t "
|
||||
"mul $t0, $t0, %[c4] \n\t"
|
||||
"dsrl $t0, $t0, 24 \n\t"
|
||||
"dadd %[diff], %[diff], $t0 \n\t"
|
||||
"dmfc1 $t0, %[temp1] \n\t"
|
||||
"mul $t0, $t0, %[c4] \n\t"
|
||||
"dsrl $t0, $t0, 24 \n\t"
|
||||
"dadd %[diff], %[diff], $t0 \n\t"
|
||||
"daddiu %[src_a], %[src_a], 8 \n\t"
|
||||
"daddiu %[src_b], %[src_b], 8 \n\t"
|
||||
"addiu %[count], %[count], -8 \n\t"
|
||||
"bgtz %[count], 1b \n\t"
|
||||
"nop \n\t"
|
||||
: [diff] "+r"(diff), [src_a] "+r"(src_a), [src_b] "+r"(src_b),
|
||||
[count] "+r"(count), [ta] "+f"(ta), [tb] "+f"(tb), [temp] "+f"(temp),
|
||||
[temp1] "+f"(temp1)
|
||||
: [c1] "f"(c1), [c2] "f"(c2), [c3] "f"(c3), [c4] "r"(c4), [s1] "f"(s1),
|
||||
[s2] "f"(s2), [s3] "f"(s3)
|
||||
: "memory");
|
||||
return diff;
|
||||
}
|
||||
|
||||
uint32_t SumSquareError_MMI(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t sse = 0u;
|
||||
uint32_t sse_hi = 0u, sse_lo = 0u;
|
||||
|
||||
uint64_t src1, src2;
|
||||
uint64_t diff, diff_hi, diff_lo;
|
||||
uint64_t sse_sum, sse_tmp;
|
||||
|
||||
const uint64_t mask = 0x0ULL;
|
||||
|
||||
__asm__ volatile(
|
||||
"xor %[sse_sum], %[sse_sum], %[sse_sum] \n\t"
|
||||
|
||||
"1: \n\t"
|
||||
"ldc1 %[src1], 0x00(%[src_a]) \n\t"
|
||||
"ldc1 %[src2], 0x00(%[src_b]) \n\t"
|
||||
"pasubub %[diff], %[src1], %[src2] \n\t"
|
||||
"punpcklbh %[diff_lo], %[diff], %[mask] \n\t"
|
||||
"punpckhbh %[diff_hi], %[diff], %[mask] \n\t"
|
||||
"pmaddhw %[sse_tmp], %[diff_lo], %[diff_lo] \n\t"
|
||||
"paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
|
||||
"pmaddhw %[sse_tmp], %[diff_hi], %[diff_hi] \n\t"
|
||||
"paddw %[sse_sum], %[sse_sum], %[sse_tmp] \n\t"
|
||||
|
||||
"daddiu %[src_a], %[src_a], 0x08 \n\t"
|
||||
"daddiu %[src_b], %[src_b], 0x08 \n\t"
|
||||
"daddiu %[count], %[count], -0x08 \n\t"
|
||||
"bnez %[count], 1b \n\t"
|
||||
|
||||
"mfc1 %[sse_lo], %[sse_sum] \n\t"
|
||||
"mfhc1 %[sse_hi], %[sse_sum] \n\t"
|
||||
"daddu %[sse], %[sse_hi], %[sse_lo] \n\t"
|
||||
: [sse] "+&r"(sse), [diff] "=&f"(diff), [src1] "=&f"(src1),
|
||||
[src2] "=&f"(src2), [diff_lo] "=&f"(diff_lo), [diff_hi] "=&f"(diff_hi),
|
||||
[sse_sum] "=&f"(sse_sum), [sse_tmp] "=&f"(sse_tmp),
|
||||
[sse_hi] "+&r"(sse_hi), [sse_lo] "+&r"(sse_lo)
|
||||
: [src_a] "r"(src_a), [src_b] "r"(src_b), [count] "r"(count),
|
||||
[mask] "f"(mask)
|
||||
: "memory");
|
||||
|
||||
return sse;
|
||||
}
|
||||
|
||||
#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
@@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright 2017 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#include "libyuv/compare_row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
// This module is for GCC MSA
|
||||
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
#include "libyuv/macros_msa.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
uint32_t HammingDistance_MSA(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t diff = 0u;
|
||||
int i;
|
||||
v16u8 src0, src1, src2, src3;
|
||||
v2i64 vec0 = {0}, vec1 = {0};
|
||||
|
||||
for (i = 0; i < count; i += 32) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0);
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16);
|
||||
src0 ^= src2;
|
||||
src1 ^= src3;
|
||||
vec0 += __msa_pcnt_d((v2i64)src0);
|
||||
vec1 += __msa_pcnt_d((v2i64)src1);
|
||||
src_a += 32;
|
||||
src_b += 32;
|
||||
}
|
||||
|
||||
vec0 += vec1;
|
||||
diff = (uint32_t)__msa_copy_u_w((v4i32)vec0, 0);
|
||||
diff += (uint32_t)__msa_copy_u_w((v4i32)vec0, 2);
|
||||
return diff;
|
||||
}
|
||||
|
||||
uint32_t SumSquareError_MSA(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t sse = 0u;
|
||||
int i;
|
||||
v16u8 src0, src1, src2, src3;
|
||||
v8i16 vec0, vec1, vec2, vec3;
|
||||
v4i32 reg0 = {0}, reg1 = {0}, reg2 = {0}, reg3 = {0};
|
||||
v2i64 tmp0;
|
||||
|
||||
for (i = 0; i < count; i += 32) {
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0);
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16);
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0);
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16);
|
||||
vec0 = (v8i16)__msa_ilvr_b((v16i8)src2, (v16i8)src0);
|
||||
vec1 = (v8i16)__msa_ilvl_b((v16i8)src2, (v16i8)src0);
|
||||
vec2 = (v8i16)__msa_ilvr_b((v16i8)src3, (v16i8)src1);
|
||||
vec3 = (v8i16)__msa_ilvl_b((v16i8)src3, (v16i8)src1);
|
||||
vec0 = __msa_hsub_u_h((v16u8)vec0, (v16u8)vec0);
|
||||
vec1 = __msa_hsub_u_h((v16u8)vec1, (v16u8)vec1);
|
||||
vec2 = __msa_hsub_u_h((v16u8)vec2, (v16u8)vec2);
|
||||
vec3 = __msa_hsub_u_h((v16u8)vec3, (v16u8)vec3);
|
||||
reg0 = __msa_dpadd_s_w(reg0, vec0, vec0);
|
||||
reg1 = __msa_dpadd_s_w(reg1, vec1, vec1);
|
||||
reg2 = __msa_dpadd_s_w(reg2, vec2, vec2);
|
||||
reg3 = __msa_dpadd_s_w(reg3, vec3, vec3);
|
||||
src_a += 32;
|
||||
src_b += 32;
|
||||
}
|
||||
|
||||
reg0 += reg1;
|
||||
reg2 += reg3;
|
||||
reg0 += reg2;
|
||||
tmp0 = __msa_hadd_s_d(reg0, reg0);
|
||||
sse = (uint32_t)__msa_copy_u_w((v4i32)tmp0, 0);
|
||||
sse += (uint32_t)__msa_copy_u_w((v4i32)tmp0, 2);
|
||||
return sse;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
@@ -21,40 +21,70 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
|
||||
!defined(__aarch64__)
|
||||
|
||||
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
|
||||
volatile uint32 sse;
|
||||
asm volatile (
|
||||
"vmov.u8 q8, #0 \n"
|
||||
"vmov.u8 q10, #0 \n"
|
||||
"vmov.u8 q9, #0 \n"
|
||||
"vmov.u8 q11, #0 \n"
|
||||
// 256 bits at a time
|
||||
// uses short accumulator which restricts count to 131 KB
|
||||
uint32_t HammingDistance_NEON(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t diff;
|
||||
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {q0}, [%0]! \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {q1}, [%1]! \n"
|
||||
"subs %2, %2, #16 \n"
|
||||
"vsubl.u8 q2, d0, d2 \n"
|
||||
"vsubl.u8 q3, d1, d3 \n"
|
||||
"vmlal.s16 q8, d4, d4 \n"
|
||||
"vmlal.s16 q9, d6, d6 \n"
|
||||
"vmlal.s16 q10, d5, d5 \n"
|
||||
"vmlal.s16 q11, d7, d7 \n"
|
||||
"bgt 1b \n"
|
||||
asm volatile(
|
||||
"vmov.u16 q4, #0 \n" // accumulator
|
||||
|
||||
"vadd.u32 q8, q8, q9 \n"
|
||||
"vadd.u32 q10, q10, q11 \n"
|
||||
"vadd.u32 q11, q8, q10 \n"
|
||||
"vpaddl.u32 q1, q11 \n"
|
||||
"vadd.u64 d0, d2, d3 \n"
|
||||
"vmov.32 %3, d0[0] \n"
|
||||
: "+r"(src_a),
|
||||
"+r"(src_b),
|
||||
"+r"(count),
|
||||
"=r"(sse)
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
|
||||
"1: \n"
|
||||
"vld1.8 {q0, q1}, [%0]! \n"
|
||||
"vld1.8 {q2, q3}, [%1]! \n"
|
||||
"veor.32 q0, q0, q2 \n"
|
||||
"veor.32 q1, q1, q3 \n"
|
||||
"vcnt.i8 q0, q0 \n"
|
||||
"vcnt.i8 q1, q1 \n"
|
||||
"subs %2, %2, #32 \n"
|
||||
"vadd.u8 q0, q0, q1 \n" // 16 byte counts
|
||||
"vpadal.u8 q4, q0 \n" // 8 shorts
|
||||
"bgt 1b \n"
|
||||
|
||||
"vpaddl.u16 q0, q4 \n" // 4 ints
|
||||
"vpadd.u32 d0, d0, d1 \n"
|
||||
"vpadd.u32 d0, d0, d0 \n"
|
||||
"vmov.32 %3, d0[0] \n"
|
||||
|
||||
: "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff)
|
||||
:
|
||||
: "cc", "q0", "q1", "q2", "q3", "q4");
|
||||
return diff;
|
||||
}
|
||||
|
||||
uint32_t SumSquareError_NEON(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t sse;
|
||||
asm volatile(
|
||||
"vmov.u8 q8, #0 \n"
|
||||
"vmov.u8 q10, #0 \n"
|
||||
"vmov.u8 q9, #0 \n"
|
||||
"vmov.u8 q11, #0 \n"
|
||||
|
||||
"1: \n"
|
||||
"vld1.8 {q0}, [%0]! \n"
|
||||
"vld1.8 {q1}, [%1]! \n"
|
||||
"subs %2, %2, #16 \n"
|
||||
"vsubl.u8 q2, d0, d2 \n"
|
||||
"vsubl.u8 q3, d1, d3 \n"
|
||||
"vmlal.s16 q8, d4, d4 \n"
|
||||
"vmlal.s16 q9, d6, d6 \n"
|
||||
"vmlal.s16 q10, d5, d5 \n"
|
||||
"vmlal.s16 q11, d7, d7 \n"
|
||||
"bgt 1b \n"
|
||||
|
||||
"vadd.u32 q8, q8, q9 \n"
|
||||
"vadd.u32 q10, q10, q11 \n"
|
||||
"vadd.u32 q11, q8, q10 \n"
|
||||
"vpaddl.u32 q1, q11 \n"
|
||||
"vadd.u64 d0, d2, d3 \n"
|
||||
"vmov.32 %3, d0[0] \n"
|
||||
: "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse)
|
||||
:
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
|
||||
return sse;
|
||||
}
|
||||
|
||||
|
||||
@@ -20,39 +20,65 @@ extern "C" {
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
|
||||
uint32 SumSquareError_NEON(const uint8* src_a, const uint8* src_b, int count) {
|
||||
volatile uint32 sse;
|
||||
asm volatile (
|
||||
"eor v16.16b, v16.16b, v16.16b \n"
|
||||
"eor v18.16b, v18.16b, v18.16b \n"
|
||||
"eor v17.16b, v17.16b, v17.16b \n"
|
||||
"eor v19.16b, v19.16b, v19.16b \n"
|
||||
// 256 bits at a time
|
||||
// uses short accumulator which restricts count to 131 KB
|
||||
uint32_t HammingDistance_NEON(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t diff;
|
||||
asm volatile(
|
||||
"movi v4.8h, #0 \n"
|
||||
|
||||
"1: \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.16b}, [%0], #16 \n"
|
||||
MEMACCESS(1)
|
||||
"ld1 {v1.16b}, [%1], #16 \n"
|
||||
"subs %w2, %w2, #16 \n"
|
||||
"usubl v2.8h, v0.8b, v1.8b \n"
|
||||
"usubl2 v3.8h, v0.16b, v1.16b \n"
|
||||
"smlal v16.4s, v2.4h, v2.4h \n"
|
||||
"smlal v17.4s, v3.4h, v3.4h \n"
|
||||
"smlal2 v18.4s, v2.8h, v2.8h \n"
|
||||
"smlal2 v19.4s, v3.8h, v3.8h \n"
|
||||
"b.gt 1b \n"
|
||||
"1: \n"
|
||||
"ld1 {v0.16b, v1.16b}, [%0], #32 \n"
|
||||
"ld1 {v2.16b, v3.16b}, [%1], #32 \n"
|
||||
"eor v0.16b, v0.16b, v2.16b \n"
|
||||
"eor v1.16b, v1.16b, v3.16b \n"
|
||||
"cnt v0.16b, v0.16b \n"
|
||||
"cnt v1.16b, v1.16b \n"
|
||||
"subs %w2, %w2, #32 \n"
|
||||
"add v0.16b, v0.16b, v1.16b \n"
|
||||
"uadalp v4.8h, v0.16b \n"
|
||||
"b.gt 1b \n"
|
||||
|
||||
"add v16.4s, v16.4s, v17.4s \n"
|
||||
"add v18.4s, v18.4s, v19.4s \n"
|
||||
"add v19.4s, v16.4s, v18.4s \n"
|
||||
"addv s0, v19.4s \n"
|
||||
"fmov %w3, s0 \n"
|
||||
: "+r"(src_a),
|
||||
"+r"(src_b),
|
||||
"+r"(count),
|
||||
"=r"(sse)
|
||||
:
|
||||
: "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
|
||||
"uaddlv s4, v4.8h \n"
|
||||
"fmov %w3, s4 \n"
|
||||
: "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff)
|
||||
:
|
||||
: "cc", "v0", "v1", "v2", "v3", "v4");
|
||||
return diff;
|
||||
}
|
||||
|
||||
uint32_t SumSquareError_NEON(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t sse;
|
||||
asm volatile(
|
||||
"eor v16.16b, v16.16b, v16.16b \n"
|
||||
"eor v18.16b, v18.16b, v18.16b \n"
|
||||
"eor v17.16b, v17.16b, v17.16b \n"
|
||||
"eor v19.16b, v19.16b, v19.16b \n"
|
||||
|
||||
"1: \n"
|
||||
"ld1 {v0.16b}, [%0], #16 \n"
|
||||
"ld1 {v1.16b}, [%1], #16 \n"
|
||||
"subs %w2, %w2, #16 \n"
|
||||
"usubl v2.8h, v0.8b, v1.8b \n"
|
||||
"usubl2 v3.8h, v0.16b, v1.16b \n"
|
||||
"smlal v16.4s, v2.4h, v2.4h \n"
|
||||
"smlal v17.4s, v3.4h, v3.4h \n"
|
||||
"smlal2 v18.4s, v2.8h, v2.8h \n"
|
||||
"smlal2 v19.4s, v3.8h, v3.8h \n"
|
||||
"b.gt 1b \n"
|
||||
|
||||
"add v16.4s, v16.4s, v17.4s \n"
|
||||
"add v18.4s, v18.4s, v19.4s \n"
|
||||
"add v19.4s, v16.4s, v18.4s \n"
|
||||
"addv s0, v19.4s \n"
|
||||
"fmov %w3, s0 \n"
|
||||
: "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse)
|
||||
:
|
||||
: "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19");
|
||||
return sse;
|
||||
}
|
||||
|
||||
|
||||
@@ -13,20 +13,39 @@
|
||||
#include "libyuv/compare_row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h> // For __popcnt
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// This module is for 32 bit Visual C x86 and clangcl
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||
|
||||
__declspec(naked)
|
||||
uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
uint32_t HammingDistance_SSE42(const uint8_t* src_a,
|
||||
const uint8_t* src_b,
|
||||
int count) {
|
||||
uint32_t diff = 0u;
|
||||
|
||||
int i;
|
||||
for (i = 0; i < count - 3; i += 4) {
|
||||
uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT
|
||||
src_a += 4;
|
||||
src_b += 4;
|
||||
diff += __popcnt(x);
|
||||
}
|
||||
return diff;
|
||||
}
|
||||
|
||||
__declspec(naked) uint32_t
|
||||
SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_a
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
mov eax, [esp + 4] // src_a
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
pxor xmm0, xmm0
|
||||
pxor xmm5, xmm5
|
||||
|
||||
@@ -61,13 +80,13 @@ uint32 SumSquareError_SSE2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if _MSC_VER >= 1700
|
||||
// C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX.
|
||||
#pragma warning(disable: 4752)
|
||||
__declspec(naked)
|
||||
uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
#pragma warning(disable : 4752)
|
||||
__declspec(naked) uint32_t
|
||||
SumSquareError_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src_a
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
mov eax, [esp + 4] // src_a
|
||||
mov edx, [esp + 8] // src_b
|
||||
mov ecx, [esp + 12] // count
|
||||
vpxor ymm0, ymm0, ymm0 // sum
|
||||
vpxor ymm5, ymm5, ymm5 // constant 0 for unpck
|
||||
sub edx, eax
|
||||
@@ -101,65 +120,65 @@ uint32 SumSquareError_AVX2(const uint8* src_a, const uint8* src_b, int count) {
|
||||
}
|
||||
#endif // _MSC_VER >= 1700
|
||||
|
||||
uvec32 kHash16x33 = { 0x92d9e201, 0, 0, 0 }; // 33 ^ 16
|
||||
uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16
|
||||
uvec32 kHashMul0 = {
|
||||
0x0c3525e1, // 33 ^ 15
|
||||
0xa3476dc1, // 33 ^ 14
|
||||
0x3b4039a1, // 33 ^ 13
|
||||
0x4f5f0981, // 33 ^ 12
|
||||
0x0c3525e1, // 33 ^ 15
|
||||
0xa3476dc1, // 33 ^ 14
|
||||
0x3b4039a1, // 33 ^ 13
|
||||
0x4f5f0981, // 33 ^ 12
|
||||
};
|
||||
uvec32 kHashMul1 = {
|
||||
0x30f35d61, // 33 ^ 11
|
||||
0x855cb541, // 33 ^ 10
|
||||
0x040a9121, // 33 ^ 9
|
||||
0x747c7101, // 33 ^ 8
|
||||
0x30f35d61, // 33 ^ 11
|
||||
0x855cb541, // 33 ^ 10
|
||||
0x040a9121, // 33 ^ 9
|
||||
0x747c7101, // 33 ^ 8
|
||||
};
|
||||
uvec32 kHashMul2 = {
|
||||
0xec41d4e1, // 33 ^ 7
|
||||
0x4cfa3cc1, // 33 ^ 6
|
||||
0x025528a1, // 33 ^ 5
|
||||
0x00121881, // 33 ^ 4
|
||||
0xec41d4e1, // 33 ^ 7
|
||||
0x4cfa3cc1, // 33 ^ 6
|
||||
0x025528a1, // 33 ^ 5
|
||||
0x00121881, // 33 ^ 4
|
||||
};
|
||||
uvec32 kHashMul3 = {
|
||||
0x00008c61, // 33 ^ 3
|
||||
0x00000441, // 33 ^ 2
|
||||
0x00000021, // 33 ^ 1
|
||||
0x00000001, // 33 ^ 0
|
||||
0x00008c61, // 33 ^ 3
|
||||
0x00000441, // 33 ^ 2
|
||||
0x00000021, // 33 ^ 1
|
||||
0x00000001, // 33 ^ 0
|
||||
};
|
||||
|
||||
__declspec(naked)
|
||||
uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
__declspec(naked) uint32_t
|
||||
HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src
|
||||
mov ecx, [esp + 8] // count
|
||||
mov eax, [esp + 4] // src
|
||||
mov ecx, [esp + 8] // count
|
||||
movd xmm0, [esp + 12] // seed
|
||||
|
||||
pxor xmm7, xmm7 // constant 0 for unpck
|
||||
pxor xmm7, xmm7 // constant 0 for unpck
|
||||
movdqa xmm6, xmmword ptr kHash16x33
|
||||
|
||||
wloop:
|
||||
movdqu xmm1, [eax] // src[0-15]
|
||||
movdqu xmm1, [eax] // src[0-15]
|
||||
lea eax, [eax + 16]
|
||||
pmulld xmm0, xmm6 // hash *= 33 ^ 16
|
||||
pmulld xmm0, xmm6 // hash *= 33 ^ 16
|
||||
movdqa xmm5, xmmword ptr kHashMul0
|
||||
movdqa xmm2, xmm1
|
||||
punpcklbw xmm2, xmm7 // src[0-7]
|
||||
punpcklbw xmm2, xmm7 // src[0-7]
|
||||
movdqa xmm3, xmm2
|
||||
punpcklwd xmm3, xmm7 // src[0-3]
|
||||
punpcklwd xmm3, xmm7 // src[0-3]
|
||||
pmulld xmm3, xmm5
|
||||
movdqa xmm5, xmmword ptr kHashMul1
|
||||
movdqa xmm4, xmm2
|
||||
punpckhwd xmm4, xmm7 // src[4-7]
|
||||
punpckhwd xmm4, xmm7 // src[4-7]
|
||||
pmulld xmm4, xmm5
|
||||
movdqa xmm5, xmmword ptr kHashMul2
|
||||
punpckhbw xmm1, xmm7 // src[8-15]
|
||||
punpckhbw xmm1, xmm7 // src[8-15]
|
||||
movdqa xmm2, xmm1
|
||||
punpcklwd xmm2, xmm7 // src[8-11]
|
||||
punpcklwd xmm2, xmm7 // src[8-11]
|
||||
pmulld xmm2, xmm5
|
||||
movdqa xmm5, xmmword ptr kHashMul3
|
||||
punpckhwd xmm1, xmm7 // src[12-15]
|
||||
punpckhwd xmm1, xmm7 // src[12-15]
|
||||
pmulld xmm1, xmm5
|
||||
paddd xmm3, xmm4 // add 16 results
|
||||
paddd xmm3, xmm4 // add 16 results
|
||||
paddd xmm1, xmm2
|
||||
paddd xmm1, xmm3
|
||||
|
||||
@@ -171,18 +190,18 @@ uint32 HashDjb2_SSE41(const uint8* src, int count, uint32 seed) {
|
||||
sub ecx, 16
|
||||
jg wloop
|
||||
|
||||
movd eax, xmm0 // return hash
|
||||
movd eax, xmm0 // return hash
|
||||
ret
|
||||
}
|
||||
}
|
||||
|
||||
// Visual C 2012 required for AVX2.
|
||||
#if _MSC_VER >= 1700
|
||||
__declspec(naked)
|
||||
uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
|
||||
__declspec(naked) uint32_t
|
||||
HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed) {
|
||||
__asm {
|
||||
mov eax, [esp + 4] // src
|
||||
mov ecx, [esp + 8] // count
|
||||
mov eax, [esp + 4] // src
|
||||
mov ecx, [esp + 8] // count
|
||||
vmovd xmm0, [esp + 12] // seed
|
||||
|
||||
wloop:
|
||||
@@ -196,7 +215,7 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
|
||||
vpmulld xmm2, xmm2, xmmword ptr kHashMul2
|
||||
lea eax, [eax + 16]
|
||||
vpmulld xmm1, xmm1, xmmword ptr kHashMul3
|
||||
vpaddd xmm3, xmm3, xmm4 // add 16 results
|
||||
vpaddd xmm3, xmm3, xmm4 // add 16 results
|
||||
vpaddd xmm1, xmm1, xmm2
|
||||
vpaddd xmm1, xmm1, xmm3
|
||||
vpshufd xmm2, xmm1, 0x0e // upper 2 dwords
|
||||
@@ -207,7 +226,7 @@ uint32 HashDjb2_AVX2(const uint8* src, int count, uint32 seed) {
|
||||
sub ecx, 16
|
||||
jg wloop
|
||||
|
||||
vmovd eax, xmm0 // return hash
|
||||
vmovd eax, xmm0 // return hash
|
||||
vzeroupper
|
||||
ret
|
||||
}
|
||||
|
||||
+1070
-326
File diff suppressed because it is too large
Load Diff
+1367
-501
File diff suppressed because it is too large
Load Diff
+819
-430
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
+241
-166
@@ -22,28 +22,24 @@ extern "C" {
|
||||
|
||||
#ifdef HAVE_JPEG
|
||||
struct I420Buffers {
|
||||
uint8* y;
|
||||
uint8_t* y;
|
||||
int y_stride;
|
||||
uint8* u;
|
||||
uint8_t* u;
|
||||
int u_stride;
|
||||
uint8* v;
|
||||
uint8_t* v;
|
||||
int v_stride;
|
||||
int w;
|
||||
int h;
|
||||
};
|
||||
|
||||
static void JpegCopyI420(void* opaque,
|
||||
const uint8* const* data,
|
||||
const uint8_t* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I420Copy(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
I420Copy(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v,
|
||||
dest->v_stride, dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
@@ -51,17 +47,13 @@ static void JpegCopyI420(void* opaque,
|
||||
}
|
||||
|
||||
static void JpegI422ToI420(void* opaque,
|
||||
const uint8* const* data,
|
||||
const uint8_t* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I422ToI420(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
I422ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v,
|
||||
dest->v_stride, dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
@@ -69,35 +61,13 @@ static void JpegI422ToI420(void* opaque,
|
||||
}
|
||||
|
||||
static void JpegI444ToI420(void* opaque,
|
||||
const uint8* const* data,
|
||||
const uint8_t* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I444ToI420(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI411ToI420(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I411ToI420(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
I444ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v,
|
||||
dest->v_stride, dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
@@ -105,15 +75,12 @@ static void JpegI411ToI420(void* opaque,
|
||||
}
|
||||
|
||||
static void JpegI400ToI420(void* opaque,
|
||||
const uint8* const* data,
|
||||
const uint8_t* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
I420Buffers* dest = (I420Buffers*)(opaque);
|
||||
I400ToI420(data[0], strides[0],
|
||||
dest->y, dest->y_stride,
|
||||
dest->u, dest->u_stride,
|
||||
dest->v, dest->v_stride,
|
||||
dest->w, rows);
|
||||
I400ToI420(data[0], strides[0], dest->y, dest->y_stride, dest->u,
|
||||
dest->u_stride, dest->v, dest->v_stride, dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->u += ((rows + 1) >> 1) * dest->u_stride;
|
||||
dest->v += ((rows + 1) >> 1) * dest->v_stride;
|
||||
@@ -122,10 +89,12 @@ static void JpegI400ToI420(void* opaque,
|
||||
|
||||
// Query size of MJPG in pixels.
|
||||
LIBYUV_API
|
||||
int MJPGSize(const uint8* sample, size_t sample_size,
|
||||
int* width, int* height) {
|
||||
int MJPGSize(const uint8_t* src_mjpg,
|
||||
size_t src_size_mjpg,
|
||||
int* width,
|
||||
int* height) {
|
||||
MJpegDecoder mjpeg_decoder;
|
||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
|
||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg);
|
||||
if (ret) {
|
||||
*width = mjpeg_decoder.GetWidth();
|
||||
*height = mjpeg_decoder.GetHeight();
|
||||
@@ -135,34 +104,40 @@ int MJPGSize(const uint8* sample, size_t sample_size,
|
||||
}
|
||||
|
||||
// MJPG (Motion JPeg) to I420
|
||||
// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
|
||||
// TODO(fbarchard): review src_width and src_height requirement. dst_width and
|
||||
// dst_height may be enough.
|
||||
LIBYUV_API
|
||||
int MJPGToI420(const uint8* sample,
|
||||
size_t sample_size,
|
||||
uint8* y, int y_stride,
|
||||
uint8* u, int u_stride,
|
||||
uint8* v, int v_stride,
|
||||
int w, int h,
|
||||
int dw, int dh) {
|
||||
if (sample_size == kUnknownDataSize) {
|
||||
int MJPGToI420(const uint8_t* src_mjpg,
|
||||
size_t src_size_mjpg,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height) {
|
||||
if (src_size_mjpg == kUnknownDataSize) {
|
||||
// ERROR: MJPEG frame size unknown
|
||||
return -1;
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port MJpeg to C.
|
||||
MJpegDecoder mjpeg_decoder;
|
||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
|
||||
if (ret && (mjpeg_decoder.GetWidth() != w ||
|
||||
mjpeg_decoder.GetHeight() != h)) {
|
||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg);
|
||||
if (ret && (mjpeg_decoder.GetWidth() != src_width ||
|
||||
mjpeg_decoder.GetHeight() != src_height)) {
|
||||
// ERROR: MJPEG frame has unexpected dimensions
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1; // runtime failure
|
||||
}
|
||||
if (ret) {
|
||||
I420Buffers bufs = { y, y_stride, u, u_stride, v, v_stride, dw, dh };
|
||||
I420Buffers bufs = {dst_y, dst_stride_y, dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v, dst_width, dst_height};
|
||||
// YUV420
|
||||
if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 2 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
|
||||
@@ -170,8 +145,9 @@ int MJPGToI420(const uint8* sample,
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dw, dh);
|
||||
// YUV422
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dst_width,
|
||||
dst_height);
|
||||
// YUV422
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
@@ -181,8 +157,9 @@ int MJPGToI420(const uint8* sample,
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dw, dh);
|
||||
// YUV444
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dst_width,
|
||||
dst_height);
|
||||
// YUV444
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
@@ -192,29 +169,158 @@ int MJPGToI420(const uint8* sample,
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dw, dh);
|
||||
// YUV411
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToI420, &bufs, dw, dh);
|
||||
// YUV400
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dst_width,
|
||||
dst_height);
|
||||
// YUV400
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceGrayscale &&
|
||||
mjpeg_decoder.GetNumComponents() == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dw, dh);
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dst_width,
|
||||
dst_height);
|
||||
} else {
|
||||
// TODO(fbarchard): Implement conversion for any other colorspace/sample
|
||||
// factors that occur in practice. 411 is supported by libjpeg
|
||||
// ERROR: Unable to convert MJPEG frame because format is not supported
|
||||
// TODO(fbarchard): Implement conversion for any other
|
||||
// colorspace/subsample factors that occur in practice. ERROR: Unable to
|
||||
// convert MJPEG frame because format is not supported
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return ret ? 0 : 1;
|
||||
}
|
||||
|
||||
struct NV21Buffers {
|
||||
uint8_t* y;
|
||||
int y_stride;
|
||||
uint8_t* vu;
|
||||
int vu_stride;
|
||||
int w;
|
||||
int h;
|
||||
};
|
||||
|
||||
static void JpegI420ToNV21(void* opaque,
|
||||
const uint8_t* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
NV21Buffers* dest = (NV21Buffers*)(opaque);
|
||||
I420ToNV21(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->y, dest->y_stride, dest->vu, dest->vu_stride, dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->vu += ((rows + 1) >> 1) * dest->vu_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI422ToNV21(void* opaque,
|
||||
const uint8_t* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
NV21Buffers* dest = (NV21Buffers*)(opaque);
|
||||
I422ToNV21(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->y, dest->y_stride, dest->vu, dest->vu_stride, dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->vu += ((rows + 1) >> 1) * dest->vu_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI444ToNV21(void* opaque,
|
||||
const uint8_t* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
NV21Buffers* dest = (NV21Buffers*)(opaque);
|
||||
I444ToNV21(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->y, dest->y_stride, dest->vu, dest->vu_stride, dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->vu += ((rows + 1) >> 1) * dest->vu_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI400ToNV21(void* opaque,
|
||||
const uint8_t* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
NV21Buffers* dest = (NV21Buffers*)(opaque);
|
||||
I400ToNV21(data[0], strides[0], dest->y, dest->y_stride, dest->vu,
|
||||
dest->vu_stride, dest->w, rows);
|
||||
dest->y += rows * dest->y_stride;
|
||||
dest->vu += ((rows + 1) >> 1) * dest->vu_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
// MJPG (Motion JPeg) to NV21
|
||||
LIBYUV_API
|
||||
int MJPGToNV21(const uint8_t* src_mjpg,
|
||||
size_t src_size_mjpg,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_vu,
|
||||
int dst_stride_vu,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height) {
|
||||
if (src_size_mjpg == kUnknownDataSize) {
|
||||
// ERROR: MJPEG frame size unknown
|
||||
return -1;
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port MJpeg to C.
|
||||
MJpegDecoder mjpeg_decoder;
|
||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg);
|
||||
if (ret && (mjpeg_decoder.GetWidth() != src_width ||
|
||||
mjpeg_decoder.GetHeight() != src_height)) {
|
||||
// ERROR: MJPEG frame has unexpected dimensions
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1; // runtime failure
|
||||
}
|
||||
if (ret) {
|
||||
NV21Buffers bufs = {dst_y, dst_stride_y, dst_vu,
|
||||
dst_stride_vu, dst_width, dst_height};
|
||||
// YUV420
|
||||
if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 2 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToNV21, &bufs, dst_width,
|
||||
dst_height);
|
||||
// YUV422
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToNV21, &bufs, dst_width,
|
||||
dst_height);
|
||||
// YUV444
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToNV21, &bufs, dst_width,
|
||||
dst_height);
|
||||
// YUV400
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceGrayscale &&
|
||||
mjpeg_decoder.GetNumComponents() == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToNV21, &bufs, dst_width,
|
||||
dst_height);
|
||||
} else {
|
||||
// Unknown colorspace.
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1;
|
||||
}
|
||||
@@ -222,109 +328,86 @@ int MJPGToI420(const uint8* sample,
|
||||
return ret ? 0 : 1;
|
||||
}
|
||||
|
||||
#ifdef HAVE_JPEG
|
||||
struct ARGBBuffers {
|
||||
uint8* argb;
|
||||
uint8_t* argb;
|
||||
int argb_stride;
|
||||
int w;
|
||||
int h;
|
||||
};
|
||||
|
||||
static void JpegI420ToARGB(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
const uint8_t* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I420ToARGB(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
I420ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->argb, dest->argb_stride, dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI422ToARGB(void* opaque,
|
||||
const uint8* const* data,
|
||||
const uint8_t* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I422ToARGB(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
I422ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->argb, dest->argb_stride, dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI444ToARGB(void* opaque,
|
||||
const uint8* const* data,
|
||||
const uint8_t* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I444ToARGB(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI411ToARGB(void* opaque,
|
||||
const uint8* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I411ToARGB(data[0], strides[0],
|
||||
data[1], strides[1],
|
||||
data[2], strides[2],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
I444ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2],
|
||||
dest->argb, dest->argb_stride, dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
static void JpegI400ToARGB(void* opaque,
|
||||
const uint8* const* data,
|
||||
const uint8_t* const* data,
|
||||
const int* strides,
|
||||
int rows) {
|
||||
ARGBBuffers* dest = (ARGBBuffers*)(opaque);
|
||||
I400ToARGB(data[0], strides[0],
|
||||
dest->argb, dest->argb_stride,
|
||||
dest->w, rows);
|
||||
I400ToARGB(data[0], strides[0], dest->argb, dest->argb_stride, dest->w, rows);
|
||||
dest->argb += rows * dest->argb_stride;
|
||||
dest->h -= rows;
|
||||
}
|
||||
|
||||
// MJPG (Motion JPeg) to ARGB
|
||||
// TODO(fbarchard): review w and h requirement. dw and dh may be enough.
|
||||
// TODO(fbarchard): review src_width and src_height requirement. dst_width and
|
||||
// dst_height may be enough.
|
||||
LIBYUV_API
|
||||
int MJPGToARGB(const uint8* sample,
|
||||
size_t sample_size,
|
||||
uint8* argb, int argb_stride,
|
||||
int w, int h,
|
||||
int dw, int dh) {
|
||||
if (sample_size == kUnknownDataSize) {
|
||||
int MJPGToARGB(const uint8_t* src_mjpg,
|
||||
size_t src_size_mjpg,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height) {
|
||||
if (src_size_mjpg == kUnknownDataSize) {
|
||||
// ERROR: MJPEG frame size unknown
|
||||
return -1;
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Port MJpeg to C.
|
||||
MJpegDecoder mjpeg_decoder;
|
||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size);
|
||||
if (ret && (mjpeg_decoder.GetWidth() != w ||
|
||||
mjpeg_decoder.GetHeight() != h)) {
|
||||
LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(src_mjpg, src_size_mjpg);
|
||||
if (ret && (mjpeg_decoder.GetWidth() != src_width ||
|
||||
mjpeg_decoder.GetHeight() != src_height)) {
|
||||
// ERROR: MJPEG frame has unexpected dimensions
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1; // runtime failure
|
||||
}
|
||||
if (ret) {
|
||||
ARGBBuffers bufs = { argb, argb_stride, dw, dh };
|
||||
ARGBBuffers bufs = {dst_argb, dst_stride_argb, dst_width, dst_height};
|
||||
// YUV420
|
||||
if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 2 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 2 &&
|
||||
@@ -332,8 +415,9 @@ int MJPGToARGB(const uint8* sample,
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dw, dh);
|
||||
// YUV422
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dst_width,
|
||||
dst_height);
|
||||
// YUV422
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
@@ -343,8 +427,9 @@ int MJPGToARGB(const uint8* sample,
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dw, dh);
|
||||
// YUV444
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dst_width,
|
||||
dst_height);
|
||||
// YUV444
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
@@ -354,38 +439,28 @@ int MJPGToARGB(const uint8* sample,
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dw, dh);
|
||||
// YUV411
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceYCbCr &&
|
||||
mjpeg_decoder.GetNumComponents() == 3 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 4 &&
|
||||
mjpeg_decoder.GetVertSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(1) == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(2) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(2) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI411ToARGB, &bufs, dw, dh);
|
||||
// YUV400
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dst_width,
|
||||
dst_height);
|
||||
// YUV400
|
||||
} else if (mjpeg_decoder.GetColorSpace() ==
|
||||
MJpegDecoder::kColorSpaceGrayscale &&
|
||||
mjpeg_decoder.GetNumComponents() == 1 &&
|
||||
mjpeg_decoder.GetVertSampFactor(0) == 1 &&
|
||||
mjpeg_decoder.GetHorizSampFactor(0) == 1) {
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dw, dh);
|
||||
ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dst_width,
|
||||
dst_height);
|
||||
} else {
|
||||
// TODO(fbarchard): Implement conversion for any other colorspace/sample
|
||||
// factors that occur in practice. 411 is supported by libjpeg
|
||||
// ERROR: Unable to convert MJPEG frame because format is not supported
|
||||
// TODO(fbarchard): Implement conversion for any other
|
||||
// colorspace/subsample factors that occur in practice. ERROR: Unable to
|
||||
// convert MJPEG frame because format is not supported
|
||||
mjpeg_decoder.UnloadFrame();
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
return ret ? 0 : 1;
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
#endif // HAVE_JPEG
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
|
||||
@@ -28,36 +28,48 @@ extern "C" {
|
||||
// src_height is used to compute location of planes, and indicate inversion
|
||||
// sample_size is measured in bytes and is the size of the frame.
|
||||
// With MJPEG it is the compressed size of the frame.
|
||||
|
||||
// TODO(fbarchard): Add the following:
|
||||
// H010ToARGB
|
||||
// I010ToARGB
|
||||
// J400ToARGB
|
||||
// J422ToARGB
|
||||
// J444ToARGB
|
||||
|
||||
LIBYUV_API
|
||||
int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
uint8* crop_argb, int argb_stride,
|
||||
int crop_x, int crop_y,
|
||||
int src_width, int src_height,
|
||||
int crop_width, int crop_height,
|
||||
int ConvertToARGB(const uint8_t* sample,
|
||||
size_t sample_size,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int crop_x,
|
||||
int crop_y,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int crop_width,
|
||||
int crop_height,
|
||||
enum RotationMode rotation,
|
||||
uint32 fourcc) {
|
||||
uint32 format = CanonicalFourCC(fourcc);
|
||||
uint32_t fourcc) {
|
||||
uint32_t format = CanonicalFourCC(fourcc);
|
||||
int aligned_src_width = (src_width + 1) & ~1;
|
||||
const uint8* src;
|
||||
const uint8* src_uv;
|
||||
const uint8_t* src;
|
||||
const uint8_t* src_uv;
|
||||
int abs_src_height = (src_height < 0) ? -src_height : src_height;
|
||||
int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height;
|
||||
int r = 0;
|
||||
|
||||
// One pass rotation is available for some formats. For the rest, convert
|
||||
// to I420 (with optional vertical flipping) into a temporary I420 buffer,
|
||||
// and then rotate the I420 to the final destination buffer.
|
||||
// For in-place conversion, if destination crop_argb is same as source sample,
|
||||
// to ARGB (with optional vertical flipping) into a temporary ARGB buffer,
|
||||
// and then rotate the ARGB to the final destination buffer.
|
||||
// For in-place conversion, if destination dst_argb is same as source sample,
|
||||
// also enable temporary buffer.
|
||||
LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) ||
|
||||
crop_argb == sample;
|
||||
uint8* dest_argb = crop_argb;
|
||||
int dest_argb_stride = argb_stride;
|
||||
uint8* rotate_buffer = NULL;
|
||||
LIBYUV_BOOL need_buf =
|
||||
(rotation && format != FOURCC_ARGB) || dst_argb == sample;
|
||||
uint8_t* dest_argb = dst_argb;
|
||||
int dest_dst_stride_argb = dst_stride_argb;
|
||||
uint8_t* rotate_buffer = NULL;
|
||||
int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
|
||||
|
||||
if (crop_argb == NULL || sample == NULL ||
|
||||
src_width <= 0 || crop_width <= 0 ||
|
||||
if (dst_argb == NULL || sample == NULL || src_width <= 0 || crop_width <= 0 ||
|
||||
src_height == 0 || crop_height == 0) {
|
||||
return -1;
|
||||
}
|
||||
@@ -67,187 +79,200 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
|
||||
if (need_buf) {
|
||||
int argb_size = crop_width * 4 * abs_crop_height;
|
||||
rotate_buffer = (uint8*)malloc(argb_size);
|
||||
rotate_buffer = (uint8_t*)malloc(argb_size); /* NOLINT */
|
||||
if (!rotate_buffer) {
|
||||
return 1; // Out of memory runtime error.
|
||||
}
|
||||
crop_argb = rotate_buffer;
|
||||
argb_stride = crop_width * 4;
|
||||
dst_argb = rotate_buffer;
|
||||
dst_stride_argb = crop_width * 4;
|
||||
}
|
||||
|
||||
switch (format) {
|
||||
// Single plane formats
|
||||
case FOURCC_YUY2:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = YUY2ToARGB(src, aligned_src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
r = YUY2ToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_UYVY:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = UYVYToARGB(src, aligned_src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
r = UYVYToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_24BG:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RGB24ToARGB(src, src_width * 3,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = RGB24ToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RAW:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RAWToARGB(src, src_width * 3,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = RAWToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ARGB:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ARGBToARGB(src, src_width * 4,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
if (!need_buf && !rotation) {
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ARGBToARGB(src, src_width * 4, dst_argb, dst_stride_argb,
|
||||
crop_width, inv_crop_height);
|
||||
}
|
||||
break;
|
||||
case FOURCC_BGRA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = BGRAToARGB(src, src_width * 4,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = BGRAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ABGR:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ABGRToARGB(src, src_width * 4,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = ABGRToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = RGBAToARGB(src, src_width * 4,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = RGBAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_AR30:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = AR30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_AB30:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = AB30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBP:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = RGB565ToARGB(src, src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
r = RGB565ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBO:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB1555ToARGB(src, src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
r = ARGB1555ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_R444:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB4444ToARGB(src, src_width * 2,
|
||||
crop_argb, argb_stride,
|
||||
r = ARGB4444ToARGB(src, src_width * 2, dst_argb, dst_stride_argb,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_I400:
|
||||
src = sample + src_width * crop_y + crop_x;
|
||||
r = I400ToARGB(src, src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = I400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
|
||||
// Biplanar formats
|
||||
case FOURCC_NV12:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
|
||||
r = NV12ToARGB(src, src_width,
|
||||
src_uv, aligned_src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
src_uv =
|
||||
sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x;
|
||||
r = NV12ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb,
|
||||
dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_NV21:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv = sample + aligned_src_width * (src_height + crop_y / 2) + crop_x;
|
||||
src_uv =
|
||||
sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x;
|
||||
// Call NV12 but with u and v parameters swapped.
|
||||
r = NV21ToARGB(src, src_width,
|
||||
src_uv, aligned_src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = NV21ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb,
|
||||
dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
case FOURCC_M420:
|
||||
src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
|
||||
r = M420ToARGB(src, src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = M420ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
|
||||
// Triplanar formats
|
||||
case FOURCC_I420:
|
||||
case FOURCC_YV12: {
|
||||
const uint8* src_y = sample + (src_width * crop_y + crop_x);
|
||||
const uint8* src_u;
|
||||
const uint8* src_v;
|
||||
const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
|
||||
const uint8_t* src_u;
|
||||
const uint8_t* src_v;
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
int halfheight = (abs_src_height + 1) / 2;
|
||||
if (format == FOURCC_YV12) {
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
}
|
||||
r = I420ToARGB(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = I420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
case FOURCC_H420: {
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
int halfheight = (abs_src_height + 1) / 2;
|
||||
const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
|
||||
const uint8_t* src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
const uint8_t* src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
r = H420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
case FOURCC_J420: {
|
||||
const uint8* src_y = sample + (src_width * crop_y + crop_x);
|
||||
const uint8* src_u;
|
||||
const uint8* src_v;
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
int halfheight = (abs_src_height + 1) / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
r = J420ToARGB(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
|
||||
const uint8_t* src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
const uint8_t* src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
r = J420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
case FOURCC_I422:
|
||||
case FOURCC_YV16: {
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u;
|
||||
const uint8* src_v;
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8_t* src_u;
|
||||
const uint8_t* src_v;
|
||||
if (format == FOURCC_YV16) {
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * crop_y + crop_x / 2;
|
||||
src_v = sample + src_width * abs_src_height + halfwidth * crop_y +
|
||||
crop_x / 2;
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * crop_y + crop_x / 2;
|
||||
src_u = sample + src_width * abs_src_height + halfwidth * crop_y +
|
||||
crop_x / 2;
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
}
|
||||
r = I422ToARGB(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = I422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
case FOURCC_H422: {
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8_t* src_u =
|
||||
sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2;
|
||||
const uint8_t* src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
r = H422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
|
||||
case FOURCC_I444:
|
||||
case FOURCC_YV24: {
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u;
|
||||
const uint8* src_v;
|
||||
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8_t* src_u;
|
||||
const uint8_t* src_v;
|
||||
if (format == FOURCC_YV24) {
|
||||
src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
|
||||
src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
|
||||
@@ -255,32 +280,14 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
|
||||
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
|
||||
}
|
||||
r = I444ToARGB(src_y, src_width,
|
||||
src_u, src_width,
|
||||
src_v, src_width,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I411: {
|
||||
int quarterwidth = (src_width + 3) / 4;
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u = sample + src_width * abs_src_height +
|
||||
quarterwidth * crop_y + crop_x / 4;
|
||||
const uint8* src_v = sample + src_width * abs_src_height +
|
||||
quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
|
||||
r = I411ToARGB(src_y, src_width,
|
||||
src_u, quarterwidth,
|
||||
src_v, quarterwidth,
|
||||
crop_argb, argb_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = I444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width,
|
||||
dst_argb, dst_stride_argb, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
#ifdef HAVE_JPEG
|
||||
case FOURCC_MJPG:
|
||||
r = MJPGToARGB(sample, sample_size,
|
||||
crop_argb, argb_stride,
|
||||
src_width, abs_src_height, crop_width, inv_crop_height);
|
||||
r = MJPGToARGB(sample, sample_size, dst_argb, dst_stride_argb, src_width,
|
||||
abs_src_height, crop_width, inv_crop_height);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
@@ -289,11 +296,14 @@ int ConvertToARGB(const uint8* sample, size_t sample_size,
|
||||
|
||||
if (need_buf) {
|
||||
if (!r) {
|
||||
r = ARGBRotate(crop_argb, argb_stride,
|
||||
dest_argb, dest_argb_stride,
|
||||
r = ARGBRotate(dst_argb, dst_stride_argb, dest_argb, dest_dst_stride_argb,
|
||||
crop_width, abs_crop_height, rotation);
|
||||
}
|
||||
free(rotate_buffer);
|
||||
} else if (rotation) {
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ARGBRotate(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
|
||||
inv_crop_height, rotation);
|
||||
}
|
||||
|
||||
return r;
|
||||
|
||||
@@ -25,251 +25,216 @@ extern "C" {
|
||||
// sample_size is measured in bytes and is the size of the frame.
|
||||
// With MJPEG it is the compressed size of the frame.
|
||||
LIBYUV_API
|
||||
int ConvertToI420(const uint8* sample,
|
||||
int ConvertToI420(const uint8_t* sample,
|
||||
size_t sample_size,
|
||||
uint8* y, int y_stride,
|
||||
uint8* u, int u_stride,
|
||||
uint8* v, int v_stride,
|
||||
int crop_x, int crop_y,
|
||||
int src_width, int src_height,
|
||||
int crop_width, int crop_height,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int crop_x,
|
||||
int crop_y,
|
||||
int src_width,
|
||||
int src_height,
|
||||
int crop_width,
|
||||
int crop_height,
|
||||
enum RotationMode rotation,
|
||||
uint32 fourcc) {
|
||||
uint32 format = CanonicalFourCC(fourcc);
|
||||
uint32_t fourcc) {
|
||||
uint32_t format = CanonicalFourCC(fourcc);
|
||||
int aligned_src_width = (src_width + 1) & ~1;
|
||||
const uint8* src;
|
||||
const uint8* src_uv;
|
||||
const uint8_t* src;
|
||||
const uint8_t* src_uv;
|
||||
const int abs_src_height = (src_height < 0) ? -src_height : src_height;
|
||||
// TODO(nisse): Why allow crop_height < 0?
|
||||
const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
|
||||
int r = 0;
|
||||
LIBYUV_BOOL need_buf = (rotation && format != FOURCC_I420 &&
|
||||
format != FOURCC_NV12 && format != FOURCC_NV21 &&
|
||||
format != FOURCC_YV12) || y == sample;
|
||||
uint8* tmp_y = y;
|
||||
uint8* tmp_u = u;
|
||||
uint8* tmp_v = v;
|
||||
int tmp_y_stride = y_stride;
|
||||
int tmp_u_stride = u_stride;
|
||||
int tmp_v_stride = v_stride;
|
||||
uint8* rotate_buffer = NULL;
|
||||
LIBYUV_BOOL need_buf =
|
||||
(rotation && format != FOURCC_I420 && format != FOURCC_NV12 &&
|
||||
format != FOURCC_NV21 && format != FOURCC_YV12) ||
|
||||
dst_y == sample;
|
||||
uint8_t* tmp_y = dst_y;
|
||||
uint8_t* tmp_u = dst_u;
|
||||
uint8_t* tmp_v = dst_v;
|
||||
int tmp_y_stride = dst_stride_y;
|
||||
int tmp_u_stride = dst_stride_u;
|
||||
int tmp_v_stride = dst_stride_v;
|
||||
uint8_t* rotate_buffer = NULL;
|
||||
const int inv_crop_height =
|
||||
(src_height < 0) ? -abs_crop_height : abs_crop_height;
|
||||
|
||||
if (!y || !u || !v || !sample ||
|
||||
src_width <= 0 || crop_width <= 0 ||
|
||||
src_height == 0 || crop_height == 0) {
|
||||
if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 ||
|
||||
crop_width <= 0 || src_height == 0 || crop_height == 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// One pass rotation is available for some formats. For the rest, convert
|
||||
// to I420 (with optional vertical flipping) into a temporary I420 buffer,
|
||||
// and then rotate the I420 to the final destination buffer.
|
||||
// For in-place conversion, if destination y is same as source sample,
|
||||
// For in-place conversion, if destination dst_y is same as source sample,
|
||||
// also enable temporary buffer.
|
||||
if (need_buf) {
|
||||
int y_size = crop_width * abs_crop_height;
|
||||
int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2);
|
||||
rotate_buffer = (uint8*)malloc(y_size + uv_size * 2);
|
||||
rotate_buffer = (uint8_t*)malloc(y_size + uv_size * 2); /* NOLINT */
|
||||
if (!rotate_buffer) {
|
||||
return 1; // Out of memory runtime error.
|
||||
}
|
||||
y = rotate_buffer;
|
||||
u = y + y_size;
|
||||
v = u + uv_size;
|
||||
y_stride = crop_width;
|
||||
u_stride = v_stride = ((crop_width + 1) / 2);
|
||||
dst_y = rotate_buffer;
|
||||
dst_u = dst_y + y_size;
|
||||
dst_v = dst_u + uv_size;
|
||||
dst_stride_y = crop_width;
|
||||
dst_stride_u = dst_stride_v = ((crop_width + 1) / 2);
|
||||
}
|
||||
|
||||
switch (format) {
|
||||
// Single plane formats
|
||||
case FOURCC_YUY2:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = YUY2ToI420(src, aligned_src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = YUY2ToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, dst_u,
|
||||
dst_stride_u, dst_v, dst_stride_v, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_UYVY:
|
||||
src = sample + (aligned_src_width * crop_y + crop_x) * 2;
|
||||
r = UYVYToI420(src, aligned_src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = UYVYToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, dst_u,
|
||||
dst_stride_u, dst_v, dst_stride_v, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBP:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = RGB565ToI420(src, src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = RGB565ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
|
||||
dst_stride_u, dst_v, dst_stride_v, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBO:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB1555ToI420(src, src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = ARGB1555ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
|
||||
dst_stride_u, dst_v, dst_stride_v, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_R444:
|
||||
src = sample + (src_width * crop_y + crop_x) * 2;
|
||||
r = ARGB4444ToI420(src, src_width * 2,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = ARGB4444ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u,
|
||||
dst_stride_u, dst_v, dst_stride_v, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_24BG:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RGB24ToI420(src, src_width * 3,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = RGB24ToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u,
|
||||
dst_stride_u, dst_v, dst_stride_v, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RAW:
|
||||
src = sample + (src_width * crop_y + crop_x) * 3;
|
||||
r = RAWToI420(src, src_width * 3,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = RAWToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u,
|
||||
dst_stride_u, dst_v, dst_stride_v, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ARGB:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ARGBToI420(src, src_width * 4,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = ARGBToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
|
||||
dst_stride_u, dst_v, dst_stride_v, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_BGRA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = BGRAToI420(src, src_width * 4,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = BGRAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
|
||||
dst_stride_u, dst_v, dst_stride_v, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_ABGR:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = ABGRToI420(src, src_width * 4,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = ABGRToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
|
||||
dst_stride_u, dst_v, dst_stride_v, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
case FOURCC_RGBA:
|
||||
src = sample + (src_width * crop_y + crop_x) * 4;
|
||||
r = RGBAToI420(src, src_width * 4,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = RGBAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u,
|
||||
dst_stride_u, dst_v, dst_stride_v, crop_width,
|
||||
inv_crop_height);
|
||||
break;
|
||||
// TODO(fbarchard): Add AR30 and AB30
|
||||
case FOURCC_I400:
|
||||
src = sample + src_width * crop_y + crop_x;
|
||||
r = I400ToI420(src, src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = I400ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v, crop_width, inv_crop_height);
|
||||
break;
|
||||
// Biplanar formats
|
||||
case FOURCC_NV12:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv = sample + (src_width * src_height) +
|
||||
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
|
||||
r = NV12ToI420Rotate(src, src_width,
|
||||
src_uv, aligned_src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height, rotation);
|
||||
src_uv = sample + (src_width * abs_src_height) +
|
||||
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
|
||||
r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y,
|
||||
dst_stride_y, dst_u, dst_stride_u, dst_v,
|
||||
dst_stride_v, crop_width, inv_crop_height, rotation);
|
||||
break;
|
||||
case FOURCC_NV21:
|
||||
src = sample + (src_width * crop_y + crop_x);
|
||||
src_uv = sample + (src_width * src_height) +
|
||||
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
|
||||
// Call NV12 but with u and v parameters swapped.
|
||||
r = NV12ToI420Rotate(src, src_width,
|
||||
src_uv, aligned_src_width,
|
||||
y, y_stride,
|
||||
v, v_stride,
|
||||
u, u_stride,
|
||||
crop_width, inv_crop_height, rotation);
|
||||
src_uv = sample + (src_width * abs_src_height) +
|
||||
((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2);
|
||||
// Call NV12 but with dst_u and dst_v parameters swapped.
|
||||
r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y,
|
||||
dst_stride_y, dst_v, dst_stride_v, dst_u,
|
||||
dst_stride_u, crop_width, inv_crop_height, rotation);
|
||||
break;
|
||||
case FOURCC_M420:
|
||||
src = sample + (src_width * crop_y) * 12 / 8 + crop_x;
|
||||
r = M420ToI420(src, src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = M420ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v, crop_width, inv_crop_height);
|
||||
break;
|
||||
// Triplanar formats
|
||||
case FOURCC_I420:
|
||||
case FOURCC_YV12: {
|
||||
const uint8* src_y = sample + (src_width * crop_y + crop_x);
|
||||
const uint8* src_u;
|
||||
const uint8* src_v;
|
||||
const uint8_t* src_y = sample + (src_width * crop_y + crop_x);
|
||||
const uint8_t* src_u;
|
||||
const uint8_t* src_v;
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
int halfheight = (abs_src_height + 1) / 2;
|
||||
if (format == FOURCC_YV12) {
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_v = sample + src_width * abs_src_height + halfwidth * (crop_y / 2) +
|
||||
(crop_x / 2);
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
halfwidth * (halfheight + (crop_y / 2)) + (crop_x / 2);
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
(halfwidth * crop_y + crop_x) / 2;
|
||||
src_u = sample + src_width * abs_src_height + halfwidth * (crop_y / 2) +
|
||||
(crop_x / 2);
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (halfheight + crop_y / 2) + crop_x / 2;
|
||||
halfwidth * (halfheight + (crop_y / 2)) + (crop_x / 2);
|
||||
}
|
||||
r = I420Rotate(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height, rotation);
|
||||
r = I420Rotate(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
|
||||
dst_stride_v, crop_width, inv_crop_height, rotation);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I422:
|
||||
case FOURCC_YV16: {
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u;
|
||||
const uint8* src_v;
|
||||
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8_t* src_u;
|
||||
const uint8_t* src_v;
|
||||
int halfwidth = (src_width + 1) / 2;
|
||||
if (format == FOURCC_YV16) {
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * crop_y + crop_x / 2;
|
||||
src_v = sample + src_width * abs_src_height + halfwidth * crop_y +
|
||||
(crop_x / 2);
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
halfwidth * (abs_src_height + crop_y) + (crop_x / 2);
|
||||
} else {
|
||||
src_u = sample + src_width * abs_src_height +
|
||||
halfwidth * crop_y + crop_x / 2;
|
||||
src_u = sample + src_width * abs_src_height + halfwidth * crop_y +
|
||||
(crop_x / 2);
|
||||
src_v = sample + src_width * abs_src_height +
|
||||
halfwidth * (abs_src_height + crop_y) + crop_x / 2;
|
||||
halfwidth * (abs_src_height + crop_y) + (crop_x / 2);
|
||||
}
|
||||
r = I422ToI420(src_y, src_width,
|
||||
src_u, halfwidth,
|
||||
src_v, halfwidth,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = I422ToI420(src_y, src_width, src_u, halfwidth, src_v, halfwidth,
|
||||
dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
|
||||
dst_stride_v, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I444:
|
||||
case FOURCC_YV24: {
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u;
|
||||
const uint8* src_v;
|
||||
const uint8_t* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8_t* src_u;
|
||||
const uint8_t* src_v;
|
||||
if (format == FOURCC_YV24) {
|
||||
src_v = sample + src_width * (abs_src_height + crop_y) + crop_x;
|
||||
src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
|
||||
@@ -277,38 +242,16 @@ int ConvertToI420(const uint8* sample,
|
||||
src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
|
||||
src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
|
||||
}
|
||||
r = I444ToI420(src_y, src_width,
|
||||
src_u, src_width,
|
||||
src_v, src_width,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
case FOURCC_I411: {
|
||||
int quarterwidth = (src_width + 3) / 4;
|
||||
const uint8* src_y = sample + src_width * crop_y + crop_x;
|
||||
const uint8* src_u = sample + src_width * abs_src_height +
|
||||
quarterwidth * crop_y + crop_x / 4;
|
||||
const uint8* src_v = sample + src_width * abs_src_height +
|
||||
quarterwidth * (abs_src_height + crop_y) + crop_x / 4;
|
||||
r = I411ToI420(src_y, src_width,
|
||||
src_u, quarterwidth,
|
||||
src_v, quarterwidth,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
crop_width, inv_crop_height);
|
||||
r = I444ToI420(src_y, src_width, src_u, src_width, src_v, src_width,
|
||||
dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
|
||||
dst_stride_v, crop_width, inv_crop_height);
|
||||
break;
|
||||
}
|
||||
#ifdef HAVE_JPEG
|
||||
case FOURCC_MJPG:
|
||||
r = MJPGToI420(sample, sample_size,
|
||||
y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
src_width, abs_src_height, crop_width, inv_crop_height);
|
||||
r = MJPGToI420(sample, sample_size, dst_y, dst_stride_y, dst_u,
|
||||
dst_stride_u, dst_v, dst_stride_v, src_width,
|
||||
abs_src_height, crop_width, inv_crop_height);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
@@ -317,13 +260,10 @@ int ConvertToI420(const uint8* sample,
|
||||
|
||||
if (need_buf) {
|
||||
if (!r) {
|
||||
r = I420Rotate(y, y_stride,
|
||||
u, u_stride,
|
||||
v, v_stride,
|
||||
tmp_y, tmp_y_stride,
|
||||
tmp_u, tmp_u_stride,
|
||||
tmp_v, tmp_v_stride,
|
||||
crop_width, abs_crop_height, rotation);
|
||||
r = I420Rotate(dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v,
|
||||
dst_stride_v, tmp_y, tmp_y_stride, tmp_u, tmp_u_stride,
|
||||
tmp_v, tmp_v_stride, crop_width, abs_crop_height,
|
||||
rotation);
|
||||
}
|
||||
free(rotate_buffer);
|
||||
}
|
||||
|
||||
+84
-132
@@ -13,22 +13,16 @@
|
||||
#if defined(_MSC_VER)
|
||||
#include <intrin.h> // For __cpuidex()
|
||||
#endif
|
||||
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
|
||||
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
|
||||
!defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
|
||||
defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
|
||||
#include <immintrin.h> // For _xgetbv()
|
||||
#endif
|
||||
|
||||
#if !defined(__native_client__)
|
||||
#include <stdlib.h> // For getenv()
|
||||
#endif
|
||||
|
||||
// For ArmCpuCaps() but unittested on all platforms
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "libyuv/basic_types.h" // For CPU_X86
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
@@ -43,16 +37,20 @@ extern "C" {
|
||||
#define SAFEBUFFERS
|
||||
#endif
|
||||
|
||||
// cpu_info_ variable for SIMD instruction sets detected.
|
||||
LIBYUV_API int cpu_info_ = 0;
|
||||
|
||||
// TODO(fbarchard): Consider using int for cpuid so casting is not needed.
|
||||
// Low level cpuid for X86.
|
||||
#if (defined(_M_IX86) || defined(_M_X64) || \
|
||||
defined(__i386__) || defined(__x86_64__)) && \
|
||||
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
|
||||
defined(__x86_64__)) && \
|
||||
!defined(__pnacl__) && !defined(__CLR_VER)
|
||||
LIBYUV_API
|
||||
void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
|
||||
void CpuId(int info_eax, int info_ecx, int* cpu_info) {
|
||||
#if defined(_MSC_VER)
|
||||
// Visual C version uses intrinsic or inline x86 assembly.
|
||||
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
|
||||
__cpuidex((int*)(cpu_info), info_eax, info_ecx);
|
||||
__cpuidex(cpu_info, info_eax, info_ecx);
|
||||
#elif defined(_M_IX86)
|
||||
__asm {
|
||||
mov eax, info_eax
|
||||
@@ -66,26 +64,26 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
|
||||
}
|
||||
#else // Visual C but not x86
|
||||
if (info_ecx == 0) {
|
||||
__cpuid((int*)(cpu_info), info_eax);
|
||||
__cpuid(cpu_info, info_eax);
|
||||
} else {
|
||||
cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0;
|
||||
cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0u;
|
||||
}
|
||||
#endif
|
||||
// GCC version uses inline x86 assembly.
|
||||
#else // defined(_MSC_VER)
|
||||
uint32 info_ebx, info_edx;
|
||||
asm volatile (
|
||||
#if defined( __i386__) && defined(__PIC__)
|
||||
// Preserve ebx for fpic 32 bit.
|
||||
"mov %%ebx, %%edi \n"
|
||||
"cpuid \n"
|
||||
"xchg %%edi, %%ebx \n"
|
||||
: "=D" (info_ebx),
|
||||
int info_ebx, info_edx;
|
||||
asm volatile(
|
||||
#if defined(__i386__) && defined(__PIC__)
|
||||
// Preserve ebx for fpic 32 bit.
|
||||
"mov %%ebx, %%edi \n"
|
||||
"cpuid \n"
|
||||
"xchg %%edi, %%ebx \n"
|
||||
: "=D"(info_ebx),
|
||||
#else
|
||||
"cpuid \n"
|
||||
: "=b" (info_ebx),
|
||||
"cpuid \n"
|
||||
: "=b"(info_ebx),
|
||||
#endif // defined( __i386__) && defined(__PIC__)
|
||||
"+a" (info_eax), "+c" (info_ecx), "=d" (info_edx));
|
||||
"+a"(info_eax), "+c"(info_ecx), "=d"(info_edx));
|
||||
cpu_info[0] = info_eax;
|
||||
cpu_info[1] = info_ebx;
|
||||
cpu_info[2] = info_ecx;
|
||||
@@ -94,7 +92,9 @@ void CpuId(uint32 info_eax, uint32 info_ecx, uint32* cpu_info) {
|
||||
}
|
||||
#else // (defined(_M_IX86) || defined(_M_X64) ...
|
||||
LIBYUV_API
|
||||
void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
|
||||
void CpuId(int eax, int ecx, int* cpu_info) {
|
||||
(void)eax;
|
||||
(void)ecx;
|
||||
cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
|
||||
}
|
||||
#endif
|
||||
@@ -111,20 +111,22 @@ void CpuId(uint32 eax, uint32 ecx, uint32* cpu_info) {
|
||||
#if defined(_M_IX86) && (_MSC_VER < 1900)
|
||||
#pragma optimize("g", off)
|
||||
#endif
|
||||
#if (defined(_M_IX86) || defined(_M_X64) || \
|
||||
defined(__i386__) || defined(__x86_64__)) && \
|
||||
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
|
||||
defined(__x86_64__)) && \
|
||||
!defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
|
||||
#define HAS_XGETBV
|
||||
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
|
||||
int GetXCR0() {
|
||||
uint32 xcr0 = 0u;
|
||||
int xcr0 = 0;
|
||||
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
|
||||
xcr0 = (uint32)(_xgetbv(0)); // VS2010 SP1 required.
|
||||
xcr0 = (int)_xgetbv(0); // VS2010 SP1 required. NOLINT
|
||||
#elif defined(__i386__) || defined(__x86_64__)
|
||||
asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcr0) : "c" (0) : "%edx");
|
||||
asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0) : "c"(0) : "%edx");
|
||||
#endif // defined(__i386__) || defined(__x86_64__)
|
||||
return xcr0;
|
||||
}
|
||||
#else
|
||||
// xgetbv unavailable to query for OSSave support. Return 0.
|
||||
#define GetXCR0() 0
|
||||
#endif // defined(_M_IX86) || defined(_M_X64) ..
|
||||
// Return optimization to previous setting.
|
||||
#if defined(_M_IX86) && (_MSC_VER < 1900)
|
||||
@@ -133,8 +135,7 @@ int GetXCR0() {
|
||||
|
||||
// based on libvpx arm_cpudetect.c
|
||||
// For Arm, but public to allow testing on any CPU
|
||||
LIBYUV_API SAFEBUFFERS
|
||||
int ArmCpuCaps(const char* cpuinfo_name) {
|
||||
LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) {
|
||||
char cpuinfo_line[512];
|
||||
FILE* f = fopen(cpuinfo_name, "r");
|
||||
if (!f) {
|
||||
@@ -151,7 +152,7 @@ int ArmCpuCaps(const char* cpuinfo_name) {
|
||||
}
|
||||
// aarch64 uses asimd for Neon.
|
||||
p = strstr(cpuinfo_line, " asimd");
|
||||
if (p && (p[6] == ' ' || p[6] == '\n')) {
|
||||
if (p) {
|
||||
fclose(f);
|
||||
return kCpuHasNEON;
|
||||
}
|
||||
@@ -161,31 +162,40 @@ int ArmCpuCaps(const char* cpuinfo_name) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
LIBYUV_API SAFEBUFFERS
|
||||
int MipsCpuCaps(const char* cpuinfo_name, const char ase[]) {
|
||||
// TODO(fbarchard): Consider read_msa_ir().
|
||||
// TODO(fbarchard): Add unittest.
|
||||
LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name,
|
||||
const char ase[]) {
|
||||
char cpuinfo_line[512];
|
||||
int len = (int)strlen(ase);
|
||||
FILE* f = fopen(cpuinfo_name, "r");
|
||||
if (!f) {
|
||||
// ase enabled if /proc/cpuinfo is unavailable.
|
||||
if (strcmp(ase, " msa") == 0) {
|
||||
return kCpuHasMSA;
|
||||
}
|
||||
if (strcmp(ase, " dspr2") == 0) {
|
||||
return kCpuHasDSPR2;
|
||||
if (strcmp(ase, " mmi") == 0) {
|
||||
return kCpuHasMMI;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) {
|
||||
if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) {
|
||||
char* p = strstr(cpuinfo_line, ase);
|
||||
if (p && (p[len] == ' ' || p[len] == '\n')) {
|
||||
if (p) {
|
||||
fclose(f);
|
||||
if (strcmp(ase, " msa") == 0) {
|
||||
return kCpuHasMSA;
|
||||
}
|
||||
if (strcmp(ase, " dspr2") == 0) {
|
||||
return kCpuHasDSPR2;
|
||||
return 0;
|
||||
}
|
||||
} else if (memcmp(cpuinfo_line, "cpu model", 9) == 0) {
|
||||
char* p = strstr(cpuinfo_line, "Loongson-3");
|
||||
if (p) {
|
||||
fclose(f);
|
||||
if (strcmp(ase, " mmi") == 0) {
|
||||
return kCpuHasMMI;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -193,109 +203,51 @@ int MipsCpuCaps(const char* cpuinfo_name, const char ase[]) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// CPU detect function for SIMD instruction sets.
|
||||
LIBYUV_API
|
||||
int cpu_info_ = 0; // cpu_info is not initialized yet.
|
||||
|
||||
// Test environment variable for disabling CPU features. Any non-zero value
|
||||
// to disable. Zero ignored to make it easy to set the variable on/off.
|
||||
#if !defined(__native_client__) && !defined(_M_ARM)
|
||||
|
||||
static LIBYUV_BOOL TestEnv(const char* name) {
|
||||
const char* var = getenv(name);
|
||||
if (var) {
|
||||
if (var[0] != '0') {
|
||||
return LIBYUV_TRUE;
|
||||
}
|
||||
}
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
#else // nacl does not support getenv().
|
||||
static LIBYUV_BOOL TestEnv(const char*) {
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
#endif
|
||||
|
||||
LIBYUV_API SAFEBUFFERS
|
||||
int InitCpuFlags(void) {
|
||||
// TODO(fbarchard): swap kCpuInit logic so 0 means uninitialized.
|
||||
static SAFEBUFFERS int GetCpuFlags(void) {
|
||||
int cpu_info = 0;
|
||||
#if !defined(__pnacl__) && !defined(__CLR_VER) && defined(CPU_X86)
|
||||
uint32 cpu_info0[4] = { 0, 0, 0, 0 };
|
||||
uint32 cpu_info1[4] = { 0, 0, 0, 0 };
|
||||
uint32 cpu_info7[4] = { 0, 0, 0, 0 };
|
||||
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
|
||||
(defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \
|
||||
defined(_M_IX86))
|
||||
int cpu_info0[4] = {0, 0, 0, 0};
|
||||
int cpu_info1[4] = {0, 0, 0, 0};
|
||||
int cpu_info7[4] = {0, 0, 0, 0};
|
||||
CpuId(0, 0, cpu_info0);
|
||||
CpuId(1, 0, cpu_info1);
|
||||
if (cpu_info0[0] >= 7) {
|
||||
CpuId(7, 0, cpu_info7);
|
||||
}
|
||||
cpu_info = ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
|
||||
cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) |
|
||||
((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) |
|
||||
((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) |
|
||||
((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) |
|
||||
((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0) |
|
||||
((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
|
||||
kCpuHasX86;
|
||||
((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0);
|
||||
|
||||
#ifdef HAS_XGETBV
|
||||
// AVX requires CPU has AVX, XSAVE and OSXSave for xgetbv
|
||||
// AVX requires OS saves YMM registers.
|
||||
if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave
|
||||
((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers
|
||||
cpu_info |= ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | kCpuHasAVX;
|
||||
cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) |
|
||||
((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) |
|
||||
((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0);
|
||||
|
||||
// Detect AVX512bw
|
||||
if ((GetXCR0() & 0xe0) == 0xe0) {
|
||||
cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX3 : 0;
|
||||
cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX512BW : 0;
|
||||
cpu_info |= (cpu_info7[1] & 0x80000000) ? kCpuHasAVX512VL : 0;
|
||||
cpu_info |= (cpu_info7[2] & 0x00000002) ? kCpuHasAVX512VBMI : 0;
|
||||
cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0;
|
||||
cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0;
|
||||
cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0;
|
||||
cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Environment variable overrides for testing.
|
||||
if (TestEnv("LIBYUV_DISABLE_X86")) {
|
||||
cpu_info &= ~kCpuHasX86;
|
||||
}
|
||||
if (TestEnv("LIBYUV_DISABLE_SSE2")) {
|
||||
cpu_info &= ~kCpuHasSSE2;
|
||||
}
|
||||
if (TestEnv("LIBYUV_DISABLE_SSSE3")) {
|
||||
cpu_info &= ~kCpuHasSSSE3;
|
||||
}
|
||||
if (TestEnv("LIBYUV_DISABLE_SSE41")) {
|
||||
cpu_info &= ~kCpuHasSSE41;
|
||||
}
|
||||
if (TestEnv("LIBYUV_DISABLE_SSE42")) {
|
||||
cpu_info &= ~kCpuHasSSE42;
|
||||
}
|
||||
if (TestEnv("LIBYUV_DISABLE_AVX")) {
|
||||
cpu_info &= ~kCpuHasAVX;
|
||||
}
|
||||
if (TestEnv("LIBYUV_DISABLE_AVX2")) {
|
||||
cpu_info &= ~kCpuHasAVX2;
|
||||
}
|
||||
if (TestEnv("LIBYUV_DISABLE_ERMS")) {
|
||||
cpu_info &= ~kCpuHasERMS;
|
||||
}
|
||||
if (TestEnv("LIBYUV_DISABLE_FMA3")) {
|
||||
cpu_info &= ~kCpuHasFMA3;
|
||||
}
|
||||
if (TestEnv("LIBYUV_DISABLE_AVX3")) {
|
||||
cpu_info &= ~kCpuHasAVX3;
|
||||
}
|
||||
#endif
|
||||
#if defined(__mips__) && defined(__linux__)
|
||||
#if defined(__mips_dspr2)
|
||||
cpu_info |= kCpuHasDSPR2;
|
||||
#endif
|
||||
#if defined(__mips_msa)
|
||||
cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa");
|
||||
#elif defined(_MIPS_ARCH_LOONGSON3A)
|
||||
cpu_info = MipsCpuCaps("/proc/cpuinfo", " mmi");
|
||||
#endif
|
||||
cpu_info |= kCpuHasMIPS;
|
||||
if (getenv("LIBYUV_DISABLE_DSPR2")) {
|
||||
cpu_info &= ~kCpuHasDSPR2;
|
||||
}
|
||||
if (getenv("LIBYUV_DISABLE_MSA")) {
|
||||
cpu_info &= ~kCpuHasMSA;
|
||||
}
|
||||
#endif
|
||||
#if defined(__arm__) || defined(__aarch64__)
|
||||
// gcc -mfpu=neon defines __ARM_NEON__
|
||||
@@ -314,22 +266,22 @@ int InitCpuFlags(void) {
|
||||
cpu_info = ArmCpuCaps("/proc/cpuinfo");
|
||||
#endif
|
||||
cpu_info |= kCpuHasARM;
|
||||
if (TestEnv("LIBYUV_DISABLE_NEON")) {
|
||||
cpu_info &= ~kCpuHasNEON;
|
||||
}
|
||||
#endif // __arm__
|
||||
if (TestEnv("LIBYUV_DISABLE_ASM")) {
|
||||
cpu_info = 0;
|
||||
}
|
||||
cpu_info |= kCpuInitialized;
|
||||
cpu_info_ = cpu_info;
|
||||
cpu_info |= kCpuInitialized;
|
||||
return cpu_info;
|
||||
}
|
||||
|
||||
// Note that use of this function is not thread safe.
|
||||
LIBYUV_API
|
||||
void MaskCpuFlags(int enable_flags) {
|
||||
cpu_info_ = InitCpuFlags() & enable_flags;
|
||||
int MaskCpuFlags(int enable_flags) {
|
||||
int cpu_info = GetCpuFlags() & enable_flags;
|
||||
SetCpuFlags(cpu_info);
|
||||
return cpu_info;
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int InitCpuFlags(void) {
|
||||
return MaskCpuFlags(-1);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
@@ -21,7 +21,7 @@
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
// disable warning 4324: structure was padded due to __declspec(align())
|
||||
#pragma warning(disable:4324)
|
||||
#pragma warning(disable : 4324)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -102,7 +102,7 @@ MJpegDecoder::~MJpegDecoder() {
|
||||
DestroyOutputBuffers();
|
||||
}
|
||||
|
||||
LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
|
||||
LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) {
|
||||
if (!ValidateJpeg(src, src_len)) {
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
@@ -129,7 +129,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
|
||||
if (scanlines_[i]) {
|
||||
delete scanlines_[i];
|
||||
}
|
||||
scanlines_[i] = new uint8* [scanlines_size];
|
||||
scanlines_[i] = new uint8_t*[scanlines_size];
|
||||
scanlines_sizes_[i] = scanlines_size;
|
||||
}
|
||||
|
||||
@@ -145,7 +145,7 @@ LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8* src, size_t src_len) {
|
||||
if (databuf_[i]) {
|
||||
delete databuf_[i];
|
||||
}
|
||||
databuf_[i] = new uint8[databuf_size];
|
||||
databuf_[i] = new uint8_t[databuf_size];
|
||||
databuf_strides_[i] = databuf_stride;
|
||||
}
|
||||
|
||||
@@ -195,13 +195,11 @@ int MJpegDecoder::GetVertSampFactor(int component) {
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetHorizSubSampFactor(int component) {
|
||||
return decompress_struct_->max_h_samp_factor /
|
||||
GetHorizSampFactor(component);
|
||||
return decompress_struct_->max_h_samp_factor / GetHorizSampFactor(component);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetVertSubSampFactor(int component) {
|
||||
return decompress_struct_->max_v_samp_factor /
|
||||
GetVertSampFactor(component);
|
||||
return decompress_struct_->max_v_samp_factor / GetVertSampFactor(component);
|
||||
}
|
||||
|
||||
int MJpegDecoder::GetImageScanlinesPerImcuRow() {
|
||||
@@ -245,10 +243,10 @@ LIBYUV_BOOL MJpegDecoder::UnloadFrame() {
|
||||
}
|
||||
|
||||
// TODO(fbarchard): Allow rectangle to be specified: x, y, width, height.
|
||||
LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
|
||||
uint8** planes, int dst_width, int dst_height) {
|
||||
if (dst_width != GetWidth() ||
|
||||
dst_height > GetHeight()) {
|
||||
LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(uint8_t** planes,
|
||||
int dst_width,
|
||||
int dst_height) {
|
||||
if (dst_width != GetWidth() || dst_height > GetHeight()) {
|
||||
// ERROR: Bad dimensions
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
@@ -289,14 +287,13 @@ LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
// TODO(fbarchard): Compute skip to avoid this
|
||||
assert(skip % GetVertSubSampFactor(i) == 0);
|
||||
int rows_to_skip =
|
||||
DivideAndRoundDown(skip, GetVertSubSampFactor(i));
|
||||
int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) -
|
||||
rows_to_skip;
|
||||
int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i));
|
||||
int scanlines_to_copy =
|
||||
GetComponentScanlinesPerImcuRow(i) - rows_to_skip;
|
||||
int data_to_skip = rows_to_skip * GetComponentStride(i);
|
||||
CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i),
|
||||
planes[i], GetComponentWidth(i),
|
||||
GetComponentWidth(i), scanlines_to_copy);
|
||||
CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i), planes[i],
|
||||
GetComponentWidth(i), GetComponentWidth(i),
|
||||
scanlines_to_copy);
|
||||
planes[i] += scanlines_to_copy * GetComponentWidth(i);
|
||||
}
|
||||
lines_left -= (GetImageScanlinesPerImcuRow() - skip);
|
||||
@@ -305,16 +302,15 @@ LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
|
||||
|
||||
// Read full MCUs but cropped horizontally
|
||||
for (; lines_left > GetImageScanlinesPerImcuRow();
|
||||
lines_left -= GetImageScanlinesPerImcuRow()) {
|
||||
lines_left -= GetImageScanlinesPerImcuRow()) {
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i);
|
||||
CopyPlane(databuf_[i], GetComponentStride(i),
|
||||
planes[i], GetComponentWidth(i),
|
||||
GetComponentWidth(i), scanlines_to_copy);
|
||||
CopyPlane(databuf_[i], GetComponentStride(i), planes[i],
|
||||
GetComponentWidth(i), GetComponentWidth(i), scanlines_to_copy);
|
||||
planes[i] += scanlines_to_copy * GetComponentWidth(i);
|
||||
}
|
||||
}
|
||||
@@ -328,19 +324,19 @@ LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
int scanlines_to_copy =
|
||||
DivideAndRoundUp(lines_left, GetVertSubSampFactor(i));
|
||||
CopyPlane(databuf_[i], GetComponentStride(i),
|
||||
planes[i], GetComponentWidth(i),
|
||||
GetComponentWidth(i), scanlines_to_copy);
|
||||
CopyPlane(databuf_[i], GetComponentStride(i), planes[i],
|
||||
GetComponentWidth(i), GetComponentWidth(i), scanlines_to_copy);
|
||||
planes[i] += scanlines_to_copy * GetComponentWidth(i);
|
||||
}
|
||||
}
|
||||
return FinishDecode();
|
||||
}
|
||||
|
||||
LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque,
|
||||
int dst_width, int dst_height) {
|
||||
if (dst_width != GetWidth() ||
|
||||
dst_height > GetHeight()) {
|
||||
LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn,
|
||||
void* opaque,
|
||||
int dst_width,
|
||||
int dst_height) {
|
||||
if (dst_width != GetWidth() || dst_height > GetHeight()) {
|
||||
// ERROR: Bad dimensions
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
@@ -395,7 +391,7 @@ LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque,
|
||||
}
|
||||
// Read full MCUs until we get to the crop point.
|
||||
for (; lines_left >= GetImageScanlinesPerImcuRow();
|
||||
lines_left -= GetImageScanlinesPerImcuRow()) {
|
||||
lines_left -= GetImageScanlinesPerImcuRow()) {
|
||||
if (!DecodeImcuRow()) {
|
||||
FinishDecode();
|
||||
return LIBYUV_FALSE;
|
||||
@@ -435,22 +431,22 @@ void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT
|
||||
}
|
||||
|
||||
void term_source(j_decompress_ptr cinfo) {
|
||||
// Nothing to do.
|
||||
(void)cinfo; // Nothing to do.
|
||||
}
|
||||
|
||||
#ifdef HAVE_SETJMP
|
||||
void ErrorHandler(j_common_ptr cinfo) {
|
||||
// This is called when a jpeglib command experiences an error. Unfortunately
|
||||
// jpeglib's error handling model is not very flexible, because it expects the
|
||||
// error handler to not return--i.e., it wants the program to terminate. To
|
||||
// recover from errors we use setjmp() as shown in their example. setjmp() is
|
||||
// C's implementation for the "call with current continuation" functionality
|
||||
// seen in some functional programming languages.
|
||||
// A formatted message can be output, but is unsafe for release.
|
||||
// This is called when a jpeglib command experiences an error. Unfortunately
|
||||
// jpeglib's error handling model is not very flexible, because it expects the
|
||||
// error handler to not return--i.e., it wants the program to terminate. To
|
||||
// recover from errors we use setjmp() as shown in their example. setjmp() is
|
||||
// C's implementation for the "call with current continuation" functionality
|
||||
// seen in some functional programming languages.
|
||||
// A formatted message can be output, but is unsafe for release.
|
||||
#ifdef DEBUG
|
||||
char buf[JMSG_LENGTH_MAX];
|
||||
(*cinfo->err->format_message)(cinfo, buf);
|
||||
// ERROR: Error in jpeglib: buf
|
||||
// ERROR: Error in jpeglib: buf
|
||||
#endif
|
||||
|
||||
SetJmpErrorMgr* mgr = reinterpret_cast<SetJmpErrorMgr*>(cinfo->err);
|
||||
@@ -459,8 +455,9 @@ void ErrorHandler(j_common_ptr cinfo) {
|
||||
longjmp(mgr->setjmp_buffer, 1);
|
||||
}
|
||||
|
||||
// Suppress fprintf warnings.
|
||||
void OutputHandler(j_common_ptr cinfo) {
|
||||
// Suppress fprintf warnings.
|
||||
(void)cinfo;
|
||||
}
|
||||
|
||||
#endif // HAVE_SETJMP
|
||||
@@ -472,9 +469,9 @@ void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
|
||||
// it.
|
||||
DestroyOutputBuffers();
|
||||
|
||||
scanlines_ = new uint8** [num_outbufs];
|
||||
scanlines_ = new uint8_t**[num_outbufs];
|
||||
scanlines_sizes_ = new int[num_outbufs];
|
||||
databuf_ = new uint8* [num_outbufs];
|
||||
databuf_ = new uint8_t*[num_outbufs];
|
||||
databuf_strides_ = new int[num_outbufs];
|
||||
|
||||
for (int i = 0; i < num_outbufs; ++i) {
|
||||
@@ -490,13 +487,13 @@ void MJpegDecoder::AllocOutputBuffers(int num_outbufs) {
|
||||
|
||||
void MJpegDecoder::DestroyOutputBuffers() {
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
delete [] scanlines_[i];
|
||||
delete [] databuf_[i];
|
||||
delete[] scanlines_[i];
|
||||
delete[] databuf_[i];
|
||||
}
|
||||
delete [] scanlines_;
|
||||
delete [] databuf_;
|
||||
delete [] scanlines_sizes_;
|
||||
delete [] databuf_strides_;
|
||||
delete[] scanlines_;
|
||||
delete[] databuf_;
|
||||
delete[] scanlines_sizes_;
|
||||
delete[] databuf_strides_;
|
||||
scanlines_ = NULL;
|
||||
databuf_ = NULL;
|
||||
scanlines_sizes_ = NULL;
|
||||
@@ -530,9 +527,9 @@ LIBYUV_BOOL MJpegDecoder::FinishDecode() {
|
||||
return LIBYUV_TRUE;
|
||||
}
|
||||
|
||||
void MJpegDecoder::SetScanlinePointers(uint8** data) {
|
||||
void MJpegDecoder::SetScanlinePointers(uint8_t** data) {
|
||||
for (int i = 0; i < num_outbufs_; ++i) {
|
||||
uint8* data_i = data[i];
|
||||
uint8_t* data_i = data[i];
|
||||
for (int j = 0; j < scanlines_sizes_[i]; ++j) {
|
||||
scanlines_[i][j] = data_i;
|
||||
data_i += GetComponentStride(i);
|
||||
@@ -542,26 +539,26 @@ void MJpegDecoder::SetScanlinePointers(uint8** data) {
|
||||
|
||||
inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() {
|
||||
return (unsigned int)(GetImageScanlinesPerImcuRow()) ==
|
||||
jpeg_read_raw_data(decompress_struct_,
|
||||
scanlines_,
|
||||
GetImageScanlinesPerImcuRow());
|
||||
jpeg_read_raw_data(decompress_struct_, scanlines_,
|
||||
GetImageScanlinesPerImcuRow());
|
||||
}
|
||||
|
||||
// The helper function which recognizes the jpeg sub-sampling type.
|
||||
JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
|
||||
int* subsample_x, int* subsample_y, int number_of_components) {
|
||||
int* subsample_x,
|
||||
int* subsample_y,
|
||||
int number_of_components) {
|
||||
if (number_of_components == 3) { // Color images.
|
||||
if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
|
||||
subsample_x[1] == 2 && subsample_y[1] == 2 &&
|
||||
subsample_x[2] == 2 && subsample_y[2] == 2) {
|
||||
if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 &&
|
||||
subsample_y[1] == 2 && subsample_x[2] == 2 && subsample_y[2] == 2) {
|
||||
return kJpegYuv420;
|
||||
} else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
|
||||
subsample_x[1] == 2 && subsample_y[1] == 1 &&
|
||||
subsample_x[2] == 2 && subsample_y[2] == 1) {
|
||||
}
|
||||
if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 &&
|
||||
subsample_y[1] == 1 && subsample_x[2] == 2 && subsample_y[2] == 1) {
|
||||
return kJpegYuv422;
|
||||
} else if (subsample_x[0] == 1 && subsample_y[0] == 1 &&
|
||||
subsample_x[1] == 1 && subsample_y[1] == 1 &&
|
||||
subsample_x[2] == 1 && subsample_y[2] == 1) {
|
||||
}
|
||||
if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 1 &&
|
||||
subsample_y[1] == 1 && subsample_x[2] == 1 && subsample_y[2] == 1) {
|
||||
return kJpegYuv444;
|
||||
}
|
||||
} else if (number_of_components == 1) { // Grey-scale images.
|
||||
@@ -574,4 +571,3 @@ JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper(
|
||||
|
||||
} // namespace libyuv
|
||||
#endif // HAVE_JPEG
|
||||
|
||||
|
||||
@@ -18,13 +18,13 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
// Helper function to scan for EOI marker (0xff 0xd9).
|
||||
static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {
|
||||
if (sample_size >= 2) {
|
||||
const uint8* end = sample + sample_size - 1;
|
||||
const uint8* it = sample;
|
||||
static LIBYUV_BOOL ScanEOI(const uint8_t* src_mjpg, size_t src_size_mjpg) {
|
||||
if (src_size_mjpg >= 2) {
|
||||
const uint8_t* end = src_mjpg + src_size_mjpg - 1;
|
||||
const uint8_t* it = src_mjpg;
|
||||
while (it < end) {
|
||||
// TODO(fbarchard): scan for 0xd9 instead.
|
||||
it = static_cast<const uint8 *>(memchr(it, 0xff, end - it));
|
||||
it = (const uint8_t*)(memchr(it, 0xff, end - it));
|
||||
if (it == NULL) {
|
||||
break;
|
||||
}
|
||||
@@ -34,38 +34,37 @@ static LIBYUV_BOOL ScanEOI(const uint8* sample, size_t sample_size) {
|
||||
++it; // Skip over current 0xff.
|
||||
}
|
||||
}
|
||||
// ERROR: Invalid jpeg end code not found. Size sample_size
|
||||
// ERROR: Invalid jpeg end code not found. Size src_size_mjpg
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
|
||||
// Helper function to validate the jpeg appears intact.
|
||||
LIBYUV_BOOL ValidateJpeg(const uint8* sample, size_t sample_size) {
|
||||
LIBYUV_BOOL ValidateJpeg(const uint8_t* src_mjpg, size_t src_size_mjpg) {
|
||||
// Maximum size that ValidateJpeg will consider valid.
|
||||
const size_t kMaxJpegSize = 0x7fffffffull;
|
||||
const size_t kBackSearchSize = 1024;
|
||||
if (sample_size < 64 || sample_size > kMaxJpegSize || !sample) {
|
||||
// ERROR: Invalid jpeg size: sample_size
|
||||
if (src_size_mjpg < 64 || src_size_mjpg > kMaxJpegSize || !src_mjpg) {
|
||||
// ERROR: Invalid jpeg size: src_size_mjpg
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
if (sample[0] != 0xff || sample[1] != 0xd8) { // SOI marker
|
||||
if (src_mjpg[0] != 0xff || src_mjpg[1] != 0xd8) { // SOI marker
|
||||
// ERROR: Invalid jpeg initial start code
|
||||
return LIBYUV_FALSE;
|
||||
}
|
||||
|
||||
// Look for the End Of Image (EOI) marker near the end of the buffer.
|
||||
if (sample_size > kBackSearchSize) {
|
||||
if (ScanEOI(sample + sample_size - kBackSearchSize, kBackSearchSize)) {
|
||||
if (src_size_mjpg > kBackSearchSize) {
|
||||
if (ScanEOI(src_mjpg + src_size_mjpg - kBackSearchSize, kBackSearchSize)) {
|
||||
return LIBYUV_TRUE; // Success: Valid jpeg.
|
||||
}
|
||||
// Reduce search size for forward search.
|
||||
sample_size = sample_size - kBackSearchSize + 1;
|
||||
src_size_mjpg = src_size_mjpg - kBackSearchSize + 1;
|
||||
}
|
||||
// Step over SOI marker and scan for EOI.
|
||||
return ScanEOI(sample + 2, sample_size - 2);
|
||||
return ScanEOI(src_mjpg + 2, src_size_mjpg - 2);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
|
||||
+1609
-553
File diff suppressed because it is too large
Load Diff
+224
-178
@@ -10,8 +10,8 @@
|
||||
|
||||
#include "libyuv/rotate.h"
|
||||
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/convert.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/planar_functions.h"
|
||||
#include "libyuv/rotate_row.h"
|
||||
#include "libyuv/row.h"
|
||||
@@ -22,12 +22,20 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
LIBYUV_API
|
||||
void TransposePlane(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
void TransposePlane(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
int i = height;
|
||||
void (*TransposeWx8)(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) = TransposeWx8_C;
|
||||
#if defined(HAS_TRANSPOSEWX16_MSA)
|
||||
void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst,
|
||||
int dst_stride, int width) = TransposeWx16_C;
|
||||
#else
|
||||
void (*TransposeWx8)(const uint8_t* src, int src_stride, uint8_t* dst,
|
||||
int dst_stride, int width) = TransposeWx8_C;
|
||||
#endif
|
||||
#if defined(HAS_TRANSPOSEWX8_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
TransposeWx8 = TransposeWx8_NEON;
|
||||
@@ -41,6 +49,11 @@ void TransposePlane(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_TRANSPOSEWX8_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
TransposeWx8 = TransposeWx8_MMI;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
TransposeWx8 = TransposeWx8_Fast_Any_SSSE3;
|
||||
@@ -49,24 +62,32 @@ void TransposePlane(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_TRANSPOSEWX8_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2)) {
|
||||
if (IS_ALIGNED(width, 4) &&
|
||||
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
|
||||
TransposeWx8 = TransposeWx8_Fast_DSPR2;
|
||||
} else {
|
||||
TransposeWx8 = TransposeWx8_DSPR2;
|
||||
#if defined(HAS_TRANSPOSEWX16_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
TransposeWx16 = TransposeWx16_Any_MSA;
|
||||
if (IS_ALIGNED(width, 16)) {
|
||||
TransposeWx16 = TransposeWx16_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_TRANSPOSEWX16_MSA)
|
||||
// Work across the source in 16x16 tiles
|
||||
while (i >= 16) {
|
||||
TransposeWx16(src, src_stride, dst, dst_stride, width);
|
||||
src += 16 * src_stride; // Go down 16 rows.
|
||||
dst += 16; // Move over 16 columns.
|
||||
i -= 16;
|
||||
}
|
||||
#else
|
||||
// Work across the source in 8x8 tiles
|
||||
while (i >= 8) {
|
||||
TransposeWx8(src, src_stride, dst, dst_stride, width);
|
||||
src += 8 * src_stride; // Go down 8 rows.
|
||||
dst += 8; // Move over 8 columns.
|
||||
src += 8 * src_stride; // Go down 8 rows.
|
||||
dst += 8; // Move over 8 columns.
|
||||
i -= 8;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (i > 0) {
|
||||
TransposeWxH_C(src, src_stride, dst, dst_stride, width, i);
|
||||
@@ -74,9 +95,12 @@ void TransposePlane(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void RotatePlane90(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
void RotatePlane90(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
// Rotate by 90 is a transpose with the source read
|
||||
// from bottom to top. So set the source pointer to the end
|
||||
// of the buffer and flip the sign of the source stride.
|
||||
@@ -86,9 +110,12 @@ void RotatePlane90(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void RotatePlane270(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
void RotatePlane270(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
// Rotate by 270 is a transpose with the destination written
|
||||
// from bottom to top. So set the destination pointer to the end
|
||||
// of the buffer and flip the sign of the destination stride.
|
||||
@@ -98,17 +125,20 @@ void RotatePlane270(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void RotatePlane180(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
void RotatePlane180(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
// Swap first and last row and mirror the content. Uses a temporary row.
|
||||
align_buffer_64(row, width);
|
||||
const uint8* src_bot = src + src_stride * (height - 1);
|
||||
uint8* dst_bot = dst + dst_stride * (height - 1);
|
||||
const uint8_t* src_bot = src + src_stride * (height - 1);
|
||||
uint8_t* dst_bot = dst + dst_stride * (height - 1);
|
||||
int half_height = (height + 1) >> 1;
|
||||
int y;
|
||||
void (*MirrorRow)(const uint8* src, uint8* dst, int width) = MirrorRow_C;
|
||||
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
|
||||
void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C;
|
||||
void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C;
|
||||
#if defined(HAS_MIRRORROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
MirrorRow = MirrorRow_Any_NEON;
|
||||
@@ -133,21 +163,21 @@ void RotatePlane180(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// TODO(fbarchard): Mirror on mips handle unaligned memory.
|
||||
#if defined(HAS_MIRRORROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) &&
|
||||
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4) &&
|
||||
IS_ALIGNED(dst, 4) && IS_ALIGNED(dst_stride, 4)) {
|
||||
MirrorRow = MirrorRow_DSPR2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MIRRORROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
MirrorRow = MirrorRow_Any_MSA;
|
||||
if (IS_ALIGNED(width, 64)) {
|
||||
MirrorRow = MirrorRow_MSA;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MIRRORROW_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
MirrorRow = MirrorRow_Any_MMI;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
MirrorRow = MirrorRow_MMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_COPYROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
@@ -169,9 +199,9 @@ void RotatePlane180(const uint8* src, int src_stride,
|
||||
CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_COPYROW_MIPS)
|
||||
if (TestCpuFlag(kCpuHasMIPS)) {
|
||||
CopyRow = CopyRow_MIPS;
|
||||
#if defined(HAS_COPYROW_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
CopyRow = IS_ALIGNED(width, 8) ? CopyRow_MMI : CopyRow_Any_MMI;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -189,15 +219,24 @@ void RotatePlane180(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void TransposeUV(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height) {
|
||||
void TransposeUV(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
int i = height;
|
||||
void (*TransposeUVWx8)(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
#if defined(HAS_TRANSPOSEUVWX16_MSA)
|
||||
void (*TransposeUVWx16)(const uint8_t* src, int src_stride, uint8_t* dst_a,
|
||||
int dst_stride_a, uint8_t* dst_b, int dst_stride_b,
|
||||
int width) = TransposeUVWx16_C;
|
||||
#else
|
||||
void (*TransposeUVWx8)(const uint8_t* src, int src_stride, uint8_t* dst_a,
|
||||
int dst_stride_a, uint8_t* dst_b, int dst_stride_b,
|
||||
int width) = TransposeUVWx8_C;
|
||||
#endif
|
||||
#if defined(HAS_TRANSPOSEUVWX8_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
TransposeUVWx8 = TransposeUVWx8_NEON;
|
||||
@@ -211,72 +250,98 @@ void TransposeUV(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_TRANSPOSEUVWX8_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(width, 2) &&
|
||||
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
|
||||
TransposeUVWx8 = TransposeUVWx8_DSPR2;
|
||||
#if defined(HAS_TRANSPOSEUVWX8_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
TransposeUVWx8 = TransposeUVWx8_Any_MMI;
|
||||
if (IS_ALIGNED(width, 4)) {
|
||||
TransposeUVWx8 = TransposeUVWx8_MMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_TRANSPOSEUVWX16_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
TransposeUVWx16 = TransposeUVWx16_Any_MSA;
|
||||
if (IS_ALIGNED(width, 8)) {
|
||||
TransposeUVWx16 = TransposeUVWx16_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(HAS_TRANSPOSEUVWX16_MSA)
|
||||
// Work through the source in 16x16 tiles.
|
||||
while (i >= 16) {
|
||||
TransposeUVWx16(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
|
||||
width);
|
||||
src += 16 * src_stride; // Go down 16 rows.
|
||||
dst_a += 16; // Move over 16 columns.
|
||||
dst_b += 16; // Move over 16 columns.
|
||||
i -= 16;
|
||||
}
|
||||
#else
|
||||
// Work through the source in 8x8 tiles.
|
||||
while (i >= 8) {
|
||||
TransposeUVWx8(src, src_stride,
|
||||
dst_a, dst_stride_a,
|
||||
dst_b, dst_stride_b,
|
||||
TransposeUVWx8(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
|
||||
width);
|
||||
src += 8 * src_stride; // Go down 8 rows.
|
||||
dst_a += 8; // Move over 8 columns.
|
||||
dst_b += 8; // Move over 8 columns.
|
||||
src += 8 * src_stride; // Go down 8 rows.
|
||||
dst_a += 8; // Move over 8 columns.
|
||||
dst_b += 8; // Move over 8 columns.
|
||||
i -= 8;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (i > 0) {
|
||||
TransposeUVWxH_C(src, src_stride,
|
||||
dst_a, dst_stride_a,
|
||||
dst_b, dst_stride_b,
|
||||
TransposeUVWxH_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
|
||||
width, i);
|
||||
}
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void RotateUV90(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height) {
|
||||
void RotateUV90(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
src += src_stride * (height - 1);
|
||||
src_stride = -src_stride;
|
||||
|
||||
TransposeUV(src, src_stride,
|
||||
dst_a, dst_stride_a,
|
||||
dst_b, dst_stride_b,
|
||||
width, height);
|
||||
TransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width,
|
||||
height);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
void RotateUV270(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height) {
|
||||
void RotateUV270(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
dst_a += dst_stride_a * (width - 1);
|
||||
dst_b += dst_stride_b * (width - 1);
|
||||
dst_stride_a = -dst_stride_a;
|
||||
dst_stride_b = -dst_stride_b;
|
||||
|
||||
TransposeUV(src, src_stride,
|
||||
dst_a, dst_stride_a,
|
||||
dst_b, dst_stride_b,
|
||||
width, height);
|
||||
TransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width,
|
||||
height);
|
||||
}
|
||||
|
||||
// Rotate 180 is a horizontal and vertical flip.
|
||||
LIBYUV_API
|
||||
void RotateUV180(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height) {
|
||||
void RotateUV180(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
int i;
|
||||
void (*MirrorUVRow)(const uint8* src, uint8* dst_u, uint8* dst_v, int width) =
|
||||
MirrorUVRow_C;
|
||||
void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v,
|
||||
int width) = MirrorUVRow_C;
|
||||
#if defined(HAS_MIRRORUVROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) {
|
||||
MirrorUVRow = MirrorUVRow_NEON;
|
||||
@@ -287,10 +352,14 @@ void RotateUV180(const uint8* src, int src_stride,
|
||||
MirrorUVRow = MirrorUVRow_SSSE3;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MIRRORUVROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) &&
|
||||
IS_ALIGNED(src, 4) && IS_ALIGNED(src_stride, 4)) {
|
||||
MirrorUVRow = MirrorUVRow_DSPR2;
|
||||
#if defined(HAS_MIRRORUVROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 32)) {
|
||||
MirrorUVRow = MirrorUVRow_MSA;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_MIRRORUVROW_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(width, 8)) {
|
||||
MirrorUVRow = MirrorUVRow_MMI;
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -306,9 +375,12 @@ void RotateUV180(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int RotatePlane(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height,
|
||||
int RotatePlane(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode) {
|
||||
if (!src || width <= 0 || height == 0 || !dst) {
|
||||
return -1;
|
||||
@@ -324,24 +396,16 @@ int RotatePlane(const uint8* src, int src_stride,
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// copy frame
|
||||
CopyPlane(src, src_stride,
|
||||
dst, dst_stride,
|
||||
width, height);
|
||||
CopyPlane(src, src_stride, dst, dst_stride, width, height);
|
||||
return 0;
|
||||
case kRotate90:
|
||||
RotatePlane90(src, src_stride,
|
||||
dst, dst_stride,
|
||||
width, height);
|
||||
RotatePlane90(src, src_stride, dst, dst_stride, width, height);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
RotatePlane270(src, src_stride,
|
||||
dst, dst_stride,
|
||||
width, height);
|
||||
RotatePlane270(src, src_stride, dst, dst_stride, width, height);
|
||||
return 0;
|
||||
case kRotate180:
|
||||
RotatePlane180(src, src_stride,
|
||||
dst, dst_stride,
|
||||
width, height);
|
||||
RotatePlane180(src, src_stride, dst, dst_stride, width, height);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
@@ -350,18 +414,25 @@ int RotatePlane(const uint8* src, int src_stride,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int I420Rotate(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height,
|
||||
int I420Rotate(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode) {
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
int halfheight = (height + 1) >> 1;
|
||||
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 ||
|
||||
!dst_y || !dst_u || !dst_v) {
|
||||
if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y ||
|
||||
!dst_u || !dst_v) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -380,45 +451,29 @@ int I420Rotate(const uint8* src_y, int src_stride_y,
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// copy frame
|
||||
return I420Copy(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
dst_y, dst_stride_y,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
width, height);
|
||||
return I420Copy(src_y, src_stride_y, src_u, src_stride_u, src_v,
|
||||
src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v, width, height);
|
||||
case kRotate90:
|
||||
RotatePlane90(src_y, src_stride_y,
|
||||
dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotatePlane90(src_u, src_stride_u,
|
||||
dst_u, dst_stride_u,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane90(src_v, src_stride_v,
|
||||
dst_v, dst_stride_v,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
|
||||
halfheight);
|
||||
RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
|
||||
halfheight);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
RotatePlane270(src_y, src_stride_y,
|
||||
dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotatePlane270(src_u, src_stride_u,
|
||||
dst_u, dst_stride_u,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane270(src_v, src_stride_v,
|
||||
dst_v, dst_stride_v,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
|
||||
halfheight);
|
||||
RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
|
||||
halfheight);
|
||||
return 0;
|
||||
case kRotate180:
|
||||
RotatePlane180(src_y, src_stride_y,
|
||||
dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotatePlane180(src_u, src_stride_u,
|
||||
dst_u, dst_stride_u,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane180(src_v, src_stride_v,
|
||||
dst_v, dst_stride_v,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth,
|
||||
halfheight);
|
||||
RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth,
|
||||
halfheight);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
@@ -427,17 +482,23 @@ int I420Rotate(const uint8* src_y, int src_stride_y,
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_uv, int src_stride_uv,
|
||||
uint8* dst_y, int dst_stride_y,
|
||||
uint8* dst_u, int dst_stride_u,
|
||||
uint8* dst_v, int dst_stride_v,
|
||||
int width, int height,
|
||||
int NV12ToI420Rotate(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_uv,
|
||||
int src_stride_uv,
|
||||
uint8_t* dst_y,
|
||||
int dst_stride_y,
|
||||
uint8_t* dst_u,
|
||||
int dst_stride_u,
|
||||
uint8_t* dst_v,
|
||||
int dst_stride_v,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode) {
|
||||
int halfwidth = (width + 1) >> 1;
|
||||
int halfheight = (height + 1) >> 1;
|
||||
if (!src_y || !src_uv || width <= 0 || height == 0 ||
|
||||
!dst_y || !dst_u || !dst_v) {
|
||||
if (!src_y || !src_uv || width <= 0 || height == 0 || !dst_y || !dst_u ||
|
||||
!dst_v) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -454,38 +515,23 @@ int NV12ToI420Rotate(const uint8* src_y, int src_stride_y,
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// copy frame
|
||||
return NV12ToI420(src_y, src_stride_y,
|
||||
src_uv, src_stride_uv,
|
||||
dst_y, dst_stride_y,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
return NV12ToI420(src_y, src_stride_y, src_uv, src_stride_uv, dst_y,
|
||||
dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
|
||||
width, height);
|
||||
case kRotate90:
|
||||
RotatePlane90(src_y, src_stride_y,
|
||||
dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotateUV90(src_uv, src_stride_uv,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotateUV90(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
|
||||
dst_stride_v, halfwidth, halfheight);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
RotatePlane270(src_y, src_stride_y,
|
||||
dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotateUV270(src_uv, src_stride_uv,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotateUV270(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
|
||||
dst_stride_v, halfwidth, halfheight);
|
||||
return 0;
|
||||
case kRotate180:
|
||||
RotatePlane180(src_y, src_stride_y,
|
||||
dst_y, dst_stride_y,
|
||||
width, height);
|
||||
RotateUV180(src_uv, src_stride_uv,
|
||||
dst_u, dst_stride_u,
|
||||
dst_v, dst_stride_v,
|
||||
halfwidth, halfheight);
|
||||
RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height);
|
||||
RotateUV180(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v,
|
||||
dst_stride_v, halfwidth, halfheight);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
|
||||
@@ -18,16 +18,16 @@ namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define TANY(NAMEANY, TPOS_SIMD, MASK) \
|
||||
void NAMEANY(const uint8* src, int src_stride, \
|
||||
uint8* dst, int dst_stride, int width) { \
|
||||
int r = width & MASK; \
|
||||
int n = width - r; \
|
||||
if (n > 0) { \
|
||||
TPOS_SIMD(src, src_stride, dst, dst_stride, n); \
|
||||
} \
|
||||
TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r);\
|
||||
}
|
||||
#define TANY(NAMEANY, TPOS_SIMD, MASK) \
|
||||
void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst, \
|
||||
int dst_stride, int width) { \
|
||||
int r = width & MASK; \
|
||||
int n = width - r; \
|
||||
if (n > 0) { \
|
||||
TPOS_SIMD(src, src_stride, dst, dst_stride, n); \
|
||||
} \
|
||||
TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r); \
|
||||
}
|
||||
|
||||
#ifdef HAS_TRANSPOSEWX8_NEON
|
||||
TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7)
|
||||
@@ -35,28 +35,29 @@ TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7)
|
||||
#ifdef HAS_TRANSPOSEWX8_SSSE3
|
||||
TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7)
|
||||
#endif
|
||||
#ifdef HAS_TRANSPOSEWX8_MMI
|
||||
TANY(TransposeWx8_Any_MMI, TransposeWx8_MMI, 7)
|
||||
#endif
|
||||
#ifdef HAS_TRANSPOSEWX8_FAST_SSSE3
|
||||
TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15)
|
||||
#endif
|
||||
#ifdef HAS_TRANSPOSEWX8_DSPR2
|
||||
TANY(TransposeWx8_Any_DSPR2, TransposeWx8_DSPR2, 7)
|
||||
#ifdef HAS_TRANSPOSEWX16_MSA
|
||||
TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15)
|
||||
#endif
|
||||
#undef TANY
|
||||
|
||||
#define TUVANY(NAMEANY, TPOS_SIMD, MASK) \
|
||||
void NAMEANY(const uint8* src, int src_stride, \
|
||||
uint8* dst_a, int dst_stride_a, \
|
||||
uint8* dst_b, int dst_stride_b, int width) { \
|
||||
int r = width & MASK; \
|
||||
int n = width - r; \
|
||||
if (n > 0) { \
|
||||
TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, \
|
||||
n); \
|
||||
} \
|
||||
TransposeUVWx8_C(src + n * 2, src_stride, \
|
||||
dst_a + n * dst_stride_a, dst_stride_a, \
|
||||
dst_b + n * dst_stride_b, dst_stride_b, r); \
|
||||
}
|
||||
void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst_a, \
|
||||
int dst_stride_a, uint8_t* dst_b, int dst_stride_b, \
|
||||
int width) { \
|
||||
int r = width & MASK; \
|
||||
int n = width - r; \
|
||||
if (n > 0) { \
|
||||
TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, n); \
|
||||
} \
|
||||
TransposeUVWx8_C(src + n * 2, src_stride, dst_a + n * dst_stride_a, \
|
||||
dst_stride_a, dst_b + n * dst_stride_b, dst_stride_b, r); \
|
||||
}
|
||||
|
||||
#ifdef HAS_TRANSPOSEUVWX8_NEON
|
||||
TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
|
||||
@@ -64,8 +65,11 @@ TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7)
|
||||
#ifdef HAS_TRANSPOSEUVWX8_SSE2
|
||||
TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7)
|
||||
#endif
|
||||
#ifdef HAS_TRANSPOSEUVWX8_DSPR2
|
||||
TUVANY(TransposeUVWx8_Any_DSPR2, TransposeUVWx8_DSPR2, 7)
|
||||
#ifdef HAS_TRANSPOSEUVWX8_MMI
|
||||
TUVANY(TransposeUVWx8_Any_MMI, TransposeUVWx8_MMI, 7)
|
||||
#endif
|
||||
#ifdef HAS_TRANSPOSEUVWX16_MSA
|
||||
TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7)
|
||||
#endif
|
||||
#undef TUVANY
|
||||
|
||||
@@ -73,8 +77,3 @@ TUVANY(TransposeUVWx8_Any_DSPR2, TransposeUVWx8_DSPR2, 7)
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -10,90 +10,114 @@
|
||||
|
||||
#include "libyuv/rotate.h"
|
||||
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/convert.h"
|
||||
#include "libyuv/cpu_id.h"
|
||||
#include "libyuv/planar_functions.h"
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// ARGBScale has a function to copy pixels to a row, striding each source
|
||||
// pixel by a constant.
|
||||
#if !defined(LIBYUV_DISABLE_X86) && \
|
||||
(defined(_M_IX86) || \
|
||||
(defined(__x86_64__) && !defined(__native_client__)) || defined(__i386__))
|
||||
#define HAS_SCALEARGBROWDOWNEVEN_SSE2
|
||||
void ScaleARGBRowDownEven_SSE2(const uint8* src_ptr, int src_stride,
|
||||
int src_stepx, uint8* dst_ptr, int dst_width);
|
||||
#endif
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && !defined(__native_client__) && \
|
||||
(defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__))
|
||||
#define HAS_SCALEARGBROWDOWNEVEN_NEON
|
||||
void ScaleARGBRowDownEven_NEON(const uint8* src_ptr, int src_stride,
|
||||
int src_stepx, uint8* dst_ptr, int dst_width);
|
||||
#endif
|
||||
|
||||
void ScaleARGBRowDownEven_C(const uint8* src_ptr, int,
|
||||
int src_stepx, uint8* dst_ptr, int dst_width);
|
||||
|
||||
static void ARGBTranspose(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width, int height) {
|
||||
static void ARGBTranspose(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height) {
|
||||
int i;
|
||||
int src_pixel_step = src_stride >> 2;
|
||||
void (*ScaleARGBRowDownEven)(const uint8* src_ptr, int src_stride,
|
||||
int src_step, uint8* dst_ptr, int dst_width) = ScaleARGBRowDownEven_C;
|
||||
int src_pixel_step = src_stride_argb >> 2;
|
||||
void (*ScaleARGBRowDownEven)(
|
||||
const uint8_t* src_argb, ptrdiff_t src_stride_argb, int src_step,
|
||||
uint8_t* dst_argb, int dst_width) = ScaleARGBRowDownEven_C;
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(height, 4)) { // Width of dest.
|
||||
ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_SSE2;
|
||||
if (IS_ALIGNED(height, 4)) { // Width of dest.
|
||||
ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(height, 4)) { // Width of dest.
|
||||
ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_NEON;
|
||||
if (IS_ALIGNED(height, 4)) { // Width of dest.
|
||||
ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_MSA;
|
||||
if (IS_ALIGNED(height, 4)) { // Width of dest.
|
||||
ScaleARGBRowDownEven = ScaleARGBRowDownEven_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_MMI;
|
||||
if (IS_ALIGNED(height, 4)) { // Width of dest.
|
||||
ScaleARGBRowDownEven = ScaleARGBRowDownEven_MMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
for (i = 0; i < width; ++i) { // column of source to row of dest.
|
||||
ScaleARGBRowDownEven(src, 0, src_pixel_step, dst, height);
|
||||
dst += dst_stride;
|
||||
src += 4;
|
||||
ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height);
|
||||
dst_argb += dst_stride_argb;
|
||||
src_argb += 4;
|
||||
}
|
||||
}
|
||||
|
||||
void ARGBRotate90(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width, int height) {
|
||||
void ARGBRotate90(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height) {
|
||||
// Rotate by 90 is a ARGBTranspose with the source read
|
||||
// from bottom to top. So set the source pointer to the end
|
||||
// of the buffer and flip the sign of the source stride.
|
||||
src += src_stride * (height - 1);
|
||||
src_stride = -src_stride;
|
||||
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
|
||||
src_argb += src_stride_argb * (height - 1);
|
||||
src_stride_argb = -src_stride_argb;
|
||||
ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width,
|
||||
height);
|
||||
}
|
||||
|
||||
void ARGBRotate270(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width, int height) {
|
||||
void ARGBRotate270(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height) {
|
||||
// Rotate by 270 is a ARGBTranspose with the destination written
|
||||
// from bottom to top. So set the destination pointer to the end
|
||||
// of the buffer and flip the sign of the destination stride.
|
||||
dst += dst_stride * (width - 1);
|
||||
dst_stride = -dst_stride;
|
||||
ARGBTranspose(src, src_stride, dst, dst_stride, width, height);
|
||||
dst_argb += dst_stride_argb * (width - 1);
|
||||
dst_stride_argb = -dst_stride_argb;
|
||||
ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width,
|
||||
height);
|
||||
}
|
||||
|
||||
void ARGBRotate180(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width, int height) {
|
||||
void ARGBRotate180(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height) {
|
||||
// Swap first and last row and mirror the content. Uses a temporary row.
|
||||
align_buffer_64(row, width * 4);
|
||||
const uint8* src_bot = src + src_stride * (height - 1);
|
||||
uint8* dst_bot = dst + dst_stride * (height - 1);
|
||||
const uint8_t* src_bot = src_argb + src_stride_argb * (height - 1);
|
||||
uint8_t* dst_bot = dst_argb + dst_stride_argb * (height - 1);
|
||||
int half_height = (height + 1) >> 1;
|
||||
int y;
|
||||
void (*ARGBMirrorRow)(const uint8* src, uint8* dst, int width) =
|
||||
void (*ARGBMirrorRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
|
||||
ARGBMirrorRow_C;
|
||||
void (*CopyRow)(const uint8* src, uint8* dst, int width) = CopyRow_C;
|
||||
void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
|
||||
CopyRow_C;
|
||||
#if defined(HAS_ARGBMIRRORROW_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ARGBMirrorRow = ARGBMirrorRow_Any_NEON;
|
||||
@@ -126,6 +150,14 @@ void ARGBRotate180(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_ARGBMIRRORROW_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
ARGBMirrorRow = ARGBMirrorRow_Any_MMI;
|
||||
if (IS_ALIGNED(width, 2)) {
|
||||
ARGBMirrorRow = ARGBMirrorRow_MMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_COPYROW_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2;
|
||||
@@ -146,28 +178,27 @@ void ARGBRotate180(const uint8* src, int src_stride,
|
||||
CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_COPYROW_MIPS)
|
||||
if (TestCpuFlag(kCpuHasMIPS)) {
|
||||
CopyRow = CopyRow_MIPS;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Odd height will harmlessly mirror the middle row twice.
|
||||
for (y = 0; y < half_height; ++y) {
|
||||
ARGBMirrorRow(src, row, width); // Mirror first row into a buffer
|
||||
ARGBMirrorRow(src_bot, dst, width); // Mirror last row into first row
|
||||
ARGBMirrorRow(src_argb, row, width); // Mirror first row into a buffer
|
||||
ARGBMirrorRow(src_bot, dst_argb, width); // Mirror last row into first row
|
||||
CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last
|
||||
src += src_stride;
|
||||
dst += dst_stride;
|
||||
src_bot -= src_stride;
|
||||
dst_bot -= dst_stride;
|
||||
src_argb += src_stride_argb;
|
||||
dst_argb += dst_stride_argb;
|
||||
src_bot -= src_stride_argb;
|
||||
dst_bot -= dst_stride_argb;
|
||||
}
|
||||
free_aligned_buffer_64(row);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBRotate(const uint8* src_argb, int src_stride_argb,
|
||||
uint8* dst_argb, int dst_stride_argb, int width, int height,
|
||||
int ARGBRotate(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int width,
|
||||
int height,
|
||||
enum RotationMode mode) {
|
||||
if (!src_argb || width <= 0 || height == 0 || !dst_argb) {
|
||||
return -1;
|
||||
@@ -183,23 +214,19 @@ int ARGBRotate(const uint8* src_argb, int src_stride_argb,
|
||||
switch (mode) {
|
||||
case kRotate0:
|
||||
// copy frame
|
||||
return ARGBCopy(src_argb, src_stride_argb,
|
||||
dst_argb, dst_stride_argb,
|
||||
return ARGBCopy(src_argb, src_stride_argb, dst_argb, dst_stride_argb,
|
||||
width, height);
|
||||
case kRotate90:
|
||||
ARGBRotate90(src_argb, src_stride_argb,
|
||||
dst_argb, dst_stride_argb,
|
||||
width, height);
|
||||
ARGBRotate90(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width,
|
||||
height);
|
||||
return 0;
|
||||
case kRotate270:
|
||||
ARGBRotate270(src_argb, src_stride_argb,
|
||||
dst_argb, dst_stride_argb,
|
||||
width, height);
|
||||
ARGBRotate270(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width,
|
||||
height);
|
||||
return 0;
|
||||
case kRotate180:
|
||||
ARGBRotate180(src_argb, src_stride_argb,
|
||||
dst_argb, dst_stride_argb,
|
||||
width, height);
|
||||
ARGBRotate180(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width,
|
||||
height);
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
|
||||
@@ -8,16 +8,19 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/rotate_row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
void TransposeWx8_C(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
void TransposeWx8_C(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
dst[0] = src[0 * src_stride];
|
||||
@@ -33,9 +36,13 @@ void TransposeWx8_C(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void TransposeUVWx8_C(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width) {
|
||||
void TransposeUVWx8_C(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
dst_a[0] = src[0 * src_stride + 0];
|
||||
@@ -60,9 +67,12 @@ void TransposeUVWx8_C(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void TransposeWxH_C(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
int width, int height) {
|
||||
void TransposeWxH_C(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width,
|
||||
int height) {
|
||||
int i;
|
||||
for (i = 0; i < width; ++i) {
|
||||
int j;
|
||||
@@ -72,10 +82,14 @@ void TransposeWxH_C(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
void TransposeUVWxH_C(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width, int height) {
|
||||
void TransposeUVWxH_C(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width,
|
||||
int height) {
|
||||
int i;
|
||||
for (i = 0; i < width * 2; i += 2) {
|
||||
int j;
|
||||
|
||||
+333
-327
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/rotate_row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
@@ -22,342 +22,348 @@ extern "C" {
|
||||
|
||||
// Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit.
|
||||
#if defined(HAS_TRANSPOSEWX8_SSSE3)
|
||||
void TransposeWx8_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
asm volatile (
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movq (%0),%%xmm0 \n"
|
||||
"movq (%0,%3),%%xmm1 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"movq (%0),%%xmm2 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"palignr $0x8,%%xmm1,%%xmm1 \n"
|
||||
"movq (%0,%3),%%xmm3 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"movq (%0),%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"movq (%0,%3),%%xmm5 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"movdqa %%xmm4,%%xmm5 \n"
|
||||
"movq (%0),%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"movq (%0,%3),%%xmm7 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
"neg %3 \n"
|
||||
"movdqa %%xmm6,%%xmm7 \n"
|
||||
"lea 0x8(%0,%3,8),%0 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"neg %3 \n"
|
||||
// Second round of bit swap.
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm2,%%xmm2 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm4,%%xmm6 \n"
|
||||
"movdqa %%xmm5,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movq %%xmm0,(%1) \n"
|
||||
"movdqa %%xmm0,%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm4,%%xmm4 \n"
|
||||
"movq %%xmm4,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm6 \n"
|
||||
"movq %%xmm2,(%1) \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movq %%xmm6,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm1,%%xmm5 \n"
|
||||
"movq %%xmm1,(%1) \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"movq %%xmm5,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movq %%xmm3,(%1) \n"
|
||||
"movdqa %%xmm3,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"movq %%xmm7,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"((intptr_t)(src_stride)), // %3
|
||||
"r"((intptr_t)(dst_stride)) // %4
|
||||
: "memory", "cc",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
|
||||
);
|
||||
void TransposeWx8_SSSE3(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
asm volatile(
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movq (%0),%%xmm0 \n"
|
||||
"movq (%0,%3),%%xmm1 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"movq (%0),%%xmm2 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"palignr $0x8,%%xmm1,%%xmm1 \n"
|
||||
"movq (%0,%3),%%xmm3 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"movq (%0),%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"movq (%0,%3),%%xmm5 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"movdqa %%xmm4,%%xmm5 \n"
|
||||
"movq (%0),%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"movq (%0,%3),%%xmm7 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
"neg %3 \n"
|
||||
"movdqa %%xmm6,%%xmm7 \n"
|
||||
"lea 0x8(%0,%3,8),%0 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"neg %3 \n"
|
||||
// Second round of bit swap.
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm2,%%xmm2 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm4,%%xmm6 \n"
|
||||
"movdqa %%xmm5,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movq %%xmm0,(%1) \n"
|
||||
"movdqa %%xmm0,%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm4,%%xmm4 \n"
|
||||
"movq %%xmm4,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm6 \n"
|
||||
"movq %%xmm2,(%1) \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movq %%xmm6,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm1,%%xmm5 \n"
|
||||
"movq %%xmm1,(%1) \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"movq %%xmm5,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movq %%xmm3,(%1) \n"
|
||||
"movdqa %%xmm3,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"sub $0x8,%2 \n"
|
||||
"movq %%xmm7,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"((intptr_t)(src_stride)), // %3
|
||||
"r"((intptr_t)(dst_stride)) // %4
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7");
|
||||
}
|
||||
#endif // defined(HAS_TRANSPOSEWX8_SSSE3)
|
||||
|
||||
// Transpose 16x8. 64 bit
|
||||
#if defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
|
||||
void TransposeWx8_Fast_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
asm volatile (
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu (%0,%3),%%xmm1 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpckhbw %%xmm1,%%xmm8 \n"
|
||||
"movdqu (%0),%%xmm2 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"movdqa %%xmm8,%%xmm9 \n"
|
||||
"palignr $0x8,%%xmm1,%%xmm1 \n"
|
||||
"palignr $0x8,%%xmm9,%%xmm9 \n"
|
||||
"movdqu (%0,%3),%%xmm3 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm2,%%xmm10 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"punpckhbw %%xmm3,%%xmm10 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"movdqa %%xmm10,%%xmm11 \n"
|
||||
"movdqu (%0),%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm11,%%xmm11 \n"
|
||||
"movdqu (%0,%3),%%xmm5 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm4,%%xmm12 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"punpckhbw %%xmm5,%%xmm12 \n"
|
||||
"movdqa %%xmm4,%%xmm5 \n"
|
||||
"movdqa %%xmm12,%%xmm13 \n"
|
||||
"movdqu (%0),%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"palignr $0x8,%%xmm13,%%xmm13 \n"
|
||||
"movdqu (%0,%3),%%xmm7 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm6,%%xmm14 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
"punpckhbw %%xmm7,%%xmm14 \n"
|
||||
"neg %3 \n"
|
||||
"movdqa %%xmm6,%%xmm7 \n"
|
||||
"movdqa %%xmm14,%%xmm15 \n"
|
||||
"lea 0x10(%0,%3,8),%0 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
"neg %3 \n"
|
||||
// Second round of bit swap.
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm2,%%xmm2 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm4,%%xmm6 \n"
|
||||
"movdqa %%xmm5,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"punpcklwd %%xmm10,%%xmm8 \n"
|
||||
"punpcklwd %%xmm11,%%xmm9 \n"
|
||||
"movdqa %%xmm8,%%xmm10 \n"
|
||||
"movdqa %%xmm9,%%xmm11 \n"
|
||||
"palignr $0x8,%%xmm10,%%xmm10 \n"
|
||||
"palignr $0x8,%%xmm11,%%xmm11 \n"
|
||||
"punpcklwd %%xmm14,%%xmm12 \n"
|
||||
"punpcklwd %%xmm15,%%xmm13 \n"
|
||||
"movdqa %%xmm12,%%xmm14 \n"
|
||||
"movdqa %%xmm13,%%xmm15 \n"
|
||||
"palignr $0x8,%%xmm14,%%xmm14 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movq %%xmm0,(%1) \n"
|
||||
"movdqa %%xmm0,%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm4,%%xmm4 \n"
|
||||
"movq %%xmm4,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm6 \n"
|
||||
"movq %%xmm2,(%1) \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movq %%xmm6,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm1,%%xmm5 \n"
|
||||
"movq %%xmm1,(%1) \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"movq %%xmm5,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movq %%xmm3,(%1) \n"
|
||||
"movdqa %%xmm3,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"movq %%xmm7,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm12,%%xmm8 \n"
|
||||
"movq %%xmm8,(%1) \n"
|
||||
"movdqa %%xmm8,%%xmm12 \n"
|
||||
"palignr $0x8,%%xmm12,%%xmm12 \n"
|
||||
"movq %%xmm12,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm14,%%xmm10 \n"
|
||||
"movdqa %%xmm10,%%xmm14 \n"
|
||||
"movq %%xmm10,(%1) \n"
|
||||
"palignr $0x8,%%xmm14,%%xmm14 \n"
|
||||
"punpckldq %%xmm13,%%xmm9 \n"
|
||||
"movq %%xmm14,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm9,%%xmm13 \n"
|
||||
"movq %%xmm9,(%1) \n"
|
||||
"palignr $0x8,%%xmm13,%%xmm13 \n"
|
||||
"movq %%xmm13,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm15,%%xmm11 \n"
|
||||
"movq %%xmm11,(%1) \n"
|
||||
"movdqa %%xmm11,%%xmm15 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"movq %%xmm15,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"((intptr_t)(src_stride)), // %3
|
||||
"r"((intptr_t)(dst_stride)) // %4
|
||||
: "memory", "cc",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
|
||||
"xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"
|
||||
);
|
||||
void TransposeWx8_Fast_SSSE3(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
asm volatile(
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu (%0,%3),%%xmm1 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpckhbw %%xmm1,%%xmm8 \n"
|
||||
"movdqu (%0),%%xmm2 \n"
|
||||
"movdqa %%xmm0,%%xmm1 \n"
|
||||
"movdqa %%xmm8,%%xmm9 \n"
|
||||
"palignr $0x8,%%xmm1,%%xmm1 \n"
|
||||
"palignr $0x8,%%xmm9,%%xmm9 \n"
|
||||
"movdqu (%0,%3),%%xmm3 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm2,%%xmm10 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"punpckhbw %%xmm3,%%xmm10 \n"
|
||||
"movdqa %%xmm2,%%xmm3 \n"
|
||||
"movdqa %%xmm10,%%xmm11 \n"
|
||||
"movdqu (%0),%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm11,%%xmm11 \n"
|
||||
"movdqu (%0,%3),%%xmm5 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm4,%%xmm12 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"punpckhbw %%xmm5,%%xmm12 \n"
|
||||
"movdqa %%xmm4,%%xmm5 \n"
|
||||
"movdqa %%xmm12,%%xmm13 \n"
|
||||
"movdqu (%0),%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"palignr $0x8,%%xmm13,%%xmm13 \n"
|
||||
"movdqu (%0,%3),%%xmm7 \n"
|
||||
"lea (%0,%3,2),%0 \n"
|
||||
"movdqa %%xmm6,%%xmm14 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
"punpckhbw %%xmm7,%%xmm14 \n"
|
||||
"neg %3 \n"
|
||||
"movdqa %%xmm6,%%xmm7 \n"
|
||||
"movdqa %%xmm14,%%xmm15 \n"
|
||||
"lea 0x10(%0,%3,8),%0 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
"neg %3 \n"
|
||||
// Second round of bit swap.
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm0,%%xmm2 \n"
|
||||
"movdqa %%xmm1,%%xmm3 \n"
|
||||
"palignr $0x8,%%xmm2,%%xmm2 \n"
|
||||
"palignr $0x8,%%xmm3,%%xmm3 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm4,%%xmm6 \n"
|
||||
"movdqa %%xmm5,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"punpcklwd %%xmm10,%%xmm8 \n"
|
||||
"punpcklwd %%xmm11,%%xmm9 \n"
|
||||
"movdqa %%xmm8,%%xmm10 \n"
|
||||
"movdqa %%xmm9,%%xmm11 \n"
|
||||
"palignr $0x8,%%xmm10,%%xmm10 \n"
|
||||
"palignr $0x8,%%xmm11,%%xmm11 \n"
|
||||
"punpcklwd %%xmm14,%%xmm12 \n"
|
||||
"punpcklwd %%xmm15,%%xmm13 \n"
|
||||
"movdqa %%xmm12,%%xmm14 \n"
|
||||
"movdqa %%xmm13,%%xmm15 \n"
|
||||
"palignr $0x8,%%xmm14,%%xmm14 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movq %%xmm0,(%1) \n"
|
||||
"movdqa %%xmm0,%%xmm4 \n"
|
||||
"palignr $0x8,%%xmm4,%%xmm4 \n"
|
||||
"movq %%xmm4,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movdqa %%xmm2,%%xmm6 \n"
|
||||
"movq %%xmm2,(%1) \n"
|
||||
"palignr $0x8,%%xmm6,%%xmm6 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movq %%xmm6,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm1,%%xmm5 \n"
|
||||
"movq %%xmm1,(%1) \n"
|
||||
"palignr $0x8,%%xmm5,%%xmm5 \n"
|
||||
"movq %%xmm5,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movq %%xmm3,(%1) \n"
|
||||
"movdqa %%xmm3,%%xmm7 \n"
|
||||
"palignr $0x8,%%xmm7,%%xmm7 \n"
|
||||
"movq %%xmm7,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm12,%%xmm8 \n"
|
||||
"movq %%xmm8,(%1) \n"
|
||||
"movdqa %%xmm8,%%xmm12 \n"
|
||||
"palignr $0x8,%%xmm12,%%xmm12 \n"
|
||||
"movq %%xmm12,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm14,%%xmm10 \n"
|
||||
"movdqa %%xmm10,%%xmm14 \n"
|
||||
"movq %%xmm10,(%1) \n"
|
||||
"palignr $0x8,%%xmm14,%%xmm14 \n"
|
||||
"punpckldq %%xmm13,%%xmm9 \n"
|
||||
"movq %%xmm14,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"movdqa %%xmm9,%%xmm13 \n"
|
||||
"movq %%xmm9,(%1) \n"
|
||||
"palignr $0x8,%%xmm13,%%xmm13 \n"
|
||||
"movq %%xmm13,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"punpckldq %%xmm15,%%xmm11 \n"
|
||||
"movq %%xmm11,(%1) \n"
|
||||
"movdqa %%xmm11,%%xmm15 \n"
|
||||
"palignr $0x8,%%xmm15,%%xmm15 \n"
|
||||
"sub $0x10,%2 \n"
|
||||
"movq %%xmm15,(%1,%4) \n"
|
||||
"lea (%1,%4,2),%1 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst), // %1
|
||||
"+r"(width) // %2
|
||||
: "r"((intptr_t)(src_stride)), // %3
|
||||
"r"((intptr_t)(dst_stride)) // %4
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14",
|
||||
"xmm15");
|
||||
}
|
||||
#endif // defined(HAS_TRANSPOSEWX8_FAST_SSSE3)
|
||||
|
||||
// Transpose UV 8x8. 64 bit.
|
||||
#if defined(HAS_TRANSPOSEUVWX8_SSE2)
|
||||
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b, int width) {
|
||||
asm volatile (
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu (%0,%4),%%xmm1 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpckhbw %%xmm1,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm1 \n"
|
||||
"movdqu (%0),%%xmm2 \n"
|
||||
"movdqu (%0,%4),%%xmm3 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm2,%%xmm8 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"punpckhbw %%xmm3,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm3 \n"
|
||||
"movdqu (%0),%%xmm4 \n"
|
||||
"movdqu (%0,%4),%%xmm5 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm4,%%xmm8 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"punpckhbw %%xmm5,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm5 \n"
|
||||
"movdqu (%0),%%xmm6 \n"
|
||||
"movdqu (%0,%4),%%xmm7 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm6,%%xmm8 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
"neg %4 \n"
|
||||
"lea 0x10(%0,%4,8),%0 \n"
|
||||
"punpckhbw %%xmm7,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm7 \n"
|
||||
"neg %4 \n"
|
||||
// Second round of bit swap.
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"movdqa %%xmm1,%%xmm9 \n"
|
||||
"punpckhwd %%xmm2,%%xmm8 \n"
|
||||
"punpckhwd %%xmm3,%%xmm9 \n"
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm8,%%xmm2 \n"
|
||||
"movdqa %%xmm9,%%xmm3 \n"
|
||||
"movdqa %%xmm4,%%xmm8 \n"
|
||||
"movdqa %%xmm5,%%xmm9 \n"
|
||||
"punpckhwd %%xmm6,%%xmm8 \n"
|
||||
"punpckhwd %%xmm7,%%xmm9 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm8,%%xmm6 \n"
|
||||
"movdqa %%xmm9,%%xmm7 \n"
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movlpd %%xmm0,(%1) \n" // Write back U channel
|
||||
"movhpd %%xmm0,(%2) \n" // Write back V channel
|
||||
"punpckhdq %%xmm4,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm2,%%xmm8 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movlpd %%xmm2,(%1) \n"
|
||||
"movhpd %%xmm2,(%2) \n"
|
||||
"punpckhdq %%xmm6,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm1,%%xmm8 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movlpd %%xmm1,(%1) \n"
|
||||
"movhpd %%xmm1,(%2) \n"
|
||||
"punpckhdq %%xmm5,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm3,%%xmm8 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movlpd %%xmm3,(%1) \n"
|
||||
"movhpd %%xmm3,(%2) \n"
|
||||
"punpckhdq %%xmm7,%%xmm8 \n"
|
||||
"sub $0x8,%3 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst_a), // %1
|
||||
"+r"(dst_b), // %2
|
||||
"+r"(width) // %3
|
||||
: "r"((intptr_t)(src_stride)), // %4
|
||||
"r"((intptr_t)(dst_stride_a)), // %5
|
||||
"r"((intptr_t)(dst_stride_b)) // %6
|
||||
: "memory", "cc",
|
||||
"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
|
||||
"xmm8", "xmm9"
|
||||
);
|
||||
// Transposes an 8-row strip of byte pairs (U/V, going by the store comments
// below) into two separate destinations.
// Each loop pass loads 16 bytes from each of 8 consecutive source rows,
// interleaves them at byte, word and dword granularity, and then writes
// 8 rows of 8 bytes to dst_a (low register halves, movlpd) and 8 rows of
// 8 bytes to dst_b (high register halves, movhpd).
//   src, src_stride          - source pointer and its row pitch in bytes.
//   dst_a, dst_stride_a      - first destination and its row pitch in bytes.
//   dst_b, dst_stride_b      - second destination and its row pitch in bytes.
//   width                    - reduced by 8 per pass; the loop repeats while
//                              the remaining value is greater than zero.
// Uses xmm8/xmm9 as scratch, so this variant only assembles for 64-bit x86.
void TransposeUVWx8_SSE2(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width) {
|
||||
asm volatile(
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
LABELALIGN
|
||||
"1: \n"
|
||||
"movdqu (%0),%%xmm0 \n"
|
||||
"movdqu (%0,%4),%%xmm1 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpcklbw %%xmm1,%%xmm0 \n"
|
||||
"punpckhbw %%xmm1,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm1 \n"
|
||||
"movdqu (%0),%%xmm2 \n"
|
||||
"movdqu (%0,%4),%%xmm3 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm2,%%xmm8 \n"
|
||||
"punpcklbw %%xmm3,%%xmm2 \n"
|
||||
"punpckhbw %%xmm3,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm3 \n"
|
||||
"movdqu (%0),%%xmm4 \n"
|
||||
"movdqu (%0,%4),%%xmm5 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm4,%%xmm8 \n"
|
||||
"punpcklbw %%xmm5,%%xmm4 \n"
|
||||
"punpckhbw %%xmm5,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm5 \n"
|
||||
"movdqu (%0),%%xmm6 \n"
|
||||
"movdqu (%0,%4),%%xmm7 \n"
|
||||
"lea (%0,%4,2),%0 \n"
|
||||
"movdqa %%xmm6,%%xmm8 \n"
|
||||
"punpcklbw %%xmm7,%%xmm6 \n"
|
||||
// Temporarily negate the source stride so the lea below can move src back
// up the 8 rows just read and forward 16 bytes to the next column group.
"neg %4 \n"
|
||||
"lea 0x10(%0,%4,8),%0 \n"
|
||||
"punpckhbw %%xmm7,%%xmm8 \n"
|
||||
"movdqa %%xmm8,%%xmm7 \n"
|
||||
// Undo the negation: %4 is an input-only operand and must hold its original
// value again for the next pass and when the asm statement exits.
"neg %4 \n"
|
||||
// Second round of bit swap.
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"movdqa %%xmm1,%%xmm9 \n"
|
||||
"punpckhwd %%xmm2,%%xmm8 \n"
|
||||
"punpckhwd %%xmm3,%%xmm9 \n"
|
||||
"punpcklwd %%xmm2,%%xmm0 \n"
|
||||
"punpcklwd %%xmm3,%%xmm1 \n"
|
||||
"movdqa %%xmm8,%%xmm2 \n"
|
||||
"movdqa %%xmm9,%%xmm3 \n"
|
||||
"movdqa %%xmm4,%%xmm8 \n"
|
||||
"movdqa %%xmm5,%%xmm9 \n"
|
||||
"punpckhwd %%xmm6,%%xmm8 \n"
|
||||
"punpckhwd %%xmm7,%%xmm9 \n"
|
||||
"punpcklwd %%xmm6,%%xmm4 \n"
|
||||
"punpcklwd %%xmm7,%%xmm5 \n"
|
||||
"movdqa %%xmm8,%%xmm6 \n"
|
||||
"movdqa %%xmm9,%%xmm7 \n"
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
"movdqa %%xmm0,%%xmm8 \n"
|
||||
"punpckldq %%xmm4,%%xmm0 \n"
|
||||
"movlpd %%xmm0,(%1) \n" // Write back U channel
|
||||
"movhpd %%xmm0,(%2) \n" // Write back V channel
|
||||
"punpckhdq %%xmm4,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm2,%%xmm8 \n"
|
||||
"punpckldq %%xmm6,%%xmm2 \n"
|
||||
"movlpd %%xmm2,(%1) \n"
|
||||
"movhpd %%xmm2,(%2) \n"
|
||||
"punpckhdq %%xmm6,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm1,%%xmm8 \n"
|
||||
"punpckldq %%xmm5,%%xmm1 \n"
|
||||
"movlpd %%xmm1,(%1) \n"
|
||||
"movhpd %%xmm1,(%2) \n"
|
||||
"punpckhdq %%xmm5,%%xmm8 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"movdqa %%xmm3,%%xmm8 \n"
|
||||
"punpckldq %%xmm7,%%xmm3 \n"
|
||||
"movlpd %%xmm3,(%1) \n"
|
||||
"movhpd %%xmm3,(%2) \n"
|
||||
"punpckhdq %%xmm7,%%xmm8 \n"
|
||||
// The flags set by this sub feed the jg at the bottom; the movlpd/movhpd/lea
// instructions in between leave the flags untouched.
"sub $0x8,%3 \n"
|
||||
"movlpd %%xmm8,(%1,%5) \n"
|
||||
"lea (%1,%5,2),%1 \n"
|
||||
"movhpd %%xmm8,(%2,%6) \n"
|
||||
"lea (%2,%6,2),%2 \n"
|
||||
"jg 1b \n"
|
||||
: "+r"(src), // %0
|
||||
"+r"(dst_a), // %1
|
||||
"+r"(dst_b), // %2
|
||||
"+r"(width) // %3
|
||||
: "r"((intptr_t)(src_stride)), // %4
|
||||
"r"((intptr_t)(dst_stride_a)), // %5
|
||||
"r"((intptr_t)(dst_stride_b)) // %6
|
||||
: "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6",
|
||||
"xmm7", "xmm8", "xmm9");
|
||||
}
|
||||
#endif // defined(HAS_TRANSPOSEUVWX8_SSE2)
|
||||
#endif // defined(__x86_64__) || defined(__i386__)
|
||||
|
||||
@@ -1,484 +0,0 @@
|
||||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/rotate_row.h"
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && \
|
||||
defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
|
||||
(_MIPS_SIM == _MIPS_SIM_ABI32)
|
||||
|
||||
void TransposeWx8_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
|
||||
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
|
||||
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
|
||||
"addu $t3, $t2, %[src_stride] \n"
|
||||
"addu $t5, $t4, %[src_stride] \n"
|
||||
"addu $t6, $t2, $t4 \n"
|
||||
"andi $t0, %[dst], 0x3 \n"
|
||||
"andi $t1, %[dst_stride], 0x3 \n"
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"bnez $t0, 11f \n"
|
||||
" subu $t7, $t9, %[src_stride] \n"
|
||||
//dst + dst_stride word aligned
|
||||
"1: \n"
|
||||
"lbu $t0, 0(%[src]) \n"
|
||||
"lbux $t1, %[src_stride](%[src]) \n"
|
||||
"lbux $t8, $t2(%[src]) \n"
|
||||
"lbux $t9, $t3(%[src]) \n"
|
||||
"sll $t1, $t1, 16 \n"
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"or $t8, $t8, $t9 \n"
|
||||
"precr.qb.ph $s0, $t8, $t0 \n"
|
||||
"lbux $t0, $t4(%[src]) \n"
|
||||
"lbux $t1, $t5(%[src]) \n"
|
||||
"lbux $t8, $t6(%[src]) \n"
|
||||
"lbux $t9, $t7(%[src]) \n"
|
||||
"sll $t1, $t1, 16 \n"
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"or $t8, $t8, $t9 \n"
|
||||
"precr.qb.ph $s1, $t8, $t0 \n"
|
||||
"sw $s0, 0(%[dst]) \n"
|
||||
"addiu %[width], -1 \n"
|
||||
"addiu %[src], 1 \n"
|
||||
"sw $s1, 4(%[dst]) \n"
|
||||
"bnez %[width], 1b \n"
|
||||
" addu %[dst], %[dst], %[dst_stride] \n"
|
||||
"b 2f \n"
|
||||
//dst + dst_stride unaligned
|
||||
"11: \n"
|
||||
"lbu $t0, 0(%[src]) \n"
|
||||
"lbux $t1, %[src_stride](%[src]) \n"
|
||||
"lbux $t8, $t2(%[src]) \n"
|
||||
"lbux $t9, $t3(%[src]) \n"
|
||||
"sll $t1, $t1, 16 \n"
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"or $t8, $t8, $t9 \n"
|
||||
"precr.qb.ph $s0, $t8, $t0 \n"
|
||||
"lbux $t0, $t4(%[src]) \n"
|
||||
"lbux $t1, $t5(%[src]) \n"
|
||||
"lbux $t8, $t6(%[src]) \n"
|
||||
"lbux $t9, $t7(%[src]) \n"
|
||||
"sll $t1, $t1, 16 \n"
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"or $t8, $t8, $t9 \n"
|
||||
"precr.qb.ph $s1, $t8, $t0 \n"
|
||||
"swr $s0, 0(%[dst]) \n"
|
||||
"swl $s0, 3(%[dst]) \n"
|
||||
"addiu %[width], -1 \n"
|
||||
"addiu %[src], 1 \n"
|
||||
"swr $s1, 4(%[dst]) \n"
|
||||
"swl $s1, 7(%[dst]) \n"
|
||||
"bnez %[width], 11b \n"
|
||||
"addu %[dst], %[dst], %[dst_stride] \n"
|
||||
"2: \n"
|
||||
".set pop \n"
|
||||
:[src] "+r" (src),
|
||||
[dst] "+r" (dst),
|
||||
[width] "+r" (width)
|
||||
:[src_stride] "r" (src_stride),
|
||||
[dst_stride] "r" (dst_stride)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9",
|
||||
"s0", "s1"
|
||||
);
|
||||
}
|
||||
|
||||
void TransposeWx8_Fast_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
__asm__ __volatile__ (
|
||||
".set noat \n"
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"beqz %[width], 2f \n"
|
||||
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
|
||||
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
|
||||
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
|
||||
"addu $t3, $t2, %[src_stride] \n"
|
||||
"addu $t5, $t4, %[src_stride] \n"
|
||||
"addu $t6, $t2, $t4 \n"
|
||||
|
||||
"srl $AT, %[width], 0x2 \n"
|
||||
"andi $t0, %[dst], 0x3 \n"
|
||||
"andi $t1, %[dst_stride], 0x3 \n"
|
||||
"or $t0, $t0, $t1 \n"
|
||||
"bnez $t0, 11f \n"
|
||||
" subu $t7, $t9, %[src_stride] \n"
|
||||
//dst + dst_stride word aligned
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src]) \n"
|
||||
"lwx $t1, %[src_stride](%[src]) \n"
|
||||
"lwx $t8, $t2(%[src]) \n"
|
||||
"lwx $t9, $t3(%[src]) \n"
|
||||
|
||||
// t0 = | 30 | 20 | 10 | 00 |
|
||||
// t1 = | 31 | 21 | 11 | 01 |
|
||||
// t8 = | 32 | 22 | 12 | 02 |
|
||||
// t9 = | 33 | 23 | 13 | 03 |
|
||||
|
||||
"precr.qb.ph $s0, $t1, $t0 \n"
|
||||
"precr.qb.ph $s1, $t9, $t8 \n"
|
||||
"precrq.qb.ph $s2, $t1, $t0 \n"
|
||||
"precrq.qb.ph $s3, $t9, $t8 \n"
|
||||
|
||||
// s0 = | 21 | 01 | 20 | 00 |
|
||||
// s1 = | 23 | 03 | 22 | 02 |
|
||||
// s2 = | 31 | 11 | 30 | 10 |
|
||||
// s3 = | 33 | 13 | 32 | 12 |
|
||||
|
||||
"precr.qb.ph $s4, $s1, $s0 \n"
|
||||
"precrq.qb.ph $s5, $s1, $s0 \n"
|
||||
"precr.qb.ph $s6, $s3, $s2 \n"
|
||||
"precrq.qb.ph $s7, $s3, $s2 \n"
|
||||
|
||||
// s4 = | 03 | 02 | 01 | 00 |
|
||||
// s5 = | 23 | 22 | 21 | 20 |
|
||||
// s6 = | 13 | 12 | 11 | 10 |
|
||||
// s7 = | 33 | 32 | 31 | 30 |
|
||||
|
||||
"lwx $t0, $t4(%[src]) \n"
|
||||
"lwx $t1, $t5(%[src]) \n"
|
||||
"lwx $t8, $t6(%[src]) \n"
|
||||
"lwx $t9, $t7(%[src]) \n"
|
||||
|
||||
// t0 = | 34 | 24 | 14 | 04 |
|
||||
// t1 = | 35 | 25 | 15 | 05 |
|
||||
// t8 = | 36 | 26 | 16 | 06 |
|
||||
// t9 = | 37 | 27 | 17 | 07 |
|
||||
|
||||
"precr.qb.ph $s0, $t1, $t0 \n"
|
||||
"precr.qb.ph $s1, $t9, $t8 \n"
|
||||
"precrq.qb.ph $s2, $t1, $t0 \n"
|
||||
"precrq.qb.ph $s3, $t9, $t8 \n"
|
||||
|
||||
// s0 = | 25 | 05 | 24 | 04 |
|
||||
// s1 = | 27 | 07 | 26 | 06 |
|
||||
// s2 = | 35 | 15 | 34 | 14 |
|
||||
// s3 = | 37 | 17 | 36 | 16 |
|
||||
|
||||
"precr.qb.ph $t0, $s1, $s0 \n"
|
||||
"precrq.qb.ph $t1, $s1, $s0 \n"
|
||||
"precr.qb.ph $t8, $s3, $s2 \n"
|
||||
"precrq.qb.ph $t9, $s3, $s2 \n"
|
||||
|
||||
// t0 = | 07 | 06 | 05 | 04 |
|
||||
// t1 = | 27 | 26 | 25 | 24 |
|
||||
// t8 = | 17 | 16 | 15 | 14 |
|
||||
// t9 = | 37 | 36 | 35 | 34 |
|
||||
|
||||
"addu $s0, %[dst], %[dst_stride] \n"
|
||||
"addu $s1, $s0, %[dst_stride] \n"
|
||||
"addu $s2, $s1, %[dst_stride] \n"
|
||||
|
||||
"sw $s4, 0(%[dst]) \n"
|
||||
"sw $t0, 4(%[dst]) \n"
|
||||
"sw $s6, 0($s0) \n"
|
||||
"sw $t8, 4($s0) \n"
|
||||
"sw $s5, 0($s1) \n"
|
||||
"sw $t1, 4($s1) \n"
|
||||
"sw $s7, 0($s2) \n"
|
||||
"sw $t9, 4($s2) \n"
|
||||
|
||||
"addiu $AT, -1 \n"
|
||||
"addiu %[src], 4 \n"
|
||||
|
||||
"bnez $AT, 1b \n"
|
||||
" addu %[dst], $s2, %[dst_stride] \n"
|
||||
"b 2f \n"
|
||||
//dst + dst_stride unaligned
|
||||
"11: \n"
|
||||
"lw $t0, 0(%[src]) \n"
|
||||
"lwx $t1, %[src_stride](%[src]) \n"
|
||||
"lwx $t8, $t2(%[src]) \n"
|
||||
"lwx $t9, $t3(%[src]) \n"
|
||||
|
||||
// t0 = | 30 | 20 | 10 | 00 |
|
||||
// t1 = | 31 | 21 | 11 | 01 |
|
||||
// t8 = | 32 | 22 | 12 | 02 |
|
||||
// t9 = | 33 | 23 | 13 | 03 |
|
||||
|
||||
"precr.qb.ph $s0, $t1, $t0 \n"
|
||||
"precr.qb.ph $s1, $t9, $t8 \n"
|
||||
"precrq.qb.ph $s2, $t1, $t0 \n"
|
||||
"precrq.qb.ph $s3, $t9, $t8 \n"
|
||||
|
||||
// s0 = | 21 | 01 | 20 | 00 |
|
||||
// s1 = | 23 | 03 | 22 | 02 |
|
||||
// s2 = | 31 | 11 | 30 | 10 |
|
||||
// s3 = | 33 | 13 | 32 | 12 |
|
||||
|
||||
"precr.qb.ph $s4, $s1, $s0 \n"
|
||||
"precrq.qb.ph $s5, $s1, $s0 \n"
|
||||
"precr.qb.ph $s6, $s3, $s2 \n"
|
||||
"precrq.qb.ph $s7, $s3, $s2 \n"
|
||||
|
||||
// s4 = | 03 | 02 | 01 | 00 |
|
||||
// s5 = | 23 | 22 | 21 | 20 |
|
||||
// s6 = | 13 | 12 | 11 | 10 |
|
||||
// s7 = | 33 | 32 | 31 | 30 |
|
||||
|
||||
"lwx $t0, $t4(%[src]) \n"
|
||||
"lwx $t1, $t5(%[src]) \n"
|
||||
"lwx $t8, $t6(%[src]) \n"
|
||||
"lwx $t9, $t7(%[src]) \n"
|
||||
|
||||
// t0 = | 34 | 24 | 14 | 04 |
|
||||
// t1 = | 35 | 25 | 15 | 05 |
|
||||
// t8 = | 36 | 26 | 16 | 06 |
|
||||
// t9 = | 37 | 27 | 17 | 07 |
|
||||
|
||||
"precr.qb.ph $s0, $t1, $t0 \n"
|
||||
"precr.qb.ph $s1, $t9, $t8 \n"
|
||||
"precrq.qb.ph $s2, $t1, $t0 \n"
|
||||
"precrq.qb.ph $s3, $t9, $t8 \n"
|
||||
|
||||
// s0 = | 25 | 05 | 24 | 04 |
|
||||
// s1 = | 27 | 07 | 26 | 06 |
|
||||
// s2 = | 35 | 15 | 34 | 14 |
|
||||
// s3 = | 37 | 17 | 36 | 16 |
|
||||
|
||||
"precr.qb.ph $t0, $s1, $s0 \n"
|
||||
"precrq.qb.ph $t1, $s1, $s0 \n"
|
||||
"precr.qb.ph $t8, $s3, $s2 \n"
|
||||
"precrq.qb.ph $t9, $s3, $s2 \n"
|
||||
|
||||
// t0 = | 07 | 06 | 05 | 04 |
|
||||
// t1 = | 27 | 26 | 25 | 24 |
|
||||
// t8 = | 17 | 16 | 15 | 14 |
|
||||
// t9 = | 37 | 36 | 35 | 34 |
|
||||
|
||||
"addu $s0, %[dst], %[dst_stride] \n"
|
||||
"addu $s1, $s0, %[dst_stride] \n"
|
||||
"addu $s2, $s1, %[dst_stride] \n"
|
||||
|
||||
"swr $s4, 0(%[dst]) \n"
|
||||
"swl $s4, 3(%[dst]) \n"
|
||||
"swr $t0, 4(%[dst]) \n"
|
||||
"swl $t0, 7(%[dst]) \n"
|
||||
"swr $s6, 0($s0) \n"
|
||||
"swl $s6, 3($s0) \n"
|
||||
"swr $t8, 4($s0) \n"
|
||||
"swl $t8, 7($s0) \n"
|
||||
"swr $s5, 0($s1) \n"
|
||||
"swl $s5, 3($s1) \n"
|
||||
"swr $t1, 4($s1) \n"
|
||||
"swl $t1, 7($s1) \n"
|
||||
"swr $s7, 0($s2) \n"
|
||||
"swl $s7, 3($s2) \n"
|
||||
"swr $t9, 4($s2) \n"
|
||||
"swl $t9, 7($s2) \n"
|
||||
|
||||
"addiu $AT, -1 \n"
|
||||
"addiu %[src], 4 \n"
|
||||
|
||||
"bnez $AT, 11b \n"
|
||||
" addu %[dst], $s2, %[dst_stride] \n"
|
||||
"2: \n"
|
||||
".set pop \n"
|
||||
".set at \n"
|
||||
:[src] "+r" (src),
|
||||
[dst] "+r" (dst),
|
||||
[width] "+r" (width)
|
||||
:[src_stride] "r" (src_stride),
|
||||
[dst_stride] "r" (dst_stride)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9",
|
||||
"s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7"
|
||||
);
|
||||
}
|
||||
|
||||
void TransposeUVWx8_DSPR2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"beqz %[width], 2f \n"
|
||||
" sll $t2, %[src_stride], 0x1 \n" // src_stride x 2
|
||||
"sll $t4, %[src_stride], 0x2 \n" // src_stride x 4
|
||||
"sll $t9, %[src_stride], 0x3 \n" // src_stride x 8
|
||||
"addu $t3, $t2, %[src_stride] \n"
|
||||
"addu $t5, $t4, %[src_stride] \n"
|
||||
"addu $t6, $t2, $t4 \n"
|
||||
"subu $t7, $t9, %[src_stride] \n"
|
||||
"srl $t1, %[width], 1 \n"
|
||||
|
||||
// check word aligment for dst_a, dst_b, dst_stride_a and dst_stride_b
|
||||
"andi $t0, %[dst_a], 0x3 \n"
|
||||
"andi $t8, %[dst_b], 0x3 \n"
|
||||
"or $t0, $t0, $t8 \n"
|
||||
"andi $t8, %[dst_stride_a], 0x3 \n"
|
||||
"andi $s5, %[dst_stride_b], 0x3 \n"
|
||||
"or $t8, $t8, $s5 \n"
|
||||
"or $t0, $t0, $t8 \n"
|
||||
"bnez $t0, 11f \n"
|
||||
" nop \n"
|
||||
// dst + dst_stride word aligned (both, a & b dst addresses)
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
|
||||
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
|
||||
"addu $s5, %[dst_a], %[dst_stride_a] \n"
|
||||
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
|
||||
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
|
||||
"addu $s6, %[dst_b], %[dst_stride_b] \n"
|
||||
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
|
||||
|
||||
"sll $t0, $t0, 16 \n"
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
|
||||
|
||||
"sw $s3, 0($s5) \n"
|
||||
"sw $s4, 0($s6) \n"
|
||||
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
|
||||
|
||||
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
|
||||
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
|
||||
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
|
||||
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
|
||||
"sw $s3, 0(%[dst_a]) \n"
|
||||
"sw $s4, 0(%[dst_b]) \n"
|
||||
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
|
||||
|
||||
"sll $t0, $t0, 16 \n"
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
|
||||
"sw $s3, 4($s5) \n"
|
||||
"sw $s4, 4($s6) \n"
|
||||
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
|
||||
|
||||
"addiu %[src], 4 \n"
|
||||
"addiu $t1, -1 \n"
|
||||
"sll $t0, %[dst_stride_a], 1 \n"
|
||||
"sll $t8, %[dst_stride_b], 1 \n"
|
||||
"sw $s3, 4(%[dst_a]) \n"
|
||||
"sw $s4, 4(%[dst_b]) \n"
|
||||
"addu %[dst_a], %[dst_a], $t0 \n"
|
||||
"bnez $t1, 1b \n"
|
||||
" addu %[dst_b], %[dst_b], $t8 \n"
|
||||
"b 2f \n"
|
||||
" nop \n"
|
||||
|
||||
// dst_a or dst_b or dst_stride_a or dst_stride_b not word aligned
|
||||
"11: \n"
|
||||
"lw $t0, 0(%[src]) \n" // |B0|A0|b0|a0|
|
||||
"lwx $t8, %[src_stride](%[src]) \n" // |B1|A1|b1|a1|
|
||||
"addu $s5, %[dst_a], %[dst_stride_a] \n"
|
||||
"lwx $t9, $t2(%[src]) \n" // |B2|A2|b2|a2|
|
||||
"lwx $s0, $t3(%[src]) \n" // |B3|A3|b3|a3|
|
||||
"addu $s6, %[dst_b], %[dst_stride_b] \n"
|
||||
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B1|A1|B0|A0|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B3|A3|B2|A2|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A3|A2|A1|A0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B3|B2|B1|B0|
|
||||
|
||||
"sll $t0, $t0, 16 \n"
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b1|a1|b0|a0|
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b3|a3|b2|a2|
|
||||
|
||||
"swr $s3, 0($s5) \n"
|
||||
"swl $s3, 3($s5) \n"
|
||||
"swr $s4, 0($s6) \n"
|
||||
"swl $s4, 3($s6) \n"
|
||||
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a3|a2|a1|a0|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b3|b2|b1|b0|
|
||||
|
||||
"lwx $t0, $t4(%[src]) \n" // |B4|A4|b4|a4|
|
||||
"lwx $t8, $t5(%[src]) \n" // |B5|A5|b5|a5|
|
||||
"lwx $t9, $t6(%[src]) \n" // |B6|A6|b6|a6|
|
||||
"lwx $s0, $t7(%[src]) \n" // |B7|A7|b7|a7|
|
||||
"swr $s3, 0(%[dst_a]) \n"
|
||||
"swl $s3, 3(%[dst_a]) \n"
|
||||
"swr $s4, 0(%[dst_b]) \n"
|
||||
"swl $s4, 3(%[dst_b]) \n"
|
||||
|
||||
"precrq.ph.w $s1, $t8, $t0 \n" // |B5|A5|B4|A4|
|
||||
"precrq.ph.w $s2, $s0, $t9 \n" // |B6|A6|B7|A7|
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |A7|A6|A5|A4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |B7|B6|B5|B4|
|
||||
|
||||
"sll $t0, $t0, 16 \n"
|
||||
"packrl.ph $s1, $t8, $t0 \n" // |b5|a5|b4|a4|
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"packrl.ph $s2, $s0, $t9 \n" // |b7|a7|b6|a6|
|
||||
|
||||
"swr $s3, 4($s5) \n"
|
||||
"swl $s3, 7($s5) \n"
|
||||
"swr $s4, 4($s6) \n"
|
||||
"swl $s4, 7($s6) \n"
|
||||
|
||||
"precr.qb.ph $s3, $s2, $s1 \n" // |a7|a6|a5|a4|
|
||||
"precrq.qb.ph $s4, $s2, $s1 \n" // |b7|b6|b5|b4|
|
||||
|
||||
"addiu %[src], 4 \n"
|
||||
"addiu $t1, -1 \n"
|
||||
"sll $t0, %[dst_stride_a], 1 \n"
|
||||
"sll $t8, %[dst_stride_b], 1 \n"
|
||||
"swr $s3, 4(%[dst_a]) \n"
|
||||
"swl $s3, 7(%[dst_a]) \n"
|
||||
"swr $s4, 4(%[dst_b]) \n"
|
||||
"swl $s4, 7(%[dst_b]) \n"
|
||||
"addu %[dst_a], %[dst_a], $t0 \n"
|
||||
"bnez $t1, 11b \n"
|
||||
" addu %[dst_b], %[dst_b], $t8 \n"
|
||||
|
||||
"2: \n"
|
||||
".set pop \n"
|
||||
: [src] "+r" (src),
|
||||
[dst_a] "+r" (dst_a),
|
||||
[dst_b] "+r" (dst_b),
|
||||
[width] "+r" (width),
|
||||
[src_stride] "+r" (src_stride)
|
||||
: [dst_stride_a] "r" (dst_stride_a),
|
||||
[dst_stride_b] "r" (dst_stride_b)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9",
|
||||
"s0", "s1", "s2", "s3",
|
||||
"s4", "s5", "s6"
|
||||
);
|
||||
}
|
||||
|
||||
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
@@ -0,0 +1,291 @@
|
||||
/*
|
||||
* Copyright 2011 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/rotate_row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// This module is for Mips MMI.
|
||||
#if !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
|
||||
|
||||
// Transposes an 8-row strip in 8x8 byte tiles using Loongson MMI.
// Each loop pass loads 8 bytes from each of 8 consecutive source rows,
// interleaves them at byte, halfword and word granularity, and stores the
// result as 8 destination rows of 8 bytes (gssdlc1/gssdrc1 store pairs).
// src then advances by 8 bytes and dst by 8 rows.
//   src, src_stride - source pointer and its row pitch in bytes. Rows are read
//                     with ldc1. NOTE(review): that presumably requires 8-byte
//                     aligned source rows - confirm against the callers.
//   dst, dst_stride - destination pointer and its row pitch in bytes.
//   width           - reduced by 8 per pass; the loop only ends when it
//                     reaches exactly zero, so it must be a positive multiple
//                     of 8 on entry.
void TransposeWx8_MMI(const uint8_t* src,
                      int src_stride,
                      uint8_t* dst,
                      int dst_stride,
                      int width) {
  uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
  uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13;
  uint8_t* src_tmp = nullptr;  // row cursor, recomputed from src on each pass

  __asm__ volatile(
      "1: \n\t"
      "ldc1 %[tmp12], 0x00(%[src]) \n\t"
      "dadd %[src_tmp], %[src], %[src_stride] \n\t"
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp0 = (00 10 01 11 02 12 03 13) */
      "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
      /* tmp1 = (04 14 05 15 06 16 07 17) */
      "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"

      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp2 = (20 30 21 31 22 32 23 33) */
      "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
      /* tmp3 = (24 34 25 35 26 36 27 37) */
      "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"

      /* tmp4 = (00 10 20 30 01 11 21 31) */
      "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t"
      /* tmp5 = (02 12 22 32 03 13 23 33) */
      "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t"
      /* tmp6 = (04 14 24 34 05 15 25 35) */
      "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t"
      /* tmp7 = (06 16 26 36 07 17 27 37) */
      "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t"

      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp0 = (40 50 41 51 42 52 43 53) */
      "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
      /* tmp1 = (44 54 45 55 46 56 47 57) */
      "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"

      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp2 = (60 70 61 71 62 72 63 73) */
      "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
      /* tmp3 = (64 74 65 75 66 76 67 77) */
      "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"

      /* tmp8 = (40 50 60 70 41 51 61 71) */
      "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t"
      /* tmp9 = (42 52 62 72 43 53 63 73) */
      "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t"
      /* tmp10 = (44 54 64 74 45 55 65 75) */
      "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t"
      /* tmp11 = (46 56 66 76 47 57 67 77) */
      "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t"

      /* tmp0 = (00 10 20 30 40 50 60 70) */
      "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t"
      /* tmp1 = (01 11 21 31 41 51 61 71) */
      "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"

      /* tmp0 = (02 12 22 32 42 52 62 72) */
      "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t"
      /* tmp1 = (03 13 23 33 43 53 63 73) */
      "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"

      /* tmp0 = (04 14 24 34 44 54 64 74) */
      "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t"
      /* tmp1 = (05 15 25 35 45 55 65 75) */
      "punpckhwd %[tmp1], %[tmp6], %[tmp10] \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"

      /* tmp0 = (06 16 26 36 46 56 66 76) */
      "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t"
      /* tmp1 = (07 17 27 37 47 57 67 77) */
      "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst]) \n\t"
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst]) \n\t"

      /* Step to the next tile: 8 destination rows down, 8 source bytes right. */
      "dadd %[dst], %[dst], %[dst_stride] \n\t"
      "daddi %[src], %[src], 0x08 \n\t"
      "daddi %[width], %[width], -0x08 \n\t"
      "bnez %[width], 1b \n\t"

      // src and width are updated by the daddi instructions above, so they
      // must be read-write operands; declaring them input-only would let the
      // compiler assume their registers still hold the original values.
      : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2),
        [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5),
        [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8),
        [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11),
        [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst] "+&r"(dst),
        [src_tmp] "+&r"(src_tmp), [src] "+&r"(src), [width] "+&r"(width)
      : [src_stride] "r"(src_stride), [dst_stride] "r"(dst_stride)
      : "memory");
}
|
||||
|
||||
// Transposes interleaved UV data and splits it into two planes.
//
// Each pass of the asm loop reads 8 source rows of 8 bytes (4 interleaved
// U/V pairs per row, rows |src_stride| bytes apart), transposes them, and
// writes 4 rows of 8 bytes to |dst_a| (the U bytes) and 4 rows of 8 bytes to
// |dst_b| (the V bytes). After each pass |src| advances by 8 bytes and
// |width| drops by 4; the loop exits only when |width| reaches exactly zero,
// so |width| must be a positive multiple of 4.
//
// |src| and |width| are updated inside the asm, so they are declared as
// read-write ("+&r") operands. Declaring them as plain inputs, as was done
// previously, lets the compiler assume their registers are unchanged.
void TransposeUVWx8_MMI(const uint8_t* src,
                        int src_stride,
                        uint8_t* dst_a,
                        int dst_stride_a,
                        uint8_t* dst_b,
                        int dst_stride_b,
                        int width) {
  uint64_t tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
  uint64_t tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13;
  uint8_t* src_tmp = nullptr;

  __asm__ volatile(
      "1: \n\t"
      /* tmp12 = (u00 v00 u01 v01 u02 v02 u03 v03) */
      "ldc1 %[tmp12], 0x00(%[src]) \n\t"
      "dadd %[src_tmp], %[src], %[src_stride] \n\t"
      /* tmp13 = (u10 v10 u11 v11 u12 v12 u13 v13) */
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp0 = (u00 u10 v00 v10 u01 u11 v01 v11) */
      "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
      /* tmp1 = (u02 u12 v02 v12 u03 u13 v03 v13) */
      "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"

      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      /* tmp12 = (u20 v20 u21 v21 u22 v22 u23 v23) */
      "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      /* tmp13 = (u30 v30 u31 v31 u32 v32 u33 v33) */
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp2 = (u20 u30 v20 v30 u21 u31 v21 v31) */
      "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
      /* tmp3 = (u22 u32 v22 v32 u23 u33 v23 v33) */
      "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"

      /* tmp4 = (u00 u10 u20 u30 v00 v10 v20 v30) */
      "punpcklhw %[tmp4], %[tmp0], %[tmp2] \n\t"
      /* tmp5 = (u01 u11 u21 u31 v01 v11 v21 v31) */
      "punpckhhw %[tmp5], %[tmp0], %[tmp2] \n\t"
      /* tmp6 = (u02 u12 u22 u32 v02 v12 v22 v32) */
      "punpcklhw %[tmp6], %[tmp1], %[tmp3] \n\t"
      /* tmp7 = (u03 u13 u23 u33 v03 v13 v23 v33) */
      "punpckhhw %[tmp7], %[tmp1], %[tmp3] \n\t"

      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      /* tmp12 = (u40 v40 u41 v41 u42 v42 u43 v43) */
      "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      /* tmp13 = (u50 v50 u51 v51 u52 v52 u53 v53) */
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp0 = (u40 u50 v40 v50 u41 u51 v41 v51) */
      "punpcklbh %[tmp0], %[tmp12], %[tmp13] \n\t"
      /* tmp1 = (u42 u52 v42 v52 u43 u53 v43 v53) */
      "punpckhbh %[tmp1], %[tmp12], %[tmp13] \n\t"

      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      /* tmp12 = (u60 v60 u61 v61 u62 v62 u63 v63) */
      "ldc1 %[tmp12], 0x00(%[src_tmp]) \n\t"
      "dadd %[src_tmp], %[src_tmp], %[src_stride] \n\t"
      /* tmp13 = (u70 v70 u71 v71 u72 v72 u73 v73) */
      "ldc1 %[tmp13], 0x00(%[src_tmp]) \n\t"

      /* tmp2 = (u60 u70 v60 v70 u61 u71 v61 v71) */
      "punpcklbh %[tmp2], %[tmp12], %[tmp13] \n\t"
      /* tmp3 = (u62 u72 v62 v72 u63 u73 v63 v73) */
      "punpckhbh %[tmp3], %[tmp12], %[tmp13] \n\t"

      /* tmp8 = (u40 u50 u60 u70 v40 v50 v60 v70) */
      "punpcklhw %[tmp8], %[tmp0], %[tmp2] \n\t"
      /* tmp9 = (u41 u51 u61 u71 v41 v51 v61 v71) */
      "punpckhhw %[tmp9], %[tmp0], %[tmp2] \n\t"
      /* tmp10 = (u42 u52 u62 u72 v42 v52 v62 v72) */
      "punpcklhw %[tmp10], %[tmp1], %[tmp3] \n\t"
      /* tmp11 = (u43 u53 u63 u73 v43 v53 v63 v73) */
      "punpckhhw %[tmp11], %[tmp1], %[tmp3] \n\t"

      /* tmp0 = (u00 u10 u20 u30 u40 u50 u60 u70) */
      "punpcklwd %[tmp0], %[tmp4], %[tmp8] \n\t"
      /* tmp1 = (v00 v10 v20 v30 v40 v50 v60 v70) */
      "punpckhwd %[tmp1], %[tmp4], %[tmp8] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"

      /* tmp0 = (u01 u11 u21 u31 u41 u51 u61 u71) */
      "punpcklwd %[tmp0], %[tmp5], %[tmp9] \n\t"
      /* tmp1 = (v01 v11 v21 v31 v41 v51 v61 v71) */
      "punpckhwd %[tmp1], %[tmp5], %[tmp9] \n\t"
      "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
      "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"

      /* tmp0 = (u02 u12 u22 u32 u42 u52 u62 u72) */
      "punpcklwd %[tmp0], %[tmp6], %[tmp10] \n\t"
      /* tmp1 = (v02 v12 v22 v32 v42 v52 v62 v72) */
      "punpckhwd %[tmp1], %[tmp6], %[tmp10] \n\t"
      "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
      "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"

      /* tmp0 = (u03 u13 u23 u33 u43 u53 u63 u73) */
      "punpcklwd %[tmp0], %[tmp7], %[tmp11] \n\t"
      /* tmp1 = (v03 v13 v23 v33 v43 v53 v63 v73) */
      "punpckhwd %[tmp1], %[tmp7], %[tmp11] \n\t"
      "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
      "gssdlc1 %[tmp0], 0x07(%[dst_a]) \n\t"
      "gssdrc1 %[tmp0], 0x00(%[dst_a]) \n\t"
      "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
      "gssdlc1 %[tmp1], 0x07(%[dst_b]) \n\t"
      "gssdrc1 %[tmp1], 0x00(%[dst_b]) \n\t"

      /* Step both destinations to the next output row, then advance the
       * source by 8 bytes (4 UV pairs) and count those 4 pairs off width. */
      "dadd %[dst_a], %[dst_a], %[dst_stride_a] \n\t"
      "dadd %[dst_b], %[dst_b], %[dst_stride_b] \n\t"
      "daddiu %[src], %[src], 0x08 \n\t"
      "daddi %[width], %[width], -0x04 \n\t"
      "bnez %[width], 1b \n\t"

      : [tmp0] "=&f"(tmp0), [tmp1] "=&f"(tmp1), [tmp2] "=&f"(tmp2),
        [tmp3] "=&f"(tmp3), [tmp4] "=&f"(tmp4), [tmp5] "=&f"(tmp5),
        [tmp6] "=&f"(tmp6), [tmp7] "=&f"(tmp7), [tmp8] "=&f"(tmp8),
        [tmp9] "=&f"(tmp9), [tmp10] "=&f"(tmp10), [tmp11] "=&f"(tmp11),
        [tmp12] "=&f"(tmp12), [tmp13] "=&f"(tmp13), [dst_a] "+&r"(dst_a),
        [dst_b] "+&r"(dst_b), [src_tmp] "+&r"(src_tmp),
        /* src and width are written by the loop, so they must be in/out. */
        [src] "+&r"(src), [width] "+&r"(width)
      : [dst_stride_a] "r"(dst_stride_a), [dst_stride_b] "r"(dst_stride_b),
        [src_stride] "r"(src_stride)
      : "memory");
}
|
||||
|
||||
#endif // !defined(LIBYUV_DISABLE_MMI) && defined(_MIPS_ARCH_LOONGSON3A)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
@@ -0,0 +1,250 @@
|
||||
/*
|
||||
* Copyright 2016 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/rotate_row.h"
|
||||
|
||||
// This module is for GCC MSA
|
||||
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
#include "libyuv/macros_msa.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Byte interleave of two vector pairs.
// out0/out1 receive __msa_ilvr_b / __msa_ilvl_b of (in1, in0);
// out2/out3 receive the same for (in3, in2).
// Wrapped in do { } while (0) so the macro acts as a single statement (safe
// in an unbraced if/else); arguments are parenthesized so the casts apply to
// the whole argument expression.
#define ILVRL_B(in0, in1, in2, in3, out0, out1, out2, out3)    \
  do {                                                          \
    (out0) = (v16u8)__msa_ilvr_b((v16i8)(in1), (v16i8)(in0));  \
    (out1) = (v16u8)__msa_ilvl_b((v16i8)(in1), (v16i8)(in0));  \
    (out2) = (v16u8)__msa_ilvr_b((v16i8)(in3), (v16i8)(in2));  \
    (out3) = (v16u8)__msa_ilvl_b((v16i8)(in3), (v16i8)(in2));  \
  } while (0)
|
||||
|
||||
// Halfword (16-bit) interleave of two vector pairs.
// out0/out1 receive __msa_ilvr_h / __msa_ilvl_h of (in1, in0);
// out2/out3 receive the same for (in3, in2).
// Wrapped in do { } while (0) so the macro acts as a single statement (safe
// in an unbraced if/else); arguments are parenthesized so the casts apply to
// the whole argument expression.
#define ILVRL_H(in0, in1, in2, in3, out0, out1, out2, out3)    \
  do {                                                          \
    (out0) = (v16u8)__msa_ilvr_h((v8i16)(in1), (v8i16)(in0));  \
    (out1) = (v16u8)__msa_ilvl_h((v8i16)(in1), (v8i16)(in0));  \
    (out2) = (v16u8)__msa_ilvr_h((v8i16)(in3), (v8i16)(in2));  \
    (out3) = (v16u8)__msa_ilvl_h((v8i16)(in3), (v8i16)(in2));  \
  } while (0)
|
||||
|
||||
// Word (32-bit) interleave of two vector pairs.
// out0/out1 receive __msa_ilvr_w / __msa_ilvl_w of (in1, in0);
// out2/out3 receive the same for (in3, in2).
// Wrapped in do { } while (0) so the macro acts as a single statement (safe
// in an unbraced if/else); arguments are parenthesized so the casts apply to
// the whole argument expression.
#define ILVRL_W(in0, in1, in2, in3, out0, out1, out2, out3)    \
  do {                                                          \
    (out0) = (v16u8)__msa_ilvr_w((v4i32)(in1), (v4i32)(in0));  \
    (out1) = (v16u8)__msa_ilvl_w((v4i32)(in1), (v4i32)(in0));  \
    (out2) = (v16u8)__msa_ilvr_w((v4i32)(in3), (v4i32)(in2));  \
    (out3) = (v16u8)__msa_ilvl_w((v4i32)(in3), (v4i32)(in2));  \
  } while (0)
|
||||
|
||||
// Doubleword (64-bit) interleave of two vector pairs.
// out0/out1 receive __msa_ilvr_d / __msa_ilvl_d of (in1, in0);
// out2/out3 receive the same for (in3, in2).
// Wrapped in do { } while (0) so the macro acts as a single statement (safe
// in an unbraced if/else); arguments are parenthesized so the casts apply to
// the whole argument expression.
#define ILVRL_D(in0, in1, in2, in3, out0, out1, out2, out3)    \
  do {                                                          \
    (out0) = (v16u8)__msa_ilvr_d((v2i64)(in1), (v2i64)(in0));  \
    (out1) = (v16u8)__msa_ilvl_d((v2i64)(in1), (v2i64)(in0));  \
    (out2) = (v16u8)__msa_ilvr_d((v2i64)(in3), (v2i64)(in2));  \
    (out3) = (v16u8)__msa_ilvl_d((v2i64)(in3), (v2i64)(in2));  \
  } while (0)
|
||||
|
||||
// 16-row transpose built from two calls to the 8-row C routine: the first
// call handles source rows 0..7, the second handles rows 8..15 and writes
// its output 8 bytes further along each destination row.
void TransposeWx16_C(const uint8_t* src,
                     int src_stride,
                     uint8_t* dst,
                     int dst_stride,
                     int width) {
  const uint8_t* const second_half_src = src + 8 * src_stride;
  uint8_t* const second_half_dst = dst + 8;

  TransposeWx8_C(src, src_stride, dst, dst_stride, width);
  TransposeWx8_C(second_half_src, src_stride, second_half_dst, dst_stride,
                 width);
}
|
||||
|
||||
// 16-row UV transpose built from two calls to the 8-row C routine: the first
// call handles source rows 0..7, the second handles rows 8..15 and writes
// its output 8 bytes further along each row of both destination planes.
void TransposeUVWx16_C(const uint8_t* src,
                       int src_stride,
                       uint8_t* dst_a,
                       int dst_stride_a,
                       uint8_t* dst_b,
                       int dst_stride_b,
                       int width) {
  const uint8_t* const second_half_src = src + 8 * src_stride;
  uint8_t* const second_half_a = dst_a + 8;
  uint8_t* const second_half_b = dst_b + 8;

  TransposeUVWx8_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b,
                   width);
  TransposeUVWx8_C(second_half_src, src_stride, second_half_a, dst_stride_a,
                   second_half_b, dst_stride_b, width);
}
|
||||
|
||||
void TransposeWx16_MSA(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
int x;
|
||||
const uint8_t* s;
|
||||
v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3;
|
||||
v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
|
||||
v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9;
|
||||
|
||||
for (x = 0; x < width; x += 16) {
|
||||
s = src;
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
|
||||
ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3);
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
|
||||
ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7);
|
||||
ILVRL_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3);
|
||||
ILVRL_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7);
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
|
||||
ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3);
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
|
||||
ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7);
|
||||
res8 = (v16u8)__msa_ilvr_w((v4i32)reg4, (v4i32)reg0);
|
||||
res9 = (v16u8)__msa_ilvl_w((v4i32)reg4, (v4i32)reg0);
|
||||
ILVRL_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3);
|
||||
ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
|
||||
dst += dst_stride * 4;
|
||||
res8 = (v16u8)__msa_ilvr_w((v4i32)reg5, (v4i32)reg1);
|
||||
res9 = (v16u8)__msa_ilvl_w((v4i32)reg5, (v4i32)reg1);
|
||||
ILVRL_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3);
|
||||
ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
|
||||
dst += dst_stride * 4;
|
||||
res8 = (v16u8)__msa_ilvr_w((v4i32)reg6, (v4i32)reg2);
|
||||
res9 = (v16u8)__msa_ilvl_w((v4i32)reg6, (v4i32)reg2);
|
||||
ILVRL_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3);
|
||||
ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
|
||||
dst += dst_stride * 4;
|
||||
res8 = (v16u8)__msa_ilvr_w((v4i32)reg7, (v4i32)reg3);
|
||||
res9 = (v16u8)__msa_ilvl_w((v4i32)reg7, (v4i32)reg3);
|
||||
ILVRL_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3);
|
||||
ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride);
|
||||
src += 16;
|
||||
dst += dst_stride * 4;
|
||||
}
|
||||
}
|
||||
|
||||
void TransposeUVWx16_MSA(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width) {
|
||||
int x;
|
||||
const uint8_t* s;
|
||||
v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3;
|
||||
v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7;
|
||||
v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9;
|
||||
|
||||
for (x = 0; x < width; x += 8) {
|
||||
s = src;
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
|
||||
ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3);
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
|
||||
ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7);
|
||||
ILVRL_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3);
|
||||
ILVRL_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7);
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
|
||||
ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3);
|
||||
src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src1 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src2 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
src3 = (v16u8)__msa_ld_b((v16i8*)s, 0);
|
||||
s += src_stride;
|
||||
ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3);
|
||||
ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7);
|
||||
res8 = (v16u8)__msa_ilvr_w((v4i32)reg4, (v4i32)reg0);
|
||||
res9 = (v16u8)__msa_ilvl_w((v4i32)reg4, (v4i32)reg0);
|
||||
ILVRL_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3);
|
||||
ST_UB2(dst0, dst2, dst_a, dst_stride_a);
|
||||
ST_UB2(dst1, dst3, dst_b, dst_stride_b);
|
||||
dst_a += dst_stride_a * 2;
|
||||
dst_b += dst_stride_b * 2;
|
||||
res8 = (v16u8)__msa_ilvr_w((v4i32)reg5, (v4i32)reg1);
|
||||
res9 = (v16u8)__msa_ilvl_w((v4i32)reg5, (v4i32)reg1);
|
||||
ILVRL_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3);
|
||||
ST_UB2(dst0, dst2, dst_a, dst_stride_a);
|
||||
ST_UB2(dst1, dst3, dst_b, dst_stride_b);
|
||||
dst_a += dst_stride_a * 2;
|
||||
dst_b += dst_stride_b * 2;
|
||||
res8 = (v16u8)__msa_ilvr_w((v4i32)reg6, (v4i32)reg2);
|
||||
res9 = (v16u8)__msa_ilvl_w((v4i32)reg6, (v4i32)reg2);
|
||||
ILVRL_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3);
|
||||
ST_UB2(dst0, dst2, dst_a, dst_stride_a);
|
||||
ST_UB2(dst1, dst3, dst_b, dst_stride_b);
|
||||
dst_a += dst_stride_a * 2;
|
||||
dst_b += dst_stride_b * 2;
|
||||
res8 = (v16u8)__msa_ilvr_w((v4i32)reg7, (v4i32)reg3);
|
||||
res9 = (v16u8)__msa_ilvl_w((v4i32)reg7, (v4i32)reg3);
|
||||
ILVRL_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3);
|
||||
ST_UB2(dst0, dst2, dst_a, dst_stride_a);
|
||||
ST_UB2(dst1, dst3, dst_b, dst_stride_b);
|
||||
src += 16;
|
||||
dst_a += dst_stride_a * 2;
|
||||
dst_b += dst_stride_b * 2;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
#endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
|
||||
+225
-342
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/rotate_row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
@@ -21,38 +21,32 @@ extern "C" {
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \
|
||||
!defined(__aarch64__)
|
||||
|
||||
static uvec8 kVTbl4x4Transpose =
|
||||
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
|
||||
static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13,
|
||||
2, 6, 10, 14, 3, 7, 11, 15};
|
||||
|
||||
void TransposeWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride,
|
||||
void TransposeWx8_NEON(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
const uint8* src_temp;
|
||||
asm volatile (
|
||||
// loops are on blocks of 8. loop will stop when
|
||||
// counter gets to or below 0. starting the counter
|
||||
// at w-8 allow for this
|
||||
"sub %5, #8 \n"
|
||||
const uint8_t* src_temp;
|
||||
asm volatile(
|
||||
// loops are on blocks of 8. loop will stop when
|
||||
// counter gets to or below 0. starting the counter
|
||||
// at w-8 allow for this
|
||||
"sub %5, #8 \n"
|
||||
|
||||
// handle 8x8 blocks. this should be the majority of the plane
|
||||
"1: \n"
|
||||
// handle 8x8 blocks. this should be the majority of the plane
|
||||
"1: \n"
|
||||
"mov %0, %1 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d0}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d1}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d2}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d3}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d4}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d5}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d6}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.8 {d7}, [%0] \n"
|
||||
|
||||
"vtrn.8 d1, d0 \n"
|
||||
@@ -77,21 +71,13 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
|
||||
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d1}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d0}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d3}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d2}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d5}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d4}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d7}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d6}, [%0] \n"
|
||||
|
||||
"add %1, #8 \n" // src += 8
|
||||
@@ -99,180 +85,138 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
|
||||
"subs %5, #8 \n" // w -= 8
|
||||
"bge 1b \n"
|
||||
|
||||
// add 8 back to counter. if the result is 0 there are
|
||||
// no residuals.
|
||||
"adds %5, #8 \n"
|
||||
"beq 4f \n"
|
||||
// add 8 back to counter. if the result is 0 there are
|
||||
// no residuals.
|
||||
"adds %5, #8 \n"
|
||||
"beq 4f \n"
|
||||
|
||||
// some residual, so between 1 and 7 lines left to transpose
|
||||
"cmp %5, #2 \n"
|
||||
"blt 3f \n"
|
||||
// some residual, so between 1 and 7 lines left to transpose
|
||||
"cmp %5, #2 \n"
|
||||
"blt 3f \n"
|
||||
|
||||
"cmp %5, #4 \n"
|
||||
"blt 2f \n"
|
||||
"cmp %5, #4 \n"
|
||||
"blt 2f \n"
|
||||
|
||||
// 4x8 block
|
||||
"mov %0, %1 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d0[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d0[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d1[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d1[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d2[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d2[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d3[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.32 {d3[1]}, [%0] \n"
|
||||
// 4x8 block
|
||||
"mov %0, %1 \n"
|
||||
"vld1.32 {d0[0]}, [%0], %2 \n"
|
||||
"vld1.32 {d0[1]}, [%0], %2 \n"
|
||||
"vld1.32 {d1[0]}, [%0], %2 \n"
|
||||
"vld1.32 {d1[1]}, [%0], %2 \n"
|
||||
"vld1.32 {d2[0]}, [%0], %2 \n"
|
||||
"vld1.32 {d2[1]}, [%0], %2 \n"
|
||||
"vld1.32 {d3[0]}, [%0], %2 \n"
|
||||
"vld1.32 {d3[1]}, [%0] \n"
|
||||
|
||||
"mov %0, %3 \n"
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(6)
|
||||
"vld1.8 {q3}, [%6] \n"
|
||||
"vld1.8 {q3}, [%6] \n"
|
||||
|
||||
"vtbl.8 d4, {d0, d1}, d6 \n"
|
||||
"vtbl.8 d5, {d0, d1}, d7 \n"
|
||||
"vtbl.8 d0, {d2, d3}, d6 \n"
|
||||
"vtbl.8 d1, {d2, d3}, d7 \n"
|
||||
"vtbl.8 d4, {d0, d1}, d6 \n"
|
||||
"vtbl.8 d5, {d0, d1}, d7 \n"
|
||||
"vtbl.8 d0, {d2, d3}, d6 \n"
|
||||
"vtbl.8 d1, {d2, d3}, d7 \n"
|
||||
|
||||
// TODO(frkoenig): Rework shuffle above to
|
||||
// write out with 4 instead of 8 writes.
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d4[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d4[1]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d5[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d5[1]}, [%0] \n"
|
||||
// TODO(frkoenig): Rework shuffle above to
|
||||
// write out with 4 instead of 8 writes.
|
||||
"vst1.32 {d4[0]}, [%0], %4 \n"
|
||||
"vst1.32 {d4[1]}, [%0], %4 \n"
|
||||
"vst1.32 {d5[0]}, [%0], %4 \n"
|
||||
"vst1.32 {d5[1]}, [%0] \n"
|
||||
|
||||
"add %0, %3, #4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d0[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d0[1]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d1[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d1[1]}, [%0] \n"
|
||||
"add %0, %3, #4 \n"
|
||||
"vst1.32 {d0[0]}, [%0], %4 \n"
|
||||
"vst1.32 {d0[1]}, [%0], %4 \n"
|
||||
"vst1.32 {d1[0]}, [%0], %4 \n"
|
||||
"vst1.32 {d1[1]}, [%0] \n"
|
||||
|
||||
"add %1, #4 \n" // src += 4
|
||||
"add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride
|
||||
"subs %5, #4 \n" // w -= 4
|
||||
"beq 4f \n"
|
||||
"add %1, #4 \n" // src += 4
|
||||
"add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride
|
||||
"subs %5, #4 \n" // w -= 4
|
||||
"beq 4f \n"
|
||||
|
||||
// some residual, check to see if it includes a 2x8 block,
|
||||
// or less
|
||||
"cmp %5, #2 \n"
|
||||
"blt 3f \n"
|
||||
// some residual, check to see if it includes a 2x8 block,
|
||||
// or less
|
||||
"cmp %5, #2 \n"
|
||||
"blt 3f \n"
|
||||
|
||||
// 2x8 block
|
||||
"2: \n"
|
||||
"mov %0, %1 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d0[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d1[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d0[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d1[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d0[2]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d1[2]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d0[3]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.16 {d1[3]}, [%0] \n"
|
||||
// 2x8 block
|
||||
"2: \n"
|
||||
"mov %0, %1 \n"
|
||||
"vld1.16 {d0[0]}, [%0], %2 \n"
|
||||
"vld1.16 {d1[0]}, [%0], %2 \n"
|
||||
"vld1.16 {d0[1]}, [%0], %2 \n"
|
||||
"vld1.16 {d1[1]}, [%0], %2 \n"
|
||||
"vld1.16 {d0[2]}, [%0], %2 \n"
|
||||
"vld1.16 {d1[2]}, [%0], %2 \n"
|
||||
"vld1.16 {d0[3]}, [%0], %2 \n"
|
||||
"vld1.16 {d1[3]}, [%0] \n"
|
||||
|
||||
"vtrn.8 d0, d1 \n"
|
||||
"vtrn.8 d0, d1 \n"
|
||||
|
||||
"mov %0, %3 \n"
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.64 {d0}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.64 {d1}, [%0] \n"
|
||||
"vst1.64 {d0}, [%0], %4 \n"
|
||||
"vst1.64 {d1}, [%0] \n"
|
||||
|
||||
"add %1, #2 \n" // src += 2
|
||||
"add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride
|
||||
"subs %5, #2 \n" // w -= 2
|
||||
"beq 4f \n"
|
||||
"add %1, #2 \n" // src += 2
|
||||
"add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride
|
||||
"subs %5, #2 \n" // w -= 2
|
||||
"beq 4f \n"
|
||||
|
||||
// 1x8 block
|
||||
"3: \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[0]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[1]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[2]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[3]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[4]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[5]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[6]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld1.8 {d0[7]}, [%1] \n"
|
||||
// 1x8 block
|
||||
"3: \n"
|
||||
"vld1.8 {d0[0]}, [%1], %2 \n"
|
||||
"vld1.8 {d0[1]}, [%1], %2 \n"
|
||||
"vld1.8 {d0[2]}, [%1], %2 \n"
|
||||
"vld1.8 {d0[3]}, [%1], %2 \n"
|
||||
"vld1.8 {d0[4]}, [%1], %2 \n"
|
||||
"vld1.8 {d0[5]}, [%1], %2 \n"
|
||||
"vld1.8 {d0[6]}, [%1], %2 \n"
|
||||
"vld1.8 {d0[7]}, [%1] \n"
|
||||
|
||||
MEMACCESS(3)
|
||||
"vst1.64 {d0}, [%3] \n"
|
||||
"vst1.64 {d0}, [%3] \n"
|
||||
|
||||
"4: \n"
|
||||
"4: \n"
|
||||
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(src_stride), // %2
|
||||
"+r"(dst), // %3
|
||||
"+r"(dst_stride), // %4
|
||||
"+r"(width) // %5
|
||||
: "r"(&kVTbl4x4Transpose) // %6
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3"
|
||||
);
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(src_stride), // %2
|
||||
"+r"(dst), // %3
|
||||
"+r"(dst_stride), // %4
|
||||
"+r"(width) // %5
|
||||
: "r"(&kVTbl4x4Transpose) // %6
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3");
|
||||
}
|
||||
|
||||
static uvec8 kVTbl4x4TransposeDi =
|
||||
{ 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15 };
|
||||
static const uvec8 kVTbl4x4TransposeDi = {0, 8, 1, 9, 2, 10, 3, 11,
|
||||
4, 12, 5, 13, 6, 14, 7, 15};
|
||||
|
||||
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
void TransposeUVWx8_NEON(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width) {
|
||||
const uint8* src_temp;
|
||||
asm volatile (
|
||||
// loops are on blocks of 8. loop will stop when
|
||||
// counter gets to or below 0. starting the counter
|
||||
// at w-8 allow for this
|
||||
"sub %7, #8 \n"
|
||||
const uint8_t* src_temp;
|
||||
asm volatile(
|
||||
// loops are on blocks of 8. loop will stop when
|
||||
// counter gets to or below 0. starting the counter
|
||||
// at w-8 allow for this
|
||||
"sub %7, #8 \n"
|
||||
|
||||
// handle 8x8 blocks. this should be the majority of the plane
|
||||
"1: \n"
|
||||
// handle 8x8 blocks. this should be the majority of the plane
|
||||
"1: \n"
|
||||
"mov %0, %1 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d0, d1}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d2, d3}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d4, d5}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d6, d7}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d16, d17}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d18, d19}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d20, d21}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.8 {d22, d23}, [%0] \n"
|
||||
|
||||
"vtrn.8 q1, q0 \n"
|
||||
@@ -301,40 +245,24 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
|
||||
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d2}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d0}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d6}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d4}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d18}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d16}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d22}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d20}, [%0] \n"
|
||||
|
||||
"mov %0, %5 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d3}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d1}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d7}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d5}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d19}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d17}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d23}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.8 {d21}, [%0] \n"
|
||||
|
||||
"add %1, #8*2 \n" // src += 8*2
|
||||
@@ -343,187 +271,142 @@ void TransposeUVWx8_NEON(const uint8* src, int src_stride,
|
||||
"subs %7, #8 \n" // w -= 8
|
||||
"bge 1b \n"
|
||||
|
||||
// add 8 back to counter. if the result is 0 there are
|
||||
// no residuals.
|
||||
"adds %7, #8 \n"
|
||||
"beq 4f \n"
|
||||
// add 8 back to counter. if the result is 0 there are
|
||||
// no residuals.
|
||||
"adds %7, #8 \n"
|
||||
"beq 4f \n"
|
||||
|
||||
// some residual, so between 1 and 7 lines left to transpose
|
||||
"cmp %7, #2 \n"
|
||||
"blt 3f \n"
|
||||
// some residual, so between 1 and 7 lines left to transpose
|
||||
"cmp %7, #2 \n"
|
||||
"blt 3f \n"
|
||||
|
||||
"cmp %7, #4 \n"
|
||||
"blt 2f \n"
|
||||
"cmp %7, #4 \n"
|
||||
"blt 2f \n"
|
||||
|
||||
// TODO(frkoenig): Clean this up
|
||||
// 4x8 block
|
||||
"mov %0, %1 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d0}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d1}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d2}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d3}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d4}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d5}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d6}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld1.64 {d7}, [%0] \n"
|
||||
// TODO(frkoenig): Clean this up
|
||||
// 4x8 block
|
||||
"mov %0, %1 \n"
|
||||
"vld1.64 {d0}, [%0], %2 \n"
|
||||
"vld1.64 {d1}, [%0], %2 \n"
|
||||
"vld1.64 {d2}, [%0], %2 \n"
|
||||
"vld1.64 {d3}, [%0], %2 \n"
|
||||
"vld1.64 {d4}, [%0], %2 \n"
|
||||
"vld1.64 {d5}, [%0], %2 \n"
|
||||
"vld1.64 {d6}, [%0], %2 \n"
|
||||
"vld1.64 {d7}, [%0] \n"
|
||||
|
||||
MEMACCESS(8)
|
||||
"vld1.8 {q15}, [%8] \n"
|
||||
"vld1.8 {q15}, [%8] \n"
|
||||
|
||||
"vtrn.8 q0, q1 \n"
|
||||
"vtrn.8 q2, q3 \n"
|
||||
"vtrn.8 q0, q1 \n"
|
||||
"vtrn.8 q2, q3 \n"
|
||||
|
||||
"vtbl.8 d16, {d0, d1}, d30 \n"
|
||||
"vtbl.8 d17, {d0, d1}, d31 \n"
|
||||
"vtbl.8 d18, {d2, d3}, d30 \n"
|
||||
"vtbl.8 d19, {d2, d3}, d31 \n"
|
||||
"vtbl.8 d20, {d4, d5}, d30 \n"
|
||||
"vtbl.8 d21, {d4, d5}, d31 \n"
|
||||
"vtbl.8 d22, {d6, d7}, d30 \n"
|
||||
"vtbl.8 d23, {d6, d7}, d31 \n"
|
||||
"vtbl.8 d16, {d0, d1}, d30 \n"
|
||||
"vtbl.8 d17, {d0, d1}, d31 \n"
|
||||
"vtbl.8 d18, {d2, d3}, d30 \n"
|
||||
"vtbl.8 d19, {d2, d3}, d31 \n"
|
||||
"vtbl.8 d20, {d4, d5}, d30 \n"
|
||||
"vtbl.8 d21, {d4, d5}, d31 \n"
|
||||
"vtbl.8 d22, {d6, d7}, d30 \n"
|
||||
"vtbl.8 d23, {d6, d7}, d31 \n"
|
||||
|
||||
"mov %0, %3 \n"
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d16[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d16[1]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d17[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d17[1]}, [%0], %4 \n"
|
||||
"vst1.32 {d16[0]}, [%0], %4 \n"
|
||||
"vst1.32 {d16[1]}, [%0], %4 \n"
|
||||
"vst1.32 {d17[0]}, [%0], %4 \n"
|
||||
"vst1.32 {d17[1]}, [%0], %4 \n"
|
||||
|
||||
"add %0, %3, #4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d20[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d20[1]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d21[0]}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d21[1]}, [%0] \n"
|
||||
"add %0, %3, #4 \n"
|
||||
"vst1.32 {d20[0]}, [%0], %4 \n"
|
||||
"vst1.32 {d20[1]}, [%0], %4 \n"
|
||||
"vst1.32 {d21[0]}, [%0], %4 \n"
|
||||
"vst1.32 {d21[1]}, [%0] \n"
|
||||
|
||||
"mov %0, %5 \n"
|
||||
"mov %0, %5 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d18[0]}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d18[1]}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d19[0]}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d19[1]}, [%0], %6 \n"
|
||||
"vst1.32 {d18[0]}, [%0], %6 \n"
|
||||
"vst1.32 {d18[1]}, [%0], %6 \n"
|
||||
"vst1.32 {d19[0]}, [%0], %6 \n"
|
||||
"vst1.32 {d19[1]}, [%0], %6 \n"
|
||||
|
||||
"add %0, %5, #4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d22[0]}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d22[1]}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d23[0]}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.32 {d23[1]}, [%0] \n"
|
||||
"add %0, %5, #4 \n"
|
||||
"vst1.32 {d22[0]}, [%0], %6 \n"
|
||||
"vst1.32 {d22[1]}, [%0], %6 \n"
|
||||
"vst1.32 {d23[0]}, [%0], %6 \n"
|
||||
"vst1.32 {d23[1]}, [%0] \n"
|
||||
|
||||
"add %1, #4*2 \n" // src += 4 * 2
|
||||
"add %3, %3, %4, lsl #2 \n" // dst_a += 4 * dst_stride_a
|
||||
"add %5, %5, %6, lsl #2 \n" // dst_b += 4 * dst_stride_b
|
||||
"subs %7, #4 \n" // w -= 4
|
||||
"beq 4f \n"
|
||||
"add %1, #4*2 \n" // src += 4 * 2
|
||||
"add %3, %3, %4, lsl #2 \n" // dst_a += 4 *
|
||||
// dst_stride_a
|
||||
"add %5, %5, %6, lsl #2 \n" // dst_b += 4 *
|
||||
// dst_stride_b
|
||||
"subs %7, #4 \n" // w -= 4
|
||||
"beq 4f \n"
|
||||
|
||||
// some residual, check to see if it includes a 2x8 block,
|
||||
// or less
|
||||
"cmp %7, #2 \n"
|
||||
"blt 3f \n"
|
||||
// some residual, check to see if it includes a 2x8 block,
|
||||
// or less
|
||||
"cmp %7, #2 \n"
|
||||
"blt 3f \n"
|
||||
|
||||
// 2x8 block
|
||||
"2: \n"
|
||||
"mov %0, %1 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d0[0], d2[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d1[0], d3[0]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d0[1], d2[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d1[1], d3[1]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d0[2], d2[2]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d1[2], d3[2]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d0[3], d2[3]}, [%0], %2 \n"
|
||||
MEMACCESS(0)
|
||||
"vld2.16 {d1[3], d3[3]}, [%0] \n"
|
||||
// 2x8 block
|
||||
"2: \n"
|
||||
"mov %0, %1 \n"
|
||||
"vld2.16 {d0[0], d2[0]}, [%0], %2 \n"
|
||||
"vld2.16 {d1[0], d3[0]}, [%0], %2 \n"
|
||||
"vld2.16 {d0[1], d2[1]}, [%0], %2 \n"
|
||||
"vld2.16 {d1[1], d3[1]}, [%0], %2 \n"
|
||||
"vld2.16 {d0[2], d2[2]}, [%0], %2 \n"
|
||||
"vld2.16 {d1[2], d3[2]}, [%0], %2 \n"
|
||||
"vld2.16 {d0[3], d2[3]}, [%0], %2 \n"
|
||||
"vld2.16 {d1[3], d3[3]}, [%0] \n"
|
||||
|
||||
"vtrn.8 d0, d1 \n"
|
||||
"vtrn.8 d2, d3 \n"
|
||||
"vtrn.8 d0, d1 \n"
|
||||
"vtrn.8 d2, d3 \n"
|
||||
|
||||
"mov %0, %3 \n"
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.64 {d0}, [%0], %4 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.64 {d2}, [%0] \n"
|
||||
"vst1.64 {d0}, [%0], %4 \n"
|
||||
"vst1.64 {d2}, [%0] \n"
|
||||
|
||||
"mov %0, %5 \n"
|
||||
"mov %0, %5 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"vst1.64 {d1}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"vst1.64 {d3}, [%0] \n"
|
||||
"vst1.64 {d1}, [%0], %6 \n"
|
||||
"vst1.64 {d3}, [%0] \n"
|
||||
|
||||
"add %1, #2*2 \n" // src += 2 * 2
|
||||
"add %3, %3, %4, lsl #1 \n" // dst_a += 2 * dst_stride_a
|
||||
"add %5, %5, %6, lsl #1 \n" // dst_b += 2 * dst_stride_b
|
||||
"subs %7, #2 \n" // w -= 2
|
||||
"beq 4f \n"
|
||||
"add %1, #2*2 \n" // src += 2 * 2
|
||||
"add %3, %3, %4, lsl #1 \n" // dst_a += 2 *
|
||||
// dst_stride_a
|
||||
"add %5, %5, %6, lsl #1 \n" // dst_b += 2 *
|
||||
// dst_stride_b
|
||||
"subs %7, #2 \n" // w -= 2
|
||||
"beq 4f \n"
|
||||
|
||||
// 1x8 block
|
||||
"3: \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[0], d1[0]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[1], d1[1]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[2], d1[2]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[3], d1[3]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[4], d1[4]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[5], d1[5]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[6], d1[6]}, [%1], %2 \n"
|
||||
MEMACCESS(1)
|
||||
"vld2.8 {d0[7], d1[7]}, [%1] \n"
|
||||
// 1x8 block
|
||||
"3: \n"
|
||||
"vld2.8 {d0[0], d1[0]}, [%1], %2 \n"
|
||||
"vld2.8 {d0[1], d1[1]}, [%1], %2 \n"
|
||||
"vld2.8 {d0[2], d1[2]}, [%1], %2 \n"
|
||||
"vld2.8 {d0[3], d1[3]}, [%1], %2 \n"
|
||||
"vld2.8 {d0[4], d1[4]}, [%1], %2 \n"
|
||||
"vld2.8 {d0[5], d1[5]}, [%1], %2 \n"
|
||||
"vld2.8 {d0[6], d1[6]}, [%1], %2 \n"
|
||||
"vld2.8 {d0[7], d1[7]}, [%1] \n"
|
||||
|
||||
MEMACCESS(3)
|
||||
"vst1.64 {d0}, [%3] \n"
|
||||
MEMACCESS(5)
|
||||
"vst1.64 {d1}, [%5] \n"
|
||||
"vst1.64 {d0}, [%3] \n"
|
||||
"vst1.64 {d1}, [%5] \n"
|
||||
|
||||
"4: \n"
|
||||
"4: \n"
|
||||
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(src_stride), // %2
|
||||
"+r"(dst_a), // %3
|
||||
"+r"(dst_stride_a), // %4
|
||||
"+r"(dst_b), // %5
|
||||
"+r"(dst_stride_b), // %6
|
||||
"+r"(width) // %7
|
||||
: "r"(&kVTbl4x4TransposeDi) // %8
|
||||
: "memory", "cc",
|
||||
"q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"
|
||||
);
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(src_stride), // %2
|
||||
"+r"(dst_a), // %3
|
||||
"+r"(dst_stride_a), // %4
|
||||
"+r"(dst_b), // %5
|
||||
"+r"(dst_stride_b), // %6
|
||||
"+r"(width) // %7
|
||||
: "r"(&kVTbl4x4TransposeDi) // %8
|
||||
: "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11");
|
||||
}
|
||||
#endif // defined(__ARM_NEON__) && !defined(__aarch64__)
|
||||
|
||||
|
||||
+284
-401
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/rotate_row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
|
||||
@@ -21,38 +21,32 @@ extern "C" {
|
||||
// This module is for GCC Neon armv8 64 bit.
|
||||
#if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
|
||||
static uvec8 kVTbl4x4Transpose =
|
||||
{ 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 };
|
||||
static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13,
|
||||
2, 6, 10, 14, 3, 7, 11, 15};
|
||||
|
||||
void TransposeWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
const uint8* src_temp;
|
||||
int64 width64 = (int64) width; // Work around clang 3.4 warning.
|
||||
asm volatile (
|
||||
// loops are on blocks of 8. loop will stop when
|
||||
// counter gets to or below 0. starting the counter
|
||||
// at w-8 allow for this
|
||||
"sub %3, %3, #8 \n"
|
||||
void TransposeWx8_NEON(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
const uint8_t* src_temp;
|
||||
asm volatile(
|
||||
// loops are on blocks of 8. loop will stop when
|
||||
// counter gets to or below 0. starting the counter
|
||||
// at w-8 allow for this
|
||||
"sub %w3, %w3, #8 \n"
|
||||
|
||||
// handle 8x8 blocks. this should be the majority of the plane
|
||||
"1: \n"
|
||||
// handle 8x8 blocks. this should be the majority of the plane
|
||||
"1: \n"
|
||||
"mov %0, %1 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v1.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v2.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v3.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v4.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v5.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v6.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v7.8b}, [%0] \n"
|
||||
|
||||
"trn2 v16.8b, v0.8b, v1.8b \n"
|
||||
@@ -84,456 +78,345 @@ void TransposeWx8_NEON(const uint8* src, int src_stride,
|
||||
|
||||
"mov %0, %2 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"st1 {v17.8b}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v16.8b}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v19.8b}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v18.8b}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v21.8b}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v20.8b}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v23.8b}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v22.8b}, [%0] \n"
|
||||
|
||||
"add %1, %1, #8 \n" // src += 8
|
||||
"add %2, %2, %6, lsl #3 \n" // dst += 8 * dst_stride
|
||||
"subs %3, %3, #8 \n" // w -= 8
|
||||
"subs %w3, %w3, #8 \n" // w -= 8
|
||||
"b.ge 1b \n"
|
||||
|
||||
// add 8 back to counter. if the result is 0 there are
|
||||
// no residuals.
|
||||
"adds %3, %3, #8 \n"
|
||||
"b.eq 4f \n"
|
||||
// add 8 back to counter. if the result is 0 there are
|
||||
// no residuals.
|
||||
"adds %w3, %w3, #8 \n"
|
||||
"b.eq 4f \n"
|
||||
|
||||
// some residual, so between 1 and 7 lines left to transpose
|
||||
"cmp %3, #2 \n"
|
||||
"b.lt 3f \n"
|
||||
// some residual, so between 1 and 7 lines left to transpose
|
||||
"cmp %w3, #2 \n"
|
||||
"b.lt 3f \n"
|
||||
|
||||
"cmp %3, #4 \n"
|
||||
"b.lt 2f \n"
|
||||
"cmp %w3, #4 \n"
|
||||
"b.lt 2f \n"
|
||||
|
||||
// 4x8 block
|
||||
"mov %0, %1 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.s}[0], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.s}[1], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.s}[2], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.s}[3], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v1.s}[0], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v1.s}[1], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v1.s}[2], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v1.s}[3], [%0] \n"
|
||||
// 4x8 block
|
||||
"mov %0, %1 \n"
|
||||
"ld1 {v0.s}[0], [%0], %5 \n"
|
||||
"ld1 {v0.s}[1], [%0], %5 \n"
|
||||
"ld1 {v0.s}[2], [%0], %5 \n"
|
||||
"ld1 {v0.s}[3], [%0], %5 \n"
|
||||
"ld1 {v1.s}[0], [%0], %5 \n"
|
||||
"ld1 {v1.s}[1], [%0], %5 \n"
|
||||
"ld1 {v1.s}[2], [%0], %5 \n"
|
||||
"ld1 {v1.s}[3], [%0] \n"
|
||||
|
||||
"mov %0, %2 \n"
|
||||
"mov %0, %2 \n"
|
||||
|
||||
MEMACCESS(4)
|
||||
"ld1 {v2.16b}, [%4] \n"
|
||||
"ld1 {v2.16b}, [%4] \n"
|
||||
|
||||
"tbl v3.16b, {v0.16b}, v2.16b \n"
|
||||
"tbl v0.16b, {v1.16b}, v2.16b \n"
|
||||
"tbl v3.16b, {v0.16b}, v2.16b \n"
|
||||
"tbl v0.16b, {v1.16b}, v2.16b \n"
|
||||
|
||||
// TODO(frkoenig): Rework shuffle above to
|
||||
// write out with 4 instead of 8 writes.
|
||||
MEMACCESS(0)
|
||||
"st1 {v3.s}[0], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v3.s}[1], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v3.s}[2], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v3.s}[3], [%0] \n"
|
||||
// TODO(frkoenig): Rework shuffle above to
|
||||
// write out with 4 instead of 8 writes.
|
||||
"st1 {v3.s}[0], [%0], %6 \n"
|
||||
"st1 {v3.s}[1], [%0], %6 \n"
|
||||
"st1 {v3.s}[2], [%0], %6 \n"
|
||||
"st1 {v3.s}[3], [%0] \n"
|
||||
|
||||
"add %0, %2, #4 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v0.s}[0], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v0.s}[1], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v0.s}[2], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v0.s}[3], [%0] \n"
|
||||
"add %0, %2, #4 \n"
|
||||
"st1 {v0.s}[0], [%0], %6 \n"
|
||||
"st1 {v0.s}[1], [%0], %6 \n"
|
||||
"st1 {v0.s}[2], [%0], %6 \n"
|
||||
"st1 {v0.s}[3], [%0] \n"
|
||||
|
||||
"add %1, %1, #4 \n" // src += 4
|
||||
"add %2, %2, %6, lsl #2 \n" // dst += 4 * dst_stride
|
||||
"subs %3, %3, #4 \n" // w -= 4
|
||||
"b.eq 4f \n"
|
||||
"add %1, %1, #4 \n" // src += 4
|
||||
"add %2, %2, %6, lsl #2 \n" // dst += 4 * dst_stride
|
||||
"subs %w3, %w3, #4 \n" // w -= 4
|
||||
"b.eq 4f \n"
|
||||
|
||||
// some residual, check to see if it includes a 2x8 block,
|
||||
// or less
|
||||
"cmp %3, #2 \n"
|
||||
"b.lt 3f \n"
|
||||
// some residual, check to see if it includes a 2x8 block,
|
||||
// or less
|
||||
"cmp %w3, #2 \n"
|
||||
"b.lt 3f \n"
|
||||
|
||||
// 2x8 block
|
||||
"2: \n"
|
||||
"mov %0, %1 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.h}[0], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v1.h}[0], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.h}[1], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v1.h}[1], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.h}[2], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v1.h}[2], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.h}[3], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v1.h}[3], [%0] \n"
|
||||
// 2x8 block
|
||||
"2: \n"
|
||||
"mov %0, %1 \n"
|
||||
"ld1 {v0.h}[0], [%0], %5 \n"
|
||||
"ld1 {v1.h}[0], [%0], %5 \n"
|
||||
"ld1 {v0.h}[1], [%0], %5 \n"
|
||||
"ld1 {v1.h}[1], [%0], %5 \n"
|
||||
"ld1 {v0.h}[2], [%0], %5 \n"
|
||||
"ld1 {v1.h}[2], [%0], %5 \n"
|
||||
"ld1 {v0.h}[3], [%0], %5 \n"
|
||||
"ld1 {v1.h}[3], [%0] \n"
|
||||
|
||||
"trn2 v2.8b, v0.8b, v1.8b \n"
|
||||
"trn1 v3.8b, v0.8b, v1.8b \n"
|
||||
"trn2 v2.8b, v0.8b, v1.8b \n"
|
||||
"trn1 v3.8b, v0.8b, v1.8b \n"
|
||||
|
||||
"mov %0, %2 \n"
|
||||
"mov %0, %2 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"st1 {v3.8b}, [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v2.8b}, [%0] \n"
|
||||
"st1 {v3.8b}, [%0], %6 \n"
|
||||
"st1 {v2.8b}, [%0] \n"
|
||||
|
||||
"add %1, %1, #2 \n" // src += 2
|
||||
"add %2, %2, %6, lsl #1 \n" // dst += 2 * dst_stride
|
||||
"subs %3, %3, #2 \n" // w -= 2
|
||||
"b.eq 4f \n"
|
||||
"add %1, %1, #2 \n" // src += 2
|
||||
"add %2, %2, %6, lsl #1 \n" // dst += 2 * dst_stride
|
||||
"subs %w3, %w3, #2 \n" // w -= 2
|
||||
"b.eq 4f \n"
|
||||
|
||||
// 1x8 block
|
||||
"3: \n"
|
||||
MEMACCESS(1)
|
||||
"ld1 {v0.b}[0], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld1 {v0.b}[1], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld1 {v0.b}[2], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld1 {v0.b}[3], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld1 {v0.b}[4], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld1 {v0.b}[5], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld1 {v0.b}[6], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld1 {v0.b}[7], [%1] \n"
|
||||
// 1x8 block
|
||||
"3: \n"
|
||||
"ld1 {v0.b}[0], [%1], %5 \n"
|
||||
"ld1 {v0.b}[1], [%1], %5 \n"
|
||||
"ld1 {v0.b}[2], [%1], %5 \n"
|
||||
"ld1 {v0.b}[3], [%1], %5 \n"
|
||||
"ld1 {v0.b}[4], [%1], %5 \n"
|
||||
"ld1 {v0.b}[5], [%1], %5 \n"
|
||||
"ld1 {v0.b}[6], [%1], %5 \n"
|
||||
"ld1 {v0.b}[7], [%1] \n"
|
||||
|
||||
MEMACCESS(2)
|
||||
"st1 {v0.8b}, [%2] \n"
|
||||
"st1 {v0.8b}, [%2] \n"
|
||||
|
||||
"4: \n"
|
||||
"4: \n"
|
||||
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(dst), // %2
|
||||
"+r"(width64) // %3
|
||||
: "r"(&kVTbl4x4Transpose), // %4
|
||||
"r"(static_cast<ptrdiff_t>(src_stride)), // %5
|
||||
"r"(static_cast<ptrdiff_t>(dst_stride)) // %6
|
||||
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
|
||||
"v17", "v18", "v19", "v20", "v21", "v22", "v23"
|
||||
);
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(dst), // %2
|
||||
"+r"(width) // %3
|
||||
: "r"(&kVTbl4x4Transpose), // %4
|
||||
"r"(static_cast<ptrdiff_t>(src_stride)), // %5
|
||||
"r"(static_cast<ptrdiff_t>(dst_stride)) // %6
|
||||
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
|
||||
"v17", "v18", "v19", "v20", "v21", "v22", "v23");
|
||||
}
|
||||
|
||||
static uint8 kVTbl4x4TransposeDi[32] =
|
||||
{ 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54,
|
||||
1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55};
|
||||
static const uint8_t kVTbl4x4TransposeDi[32] = {
|
||||
0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54,
|
||||
1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55};
|
||||
|
||||
void TransposeUVWx8_NEON(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
void TransposeUVWx8_NEON(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int width) {
|
||||
const uint8* src_temp;
|
||||
int64 width64 = (int64) width; // Work around clang 3.4 warning.
|
||||
asm volatile (
|
||||
// loops are on blocks of 8. loop will stop when
|
||||
// counter gets to or below 0. starting the counter
|
||||
// at w-8 allow for this
|
||||
"sub %4, %4, #8 \n"
|
||||
const uint8_t* src_temp;
|
||||
asm volatile(
|
||||
// loops are on blocks of 8. loop will stop when
|
||||
// counter gets to or below 0. starting the counter
|
||||
// at w-8 allow for this
|
||||
"sub %w4, %w4, #8 \n"
|
||||
|
||||
// handle 8x8 blocks. this should be the majority of the plane
|
||||
"1: \n"
|
||||
"mov %0, %1 \n"
|
||||
// handle 8x8 blocks. this should be the majority of the plane
|
||||
"1: \n"
|
||||
"mov %0, %1 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.16b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v1.16b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v2.16b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v3.16b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v4.16b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v5.16b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v6.16b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v7.16b}, [%0] \n"
|
||||
"ld1 {v0.16b}, [%0], %5 \n"
|
||||
"ld1 {v1.16b}, [%0], %5 \n"
|
||||
"ld1 {v2.16b}, [%0], %5 \n"
|
||||
"ld1 {v3.16b}, [%0], %5 \n"
|
||||
"ld1 {v4.16b}, [%0], %5 \n"
|
||||
"ld1 {v5.16b}, [%0], %5 \n"
|
||||
"ld1 {v6.16b}, [%0], %5 \n"
|
||||
"ld1 {v7.16b}, [%0] \n"
|
||||
|
||||
"trn1 v16.16b, v0.16b, v1.16b \n"
|
||||
"trn2 v17.16b, v0.16b, v1.16b \n"
|
||||
"trn1 v18.16b, v2.16b, v3.16b \n"
|
||||
"trn2 v19.16b, v2.16b, v3.16b \n"
|
||||
"trn1 v20.16b, v4.16b, v5.16b \n"
|
||||
"trn2 v21.16b, v4.16b, v5.16b \n"
|
||||
"trn1 v22.16b, v6.16b, v7.16b \n"
|
||||
"trn2 v23.16b, v6.16b, v7.16b \n"
|
||||
"trn1 v16.16b, v0.16b, v1.16b \n"
|
||||
"trn2 v17.16b, v0.16b, v1.16b \n"
|
||||
"trn1 v18.16b, v2.16b, v3.16b \n"
|
||||
"trn2 v19.16b, v2.16b, v3.16b \n"
|
||||
"trn1 v20.16b, v4.16b, v5.16b \n"
|
||||
"trn2 v21.16b, v4.16b, v5.16b \n"
|
||||
"trn1 v22.16b, v6.16b, v7.16b \n"
|
||||
"trn2 v23.16b, v6.16b, v7.16b \n"
|
||||
|
||||
"trn1 v0.8h, v16.8h, v18.8h \n"
|
||||
"trn2 v1.8h, v16.8h, v18.8h \n"
|
||||
"trn1 v2.8h, v20.8h, v22.8h \n"
|
||||
"trn2 v3.8h, v20.8h, v22.8h \n"
|
||||
"trn1 v4.8h, v17.8h, v19.8h \n"
|
||||
"trn2 v5.8h, v17.8h, v19.8h \n"
|
||||
"trn1 v6.8h, v21.8h, v23.8h \n"
|
||||
"trn2 v7.8h, v21.8h, v23.8h \n"
|
||||
"trn1 v0.8h, v16.8h, v18.8h \n"
|
||||
"trn2 v1.8h, v16.8h, v18.8h \n"
|
||||
"trn1 v2.8h, v20.8h, v22.8h \n"
|
||||
"trn2 v3.8h, v20.8h, v22.8h \n"
|
||||
"trn1 v4.8h, v17.8h, v19.8h \n"
|
||||
"trn2 v5.8h, v17.8h, v19.8h \n"
|
||||
"trn1 v6.8h, v21.8h, v23.8h \n"
|
||||
"trn2 v7.8h, v21.8h, v23.8h \n"
|
||||
|
||||
"trn1 v16.4s, v0.4s, v2.4s \n"
|
||||
"trn2 v17.4s, v0.4s, v2.4s \n"
|
||||
"trn1 v18.4s, v1.4s, v3.4s \n"
|
||||
"trn2 v19.4s, v1.4s, v3.4s \n"
|
||||
"trn1 v20.4s, v4.4s, v6.4s \n"
|
||||
"trn2 v21.4s, v4.4s, v6.4s \n"
|
||||
"trn1 v22.4s, v5.4s, v7.4s \n"
|
||||
"trn2 v23.4s, v5.4s, v7.4s \n"
|
||||
"trn1 v16.4s, v0.4s, v2.4s \n"
|
||||
"trn2 v17.4s, v0.4s, v2.4s \n"
|
||||
"trn1 v18.4s, v1.4s, v3.4s \n"
|
||||
"trn2 v19.4s, v1.4s, v3.4s \n"
|
||||
"trn1 v20.4s, v4.4s, v6.4s \n"
|
||||
"trn2 v21.4s, v4.4s, v6.4s \n"
|
||||
"trn1 v22.4s, v5.4s, v7.4s \n"
|
||||
"trn2 v23.4s, v5.4s, v7.4s \n"
|
||||
|
||||
"mov %0, %2 \n"
|
||||
"mov %0, %2 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"st1 {v16.d}[0], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v18.d}[0], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v17.d}[0], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v19.d}[0], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v16.d}[1], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v18.d}[1], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v17.d}[1], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v19.d}[1], [%0] \n"
|
||||
"st1 {v16.d}[0], [%0], %6 \n"
|
||||
"st1 {v18.d}[0], [%0], %6 \n"
|
||||
"st1 {v17.d}[0], [%0], %6 \n"
|
||||
"st1 {v19.d}[0], [%0], %6 \n"
|
||||
"st1 {v16.d}[1], [%0], %6 \n"
|
||||
"st1 {v18.d}[1], [%0], %6 \n"
|
||||
"st1 {v17.d}[1], [%0], %6 \n"
|
||||
"st1 {v19.d}[1], [%0] \n"
|
||||
|
||||
"mov %0, %3 \n"
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"st1 {v20.d}[0], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v22.d}[0], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v21.d}[0], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v23.d}[0], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v20.d}[1], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v22.d}[1], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v21.d}[1], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v23.d}[1], [%0] \n"
|
||||
"st1 {v20.d}[0], [%0], %7 \n"
|
||||
"st1 {v22.d}[0], [%0], %7 \n"
|
||||
"st1 {v21.d}[0], [%0], %7 \n"
|
||||
"st1 {v23.d}[0], [%0], %7 \n"
|
||||
"st1 {v20.d}[1], [%0], %7 \n"
|
||||
"st1 {v22.d}[1], [%0], %7 \n"
|
||||
"st1 {v21.d}[1], [%0], %7 \n"
|
||||
"st1 {v23.d}[1], [%0] \n"
|
||||
|
||||
"add %1, %1, #16 \n" // src += 8*2
|
||||
"add %2, %2, %6, lsl #3 \n" // dst_a += 8 * dst_stride_a
|
||||
"add %3, %3, %7, lsl #3 \n" // dst_b += 8 * dst_stride_b
|
||||
"subs %4, %4, #8 \n" // w -= 8
|
||||
"b.ge 1b \n"
|
||||
"add %1, %1, #16 \n" // src += 8*2
|
||||
"add %2, %2, %6, lsl #3 \n" // dst_a += 8 *
|
||||
// dst_stride_a
|
||||
"add %3, %3, %7, lsl #3 \n" // dst_b += 8 *
|
||||
// dst_stride_b
|
||||
"subs %w4, %w4, #8 \n" // w -= 8
|
||||
"b.ge 1b \n"
|
||||
|
||||
// add 8 back to counter. if the result is 0 there are
|
||||
// no residuals.
|
||||
"adds %4, %4, #8 \n"
|
||||
"b.eq 4f \n"
|
||||
// add 8 back to counter. if the result is 0 there are
|
||||
// no residuals.
|
||||
"adds %w4, %w4, #8 \n"
|
||||
"b.eq 4f \n"
|
||||
|
||||
// some residual, so between 1 and 7 lines left to transpose
|
||||
"cmp %4, #2 \n"
|
||||
"b.lt 3f \n"
|
||||
// some residual, so between 1 and 7 lines left to transpose
|
||||
"cmp %w4, #2 \n"
|
||||
"b.lt 3f \n"
|
||||
|
||||
"cmp %4, #4 \n"
|
||||
"b.lt 2f \n"
|
||||
"cmp %w4, #4 \n"
|
||||
"b.lt 2f \n"
|
||||
|
||||
// TODO(frkoenig): Clean this up
|
||||
// 4x8 block
|
||||
"mov %0, %1 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v0.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v1.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v2.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v3.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v4.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v5.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v6.8b}, [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld1 {v7.8b}, [%0] \n"
|
||||
// TODO(frkoenig): Clean this up
|
||||
// 4x8 block
|
||||
"mov %0, %1 \n"
|
||||
"ld1 {v0.8b}, [%0], %5 \n"
|
||||
"ld1 {v1.8b}, [%0], %5 \n"
|
||||
"ld1 {v2.8b}, [%0], %5 \n"
|
||||
"ld1 {v3.8b}, [%0], %5 \n"
|
||||
"ld1 {v4.8b}, [%0], %5 \n"
|
||||
"ld1 {v5.8b}, [%0], %5 \n"
|
||||
"ld1 {v6.8b}, [%0], %5 \n"
|
||||
"ld1 {v7.8b}, [%0] \n"
|
||||
|
||||
MEMACCESS(8)
|
||||
"ld1 {v30.16b}, [%8], #16 \n"
|
||||
"ld1 {v31.16b}, [%8] \n"
|
||||
"ld1 {v30.16b}, [%8], #16 \n"
|
||||
"ld1 {v31.16b}, [%8] \n"
|
||||
|
||||
"tbl v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b \n"
|
||||
"tbl v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b \n"
|
||||
"tbl v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b \n"
|
||||
"tbl v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b \n"
|
||||
"tbl v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b \n"
|
||||
"tbl v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b \n"
|
||||
"tbl v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b \n"
|
||||
"tbl v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b \n"
|
||||
|
||||
"mov %0, %2 \n"
|
||||
"mov %0, %2 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"st1 {v16.s}[0], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v16.s}[1], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v16.s}[2], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v16.s}[3], [%0], %6 \n"
|
||||
"st1 {v16.s}[0], [%0], %6 \n"
|
||||
"st1 {v16.s}[1], [%0], %6 \n"
|
||||
"st1 {v16.s}[2], [%0], %6 \n"
|
||||
"st1 {v16.s}[3], [%0], %6 \n"
|
||||
|
||||
"add %0, %2, #4 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v18.s}[0], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v18.s}[1], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v18.s}[2], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v18.s}[3], [%0] \n"
|
||||
"add %0, %2, #4 \n"
|
||||
"st1 {v18.s}[0], [%0], %6 \n"
|
||||
"st1 {v18.s}[1], [%0], %6 \n"
|
||||
"st1 {v18.s}[2], [%0], %6 \n"
|
||||
"st1 {v18.s}[3], [%0] \n"
|
||||
|
||||
"mov %0, %3 \n"
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"st1 {v17.s}[0], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v17.s}[1], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v17.s}[2], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v17.s}[3], [%0], %7 \n"
|
||||
"st1 {v17.s}[0], [%0], %7 \n"
|
||||
"st1 {v17.s}[1], [%0], %7 \n"
|
||||
"st1 {v17.s}[2], [%0], %7 \n"
|
||||
"st1 {v17.s}[3], [%0], %7 \n"
|
||||
|
||||
"add %0, %3, #4 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v19.s}[0], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v19.s}[1], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v19.s}[2], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v19.s}[3], [%0] \n"
|
||||
"add %0, %3, #4 \n"
|
||||
"st1 {v19.s}[0], [%0], %7 \n"
|
||||
"st1 {v19.s}[1], [%0], %7 \n"
|
||||
"st1 {v19.s}[2], [%0], %7 \n"
|
||||
"st1 {v19.s}[3], [%0] \n"
|
||||
|
||||
"add %1, %1, #8 \n" // src += 4 * 2
|
||||
"add %2, %2, %6, lsl #2 \n" // dst_a += 4 * dst_stride_a
|
||||
"add %3, %3, %7, lsl #2 \n" // dst_b += 4 * dst_stride_b
|
||||
"subs %4, %4, #4 \n" // w -= 4
|
||||
"b.eq 4f \n"
|
||||
"add %1, %1, #8 \n" // src += 4 * 2
|
||||
"add %2, %2, %6, lsl #2 \n" // dst_a += 4 *
|
||||
// dst_stride_a
|
||||
"add %3, %3, %7, lsl #2 \n" // dst_b += 4 *
|
||||
// dst_stride_b
|
||||
"subs %w4, %w4, #4 \n" // w -= 4
|
||||
"b.eq 4f \n"
|
||||
|
||||
// some residual, check to see if it includes a 2x8 block,
|
||||
// or less
|
||||
"cmp %4, #2 \n"
|
||||
"b.lt 3f \n"
|
||||
// some residual, check to see if it includes a 2x8 block,
|
||||
// or less
|
||||
"cmp %w4, #2 \n"
|
||||
"b.lt 3f \n"
|
||||
|
||||
// 2x8 block
|
||||
"2: \n"
|
||||
"mov %0, %1 \n"
|
||||
MEMACCESS(0)
|
||||
"ld2 {v0.h, v1.h}[0], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld2 {v2.h, v3.h}[0], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld2 {v0.h, v1.h}[1], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld2 {v2.h, v3.h}[1], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld2 {v0.h, v1.h}[2], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld2 {v2.h, v3.h}[2], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld2 {v0.h, v1.h}[3], [%0], %5 \n"
|
||||
MEMACCESS(0)
|
||||
"ld2 {v2.h, v3.h}[3], [%0] \n"
|
||||
// 2x8 block
|
||||
"2: \n"
|
||||
"mov %0, %1 \n"
|
||||
"ld2 {v0.h, v1.h}[0], [%0], %5 \n"
|
||||
"ld2 {v2.h, v3.h}[0], [%0], %5 \n"
|
||||
"ld2 {v0.h, v1.h}[1], [%0], %5 \n"
|
||||
"ld2 {v2.h, v3.h}[1], [%0], %5 \n"
|
||||
"ld2 {v0.h, v1.h}[2], [%0], %5 \n"
|
||||
"ld2 {v2.h, v3.h}[2], [%0], %5 \n"
|
||||
"ld2 {v0.h, v1.h}[3], [%0], %5 \n"
|
||||
"ld2 {v2.h, v3.h}[3], [%0] \n"
|
||||
|
||||
"trn1 v4.8b, v0.8b, v2.8b \n"
|
||||
"trn2 v5.8b, v0.8b, v2.8b \n"
|
||||
"trn1 v6.8b, v1.8b, v3.8b \n"
|
||||
"trn2 v7.8b, v1.8b, v3.8b \n"
|
||||
"trn1 v4.8b, v0.8b, v2.8b \n"
|
||||
"trn2 v5.8b, v0.8b, v2.8b \n"
|
||||
"trn1 v6.8b, v1.8b, v3.8b \n"
|
||||
"trn2 v7.8b, v1.8b, v3.8b \n"
|
||||
|
||||
"mov %0, %2 \n"
|
||||
"mov %0, %2 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"st1 {v4.d}[0], [%0], %6 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v6.d}[0], [%0] \n"
|
||||
"st1 {v4.d}[0], [%0], %6 \n"
|
||||
"st1 {v6.d}[0], [%0] \n"
|
||||
|
||||
"mov %0, %3 \n"
|
||||
"mov %0, %3 \n"
|
||||
|
||||
MEMACCESS(0)
|
||||
"st1 {v5.d}[0], [%0], %7 \n"
|
||||
MEMACCESS(0)
|
||||
"st1 {v7.d}[0], [%0] \n"
|
||||
"st1 {v5.d}[0], [%0], %7 \n"
|
||||
"st1 {v7.d}[0], [%0] \n"
|
||||
|
||||
"add %1, %1, #4 \n" // src += 2 * 2
|
||||
"add %2, %2, %6, lsl #1 \n" // dst_a += 2 * dst_stride_a
|
||||
"add %3, %3, %7, lsl #1 \n" // dst_b += 2 * dst_stride_b
|
||||
"subs %4, %4, #2 \n" // w -= 2
|
||||
"b.eq 4f \n"
|
||||
"add %1, %1, #4 \n" // src += 2 * 2
|
||||
"add %2, %2, %6, lsl #1 \n" // dst_a += 2 *
|
||||
// dst_stride_a
|
||||
"add %3, %3, %7, lsl #1 \n" // dst_b += 2 *
|
||||
// dst_stride_b
|
||||
"subs %w4, %w4, #2 \n" // w -= 2
|
||||
"b.eq 4f \n"
|
||||
|
||||
// 1x8 block
|
||||
"3: \n"
|
||||
MEMACCESS(1)
|
||||
"ld2 {v0.b, v1.b}[0], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld2 {v0.b, v1.b}[1], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld2 {v0.b, v1.b}[2], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld2 {v0.b, v1.b}[3], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld2 {v0.b, v1.b}[4], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld2 {v0.b, v1.b}[5], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld2 {v0.b, v1.b}[6], [%1], %5 \n"
|
||||
MEMACCESS(1)
|
||||
"ld2 {v0.b, v1.b}[7], [%1] \n"
|
||||
// 1x8 block
|
||||
"3: \n"
|
||||
"ld2 {v0.b, v1.b}[0], [%1], %5 \n"
|
||||
"ld2 {v0.b, v1.b}[1], [%1], %5 \n"
|
||||
"ld2 {v0.b, v1.b}[2], [%1], %5 \n"
|
||||
"ld2 {v0.b, v1.b}[3], [%1], %5 \n"
|
||||
"ld2 {v0.b, v1.b}[4], [%1], %5 \n"
|
||||
"ld2 {v0.b, v1.b}[5], [%1], %5 \n"
|
||||
"ld2 {v0.b, v1.b}[6], [%1], %5 \n"
|
||||
"ld2 {v0.b, v1.b}[7], [%1] \n"
|
||||
|
||||
MEMACCESS(2)
|
||||
"st1 {v0.d}[0], [%2] \n"
|
||||
MEMACCESS(3)
|
||||
"st1 {v1.d}[0], [%3] \n"
|
||||
"st1 {v0.d}[0], [%2] \n"
|
||||
"st1 {v1.d}[0], [%3] \n"
|
||||
|
||||
"4: \n"
|
||||
"4: \n"
|
||||
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(dst_a), // %2
|
||||
"+r"(dst_b), // %3
|
||||
"+r"(width64) // %4
|
||||
: "r"(static_cast<ptrdiff_t>(src_stride)), // %5
|
||||
"r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
|
||||
"r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
|
||||
"r"(&kVTbl4x4TransposeDi) // %8
|
||||
: "memory", "cc",
|
||||
"v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7",
|
||||
"v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23",
|
||||
"v30", "v31"
|
||||
);
|
||||
: "=&r"(src_temp), // %0
|
||||
"+r"(src), // %1
|
||||
"+r"(dst_a), // %2
|
||||
"+r"(dst_b), // %3
|
||||
"+r"(width) // %4
|
||||
: "r"(static_cast<ptrdiff_t>(src_stride)), // %5
|
||||
"r"(static_cast<ptrdiff_t>(dst_stride_a)), // %6
|
||||
"r"(static_cast<ptrdiff_t>(dst_stride_b)), // %7
|
||||
"r"(&kVTbl4x4TransposeDi) // %8
|
||||
: "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16",
|
||||
"v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31");
|
||||
}
|
||||
#endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
|
||||
|
||||
|
||||
@@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/row.h"
|
||||
#include "libyuv/rotate_row.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
@@ -17,17 +17,19 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
// This module is for 32 bit Visual C x86 and clangcl
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86)
|
||||
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)
|
||||
|
||||
__declspec(naked)
|
||||
void TransposeWx8_SSSE3(const uint8* src, int src_stride,
|
||||
uint8* dst, int dst_stride, int width) {
|
||||
__declspec(naked) void TransposeWx8_SSSE3(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int width) {
|
||||
__asm {
|
||||
push edi
|
||||
push esi
|
||||
push ebp
|
||||
mov eax, [esp + 12 + 4] // src
|
||||
mov edi, [esp + 12 + 8] // src_stride
|
||||
mov eax, [esp + 12 + 4] // src
|
||||
mov edi, [esp + 12 + 8] // src_stride
|
||||
mov edx, [esp + 12 + 12] // dst
|
||||
mov esi, [esp + 12 + 16] // dst_stride
|
||||
mov ecx, [esp + 12 + 20] // width
|
||||
@@ -110,18 +112,20 @@ void TransposeWx8_SSSE3(const uint8* src, int src_stride,
|
||||
}
|
||||
}
|
||||
|
||||
__declspec(naked)
|
||||
void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
uint8* dst_a, int dst_stride_a,
|
||||
uint8* dst_b, int dst_stride_b,
|
||||
int w) {
|
||||
__declspec(naked) void TransposeUVWx8_SSE2(const uint8_t* src,
|
||||
int src_stride,
|
||||
uint8_t* dst_a,
|
||||
int dst_stride_a,
|
||||
uint8_t* dst_b,
|
||||
int dst_stride_b,
|
||||
int w) {
|
||||
__asm {
|
||||
push ebx
|
||||
push esi
|
||||
push edi
|
||||
push ebp
|
||||
mov eax, [esp + 16 + 4] // src
|
||||
mov edi, [esp + 16 + 8] // src_stride
|
||||
mov eax, [esp + 16 + 4] // src
|
||||
mov edi, [esp + 16 + 8] // src_stride
|
||||
mov edx, [esp + 16 + 12] // dst_a
|
||||
mov esi, [esp + 16 + 16] // dst_stride_a
|
||||
mov ebx, [esp + 16 + 20] // dst_b
|
||||
@@ -133,9 +137,9 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
mov ecx, [ecx + 16 + 28] // w
|
||||
|
||||
align 4
|
||||
convertloop:
|
||||
// Read in the data from the source pointer.
|
||||
// First round of bit swap.
|
||||
convertloop:
|
||||
movdqu xmm0, [eax]
|
||||
movdqu xmm1, [eax + edi]
|
||||
lea eax, [eax + 2 * edi]
|
||||
@@ -162,13 +166,13 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
lea eax, [eax + 2 * edi]
|
||||
movdqu [esp], xmm5 // backup xmm5
|
||||
neg edi
|
||||
movdqa xmm5, xmm6 // use xmm5 as temp register.
|
||||
movdqa xmm5, xmm6 // use xmm5 as temp register.
|
||||
punpcklbw xmm6, xmm7
|
||||
punpckhbw xmm5, xmm7
|
||||
movdqa xmm7, xmm5
|
||||
lea eax, [eax + 8 * edi + 16]
|
||||
neg edi
|
||||
// Second round of bit swap.
|
||||
// Second round of bit swap.
|
||||
movdqa xmm5, xmm0
|
||||
punpcklwd xmm0, xmm2
|
||||
punpckhwd xmm5, xmm2
|
||||
@@ -183,12 +187,13 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
movdqa xmm6, xmm5
|
||||
movdqu xmm5, [esp] // restore xmm5
|
||||
movdqu [esp], xmm6 // backup xmm6
|
||||
movdqa xmm6, xmm5 // use xmm6 as temp register.
|
||||
movdqa xmm6, xmm5 // use xmm6 as temp register.
|
||||
punpcklwd xmm5, xmm7
|
||||
punpckhwd xmm6, xmm7
|
||||
movdqa xmm7, xmm6
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
|
||||
// Third round of bit swap.
|
||||
// Write to the destination pointer.
|
||||
movdqa xmm6, xmm0
|
||||
punpckldq xmm0, xmm4
|
||||
punpckhdq xmm6, xmm4
|
||||
@@ -200,7 +205,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
lea edx, [edx + 2 * esi]
|
||||
movhpd qword ptr [ebx + ebp], xmm4
|
||||
lea ebx, [ebx + 2 * ebp]
|
||||
movdqa xmm0, xmm2 // use xmm0 as the temp register.
|
||||
movdqa xmm0, xmm2 // use xmm0 as the temp register.
|
||||
punpckldq xmm2, xmm6
|
||||
movlpd qword ptr [edx], xmm2
|
||||
movhpd qword ptr [ebx], xmm2
|
||||
@@ -209,7 +214,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
lea edx, [edx + 2 * esi]
|
||||
movhpd qword ptr [ebx + ebp], xmm0
|
||||
lea ebx, [ebx + 2 * ebp]
|
||||
movdqa xmm0, xmm1 // use xmm0 as the temp register.
|
||||
movdqa xmm0, xmm1 // use xmm0 as the temp register.
|
||||
punpckldq xmm1, xmm5
|
||||
movlpd qword ptr [edx], xmm1
|
||||
movhpd qword ptr [ebx], xmm1
|
||||
@@ -218,7 +223,7 @@ void TransposeUVWx8_SSE2(const uint8* src, int src_stride,
|
||||
lea edx, [edx + 2 * esi]
|
||||
movhpd qword ptr [ebx + ebp], xmm0
|
||||
lea ebx, [ebx + 2 * ebp]
|
||||
movdqa xmm0, xmm3 // use xmm0 as the temp register.
|
||||
movdqa xmm0, xmm3 // use xmm0 as the temp register.
|
||||
punpckldq xmm3, xmm7
|
||||
movlpd qword ptr [edx], xmm3
|
||||
movhpd qword ptr [ebx], xmm3
|
||||
|
||||
+832
-275
File diff suppressed because it is too large
Load Diff
+1562
-952
File diff suppressed because it is too large
Load Diff
+5565
-4422
File diff suppressed because it is too large
Load Diff
@@ -1,782 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The LibYuv project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// The following are available on Mips platforms:
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips__) && \
|
||||
(_MIPS_SIM == _MIPS_SIM_ABI32)
|
||||
|
||||
#ifdef HAS_COPYROW_MIPS
|
||||
// Copies |count| bytes from |src| to |dst| with hand-scheduled MIPS32 assembly.
//
// Structure of the routine, as written below:
//  - fewer than 8 bytes: byte loop at $last8.
//  - src and dst have the same offset modulo 4: dst is aligned with lwr/swr,
//    then 64-byte chunks, one optional 32-byte chunk, whole words and finally
//    the trailing bytes are copied.
//  - otherwise ("unaligned"): same structure, but every source word is read
//    with an lwr/lwl pair while dst is written with sw.
//
// Fixes relative to the previous revision:
//  - |count| is rewritten by the assembly (subu/move/andi), so it is declared
//    as a read-write operand instead of an input-only one.
//  - The routine used to return with "j $ra" from inside the asm statement,
//    skipping whatever epilogue the compiler emits. Both exits now reach the
//    "leave" label at the end of the block and the function returns normally.
//  - A "memory" clobber tells the compiler that the block reads and writes
//    memory that is not named by an operand.
//
// NOTE(review): "leave", "chk8w", "unaligned" etc. are plain (non-numeric)
// labels; they would collide if this asm block were ever emitted twice in one
// translation unit (e.g. through inlining).
void CopyRow_MIPS(const uint8* src, uint8* dst, int count) {
  __asm__ __volatile__ (
    ".set      noreorder                         \n"
    ".set      noat                              \n"
    "slti      $at, %[count], 8                  \n"
    "bne       $at ,$zero, $last8                \n"
    "xor       $t8, %[src], %[dst]               \n"
    "andi      $t8, $t8, 0x3                     \n"

    "bne       $t8, $zero, unaligned             \n"
    "negu      $a3, %[dst]                       \n"
    // make dst/src aligned
    "andi      $a3, $a3, 0x3                     \n"
    "beq       $a3, $zero, $chk16w               \n"
    // word-aligned now; count is the remaining byte count
    "subu      %[count], %[count], $a3           \n"

    "lwr       $t8, 0(%[src])                    \n"
    "addu      %[src], %[src], $a3               \n"
    "swr       $t8, 0(%[dst])                    \n"
    "addu      %[dst], %[dst], $a3               \n"

    // Now the dst/src are mutually word-aligned with word-aligned addresses
    "$chk16w:                                    \n"
    "andi      $t8, %[count], 0x3f               \n"  // whole 64-B chunks?
    // t8 is the byte count after 64-byte chunks
    "beq       %[count], $t8, chk8w              \n"
    // There will be at most 1 32-byte chunk after it
    "subu      $a3, %[count], $t8                \n"  // the remainder
    // Here a3 counts bytes in 16w chunks
    "addu      $a3, %[dst], $a3                  \n"
    // Now a3 is the final dst after 64-byte chunks
    "addu      $t0, %[dst], %[count]             \n"
    // t0 is the "past the end" address

    // When in the loop we exercise "pref 30,x(a1)", the a1+x should not be past
    // the "t0-32" address
    // This means: for x=128 the last "safe" a1 address is "t0-160"
    // Alternatively, for x=64 the last "safe" a1 address is "t0-96"
    // we will use "pref 30,128(a1)", so "t0-160" is the limit
    "subu      $t9, $t0, 160                     \n"
    // t9 is the "last safe pref 30,128(a1)" address
    "pref      0, 0(%[src])                      \n"  // first line of src
    "pref      0, 32(%[src])                     \n"  // second line of src
    "pref      0, 64(%[src])                     \n"
    "pref      30, 32(%[dst])                    \n"
    // In case the a1 > t9 don't use "pref 30" at all
    "sgtu      $v1, %[dst], $t9                  \n"
    "bgtz      $v1, $loop16w                     \n"
    "nop                                         \n"
    // otherwise, start with using pref30
    "pref      30, 64(%[dst])                    \n"
    "$loop16w:                                   \n"
    "pref      0, 96(%[src])                     \n"
    "lw        $t0, 0(%[src])                    \n"
    "bgtz      $v1, $skip_pref30_96              \n"  // skip
    "lw        $t1, 4(%[src])                    \n"
    "pref      30, 96(%[dst])                    \n"  // continue
    "$skip_pref30_96:                            \n"
    "lw        $t2, 8(%[src])                    \n"
    "lw        $t3, 12(%[src])                   \n"
    "lw        $t4, 16(%[src])                   \n"
    "lw        $t5, 20(%[src])                   \n"
    "lw        $t6, 24(%[src])                   \n"
    "lw        $t7, 28(%[src])                   \n"
    "pref      0, 128(%[src])                    \n"
    // bring the next lines of src, addr 128
    "sw        $t0, 0(%[dst])                    \n"
    "sw        $t1, 4(%[dst])                    \n"
    "sw        $t2, 8(%[dst])                    \n"
    "sw        $t3, 12(%[dst])                   \n"
    "sw        $t4, 16(%[dst])                   \n"
    "sw        $t5, 20(%[dst])                   \n"
    "sw        $t6, 24(%[dst])                   \n"
    "sw        $t7, 28(%[dst])                   \n"
    "lw        $t0, 32(%[src])                   \n"
    "bgtz      $v1, $skip_pref30_128             \n"  // skip pref 30,128(a1)
    "lw        $t1, 36(%[src])                   \n"
    "pref      30, 128(%[dst])                   \n"  // set dest, addr 128
    "$skip_pref30_128:                           \n"
    "lw        $t2, 40(%[src])                   \n"
    "lw        $t3, 44(%[src])                   \n"
    "lw        $t4, 48(%[src])                   \n"
    "lw        $t5, 52(%[src])                   \n"
    "lw        $t6, 56(%[src])                   \n"
    "lw        $t7, 60(%[src])                   \n"
    "pref      0, 160(%[src])                    \n"
    // bring the next lines of src, addr 160
    "sw        $t0, 32(%[dst])                   \n"
    "sw        $t1, 36(%[dst])                   \n"
    "sw        $t2, 40(%[dst])                   \n"
    "sw        $t3, 44(%[dst])                   \n"
    "sw        $t4, 48(%[dst])                   \n"
    "sw        $t5, 52(%[dst])                   \n"
    "sw        $t6, 56(%[dst])                   \n"
    "sw        $t7, 60(%[dst])                   \n"

    "addiu     %[dst], %[dst], 64                \n"  // adding 64 to dest
    "sgtu      $v1, %[dst], $t9                  \n"
    "bne       %[dst], $a3, $loop16w             \n"
    " addiu    %[src], %[src], 64                \n"  // adding 64 to src
    "move      %[count], $t8                     \n"

    // Here we have src and dest word-aligned but less than 64-bytes to go

    "chk8w:                                      \n"
    "pref      0, 0x0(%[src])                    \n"
    "andi      $t8, %[count], 0x1f               \n"  // 32-byte chunk?
    // the t8 is the remainder count past 32-bytes
    "beq       %[count], $t8, chk1w              \n"
    // count=t8,no 32-byte chunk
    " nop                                        \n"

    "lw        $t0, 0(%[src])                    \n"
    "lw        $t1, 4(%[src])                    \n"
    "lw        $t2, 8(%[src])                    \n"
    "lw        $t3, 12(%[src])                   \n"
    "lw        $t4, 16(%[src])                   \n"
    "lw        $t5, 20(%[src])                   \n"
    "lw        $t6, 24(%[src])                   \n"
    "lw        $t7, 28(%[src])                   \n"
    "addiu     %[src], %[src], 32                \n"

    "sw        $t0, 0(%[dst])                    \n"
    "sw        $t1, 4(%[dst])                    \n"
    "sw        $t2, 8(%[dst])                    \n"
    "sw        $t3, 12(%[dst])                   \n"
    "sw        $t4, 16(%[dst])                   \n"
    "sw        $t5, 20(%[dst])                   \n"
    "sw        $t6, 24(%[dst])                   \n"
    "sw        $t7, 28(%[dst])                   \n"
    "addiu     %[dst], %[dst], 32                \n"

    "chk1w:                                      \n"
    "andi      %[count], $t8, 0x3                \n"
    // now count is the remainder past 1w chunks
    "beq       %[count], $t8, $last8             \n"
    " subu     $a3, $t8, %[count]                \n"
    // a3 is count of bytes in 1w chunks
    "addu      $a3, %[dst], $a3                  \n"
    // now a3 is the dst address past the 1w chunks
    // copying in words (4-byte chunks)
    "$wordCopy_loop:                             \n"
    "lw        $t3, 0(%[src])                    \n"
    // the first t3 may be equal t0 ... optimize?
    "addiu     %[src], %[src],4                  \n"
    "addiu     %[dst], %[dst],4                  \n"
    "bne       %[dst], $a3,$wordCopy_loop        \n"
    " sw       $t3, -4(%[dst])                   \n"

    // For the last (<8) bytes
    "$last8:                                     \n"
    "blez      %[count], leave                   \n"
    " addu     $a3, %[dst], %[count]             \n"  // a3 = last dst address
    "$last8loop:                                 \n"
    "lb        $v1, 0(%[src])                    \n"
    "addiu     %[src], %[src], 1                 \n"
    "addiu     %[dst], %[dst], 1                 \n"
    "bne       %[dst], $a3, $last8loop           \n"
    " sb       $v1, -1(%[dst])                   \n"

    // Aligned path is finished: skip over the unaligned code to the common
    // exit instead of returning from inside the asm block.
    "b         leave                             \n"
    " nop                                        \n"

    //
    // UNALIGNED case
    //

    "unaligned:                                  \n"
    // got here with a3="negu a1"
    "andi      $a3, $a3, 0x3                     \n"  // a1 is word aligned?
    "beqz      $a3, $ua_chk16w                   \n"
    " subu     %[count], %[count], $a3           \n"
    // bytes left after initial a3 bytes
    "lwr       $v1, 0(%[src])                    \n"
    "lwl       $v1, 3(%[src])                    \n"
    "addu      %[src], %[src], $a3               \n"  // a3 may be 1, 2 or 3
    "swr       $v1, 0(%[dst])                    \n"
    "addu      %[dst], %[dst], $a3               \n"
    // below the dst will be word aligned (NOTE1)
    "$ua_chk16w:                                 \n"
    "andi      $t8, %[count], 0x3f               \n"  // whole 64-B chunks?
    // t8 is the byte count after 64-byte chunks
    "beq       %[count], $t8, ua_chk8w           \n"
    // if a2==t8, no 64-byte chunks
    // There will be at most 1 32-byte chunk after it
    "subu      $a3, %[count], $t8                \n"  // the remainder
    // Here a3 counts bytes in 16w chunks
    "addu      $a3, %[dst], $a3                  \n"
    // Now a3 is the final dst after 64-byte chunks
    "addu      $t0, %[dst], %[count]             \n"  // t0 "past the end"
    "subu      $t9, $t0, 160                     \n"
    // t9 is the "last safe pref 30,128(a1)" address
    "pref      0, 0(%[src])                      \n"  // first line of src
    "pref      0, 32(%[src])                     \n"  // second line addr 32
    "pref      0, 64(%[src])                     \n"
    "pref      30, 32(%[dst])                    \n"
    // safe, as we have at least 64 bytes ahead
    // In case the a1 > t9 don't use "pref 30" at all
    "sgtu      $v1, %[dst], $t9                  \n"
    "bgtz      $v1, $ua_loop16w                  \n"
    // skip "pref 30,64(a1)" for too short arrays
    " nop                                        \n"
    // otherwise, start with using pref30
    "pref      30, 64(%[dst])                    \n"
    "$ua_loop16w:                                \n"
    "pref      0, 96(%[src])                     \n"
    "lwr       $t0, 0(%[src])                    \n"
    "lwl       $t0, 3(%[src])                    \n"
    "lwr       $t1, 4(%[src])                    \n"
    "bgtz      $v1, $ua_skip_pref30_96           \n"
    " lwl      $t1, 7(%[src])                    \n"
    "pref      30, 96(%[dst])                    \n"
    // continue setting up the dest, addr 96
    "$ua_skip_pref30_96:                         \n"
    "lwr       $t2, 8(%[src])                    \n"
    "lwl       $t2, 11(%[src])                   \n"
    "lwr       $t3, 12(%[src])                   \n"
    "lwl       $t3, 15(%[src])                   \n"
    "lwr       $t4, 16(%[src])                   \n"
    "lwl       $t4, 19(%[src])                   \n"
    "lwr       $t5, 20(%[src])                   \n"
    "lwl       $t5, 23(%[src])                   \n"
    "lwr       $t6, 24(%[src])                   \n"
    "lwl       $t6, 27(%[src])                   \n"
    "lwr       $t7, 28(%[src])                   \n"
    "lwl       $t7, 31(%[src])                   \n"
    "pref      0, 128(%[src])                    \n"
    // bring the next lines of src, addr 128
    "sw        $t0, 0(%[dst])                    \n"
    "sw        $t1, 4(%[dst])                    \n"
    "sw        $t2, 8(%[dst])                    \n"
    "sw        $t3, 12(%[dst])                   \n"
    "sw        $t4, 16(%[dst])                   \n"
    "sw        $t5, 20(%[dst])                   \n"
    "sw        $t6, 24(%[dst])                   \n"
    "sw        $t7, 28(%[dst])                   \n"
    "lwr       $t0, 32(%[src])                   \n"
    "lwl       $t0, 35(%[src])                   \n"
    "lwr       $t1, 36(%[src])                   \n"
    "bgtz      $v1, ua_skip_pref30_128           \n"
    " lwl      $t1, 39(%[src])                   \n"
    "pref      30, 128(%[dst])                   \n"
    // continue setting up the dest, addr 128
    "ua_skip_pref30_128:                         \n"

    "lwr       $t2, 40(%[src])                   \n"
    "lwl       $t2, 43(%[src])                   \n"
    "lwr       $t3, 44(%[src])                   \n"
    "lwl       $t3, 47(%[src])                   \n"
    "lwr       $t4, 48(%[src])                   \n"
    "lwl       $t4, 51(%[src])                   \n"
    "lwr       $t5, 52(%[src])                   \n"
    "lwl       $t5, 55(%[src])                   \n"
    "lwr       $t6, 56(%[src])                   \n"
    "lwl       $t6, 59(%[src])                   \n"
    "lwr       $t7, 60(%[src])                   \n"
    "lwl       $t7, 63(%[src])                   \n"
    "pref      0, 160(%[src])                    \n"
    // bring the next lines of src, addr 160
    "sw        $t0, 32(%[dst])                   \n"
    "sw        $t1, 36(%[dst])                   \n"
    "sw        $t2, 40(%[dst])                   \n"
    "sw        $t3, 44(%[dst])                   \n"
    "sw        $t4, 48(%[dst])                   \n"
    "sw        $t5, 52(%[dst])                   \n"
    "sw        $t6, 56(%[dst])                   \n"
    "sw        $t7, 60(%[dst])                   \n"

    "addiu     %[dst],%[dst],64                  \n"  // adding 64 to dest
    "sgtu      $v1,%[dst],$t9                    \n"
    "bne       %[dst],$a3,$ua_loop16w            \n"
    " addiu    %[src],%[src],64                  \n"  // adding 64 to src
    "move      %[count],$t8                      \n"

    // Here we have src and dest word-aligned but less than 64-bytes to go

    "ua_chk8w:                                   \n"
    "pref      0, 0x0(%[src])                    \n"
    "andi      $t8, %[count], 0x1f               \n"  // 32-byte chunk?
    // the t8 is the remainder count
    "beq       %[count], $t8, $ua_chk1w          \n"
    // when count==t8, no 32-byte chunk
    // (the first lwr below sits in the branch delay slot)

    "lwr       $t0, 0(%[src])                    \n"
    "lwl       $t0, 3(%[src])                    \n"
    "lwr       $t1, 4(%[src])                    \n"
    "lwl       $t1, 7(%[src])                    \n"
    "lwr       $t2, 8(%[src])                    \n"
    "lwl       $t2, 11(%[src])                   \n"
    "lwr       $t3, 12(%[src])                   \n"
    "lwl       $t3, 15(%[src])                   \n"
    "lwr       $t4, 16(%[src])                   \n"
    "lwl       $t4, 19(%[src])                   \n"
    "lwr       $t5, 20(%[src])                   \n"
    "lwl       $t5, 23(%[src])                   \n"
    "lwr       $t6, 24(%[src])                   \n"
    "lwl       $t6, 27(%[src])                   \n"
    "lwr       $t7, 28(%[src])                   \n"
    "lwl       $t7, 31(%[src])                   \n"
    "addiu     %[src], %[src], 32                \n"

    "sw        $t0, 0(%[dst])                    \n"
    "sw        $t1, 4(%[dst])                    \n"
    "sw        $t2, 8(%[dst])                    \n"
    "sw        $t3, 12(%[dst])                   \n"
    "sw        $t4, 16(%[dst])                   \n"
    "sw        $t5, 20(%[dst])                   \n"
    "sw        $t6, 24(%[dst])                   \n"
    "sw        $t7, 28(%[dst])                   \n"
    "addiu     %[dst], %[dst], 32                \n"

    "$ua_chk1w:                                  \n"
    "andi      %[count], $t8, 0x3                \n"
    // now count is the remainder past 1w chunks
    "beq       %[count], $t8, ua_smallCopy       \n"
    "subu      $a3, $t8, %[count]                \n"
    // a3 is count of bytes in 1w chunks
    "addu      $a3, %[dst], $a3                  \n"
    // now a3 is the dst address past the 1w chunks

    // copying in words (4-byte chunks)
    "$ua_wordCopy_loop:                          \n"
    "lwr       $v1, 0(%[src])                    \n"
    "lwl       $v1, 3(%[src])                    \n"
    "addiu     %[src], %[src], 4                 \n"
    "addiu     %[dst], %[dst], 4                 \n"
    // note: dst=a1 is word aligned here, see NOTE1
    "bne       %[dst], $a3, $ua_wordCopy_loop    \n"
    " sw       $v1,-4(%[dst])                    \n"

    // Now less than 4 bytes (value in count) left to copy
    "ua_smallCopy:                               \n"
    "beqz      %[count], leave                   \n"
    " addu     $a3, %[dst], %[count]             \n"  // a3 = last dst address
    "$ua_smallCopy_loop:                         \n"
    "lb        $v1, 0(%[src])                    \n"
    "addiu     %[src], %[src], 1                 \n"
    "addiu     %[dst], %[dst], 1                 \n"
    "bne       %[dst],$a3,$ua_smallCopy_loop     \n"
    " sb       $v1, -1(%[dst])                   \n"

    // Common exit: fall out of the asm block so the compiler-generated
    // epilogue performs the actual return.
    "leave:                                      \n"
    ".set      at                                \n"
    ".set      reorder                           \n"
    : [dst] "+r" (dst), [src] "+r" (src),
      [count] "+r" (count)
    :
    : "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7",
      "t8", "t9", "a3", "v1", "at", "memory"
  );
}
|
||||
#endif // HAS_COPYROW_MIPS
|
||||
|
||||
// DSPR2 functions
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && defined(__mips_dsp) && \
|
||||
(__mips_dsp_rev >= 2) && \
|
||||
(_MIPS_SIM == _MIPS_SIM_ABI32) && (__mips_isa_rev < 6)
|
||||
|
||||
// Splits |width| interleaved UV byte pairs from |src_uv| into separate
// |dst_u| and |dst_v| rows using MIPS DSPr2 instructions.
//
// The main loop consumes 16 pairs per iteration with word loads (lw) and word
// stores (sw); the tail loop handles the remaining (width & 15) pairs one
// byte at a time.
//
// Fixes relative to the previous revision:
//  - With fewer than 16 pairs the code jumped straight into the tail loop,
//    which always runs at least once, so width == 0 still copied one pair.
//    The short-width path now goes through the residual check first.
//  - A "memory" clobber was added because the block reads and writes memory
//    that is not named by an operand.
void SplitUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                      int width) {
  __asm__ __volatile__ (
    ".set push                                     \n"
    ".set noreorder                                \n"
    "srl             $t4, %[width], 4              \n"  // multiples of 16
    "blez            $t4, 2f                       \n"
    " andi           %[width], %[width], 0xf       \n"  // residual

    // Main loop: 16 UV pairs (32 source bytes) per iteration.
    "1:                                            \n"
    "addiu           $t4, $t4, -1                  \n"
    "lw              $t0, 0(%[src_uv])             \n"  // V1 | U1 | V0 | U0
    "lw              $t1, 4(%[src_uv])             \n"  // V3 | U3 | V2 | U2
    "lw              $t2, 8(%[src_uv])             \n"  // V5 | U5 | V4 | U4
    "lw              $t3, 12(%[src_uv])            \n"  // V7 | U7 | V6 | U6
    "lw              $t5, 16(%[src_uv])            \n"  // V9 | U9 | V8 | U8
    "lw              $t6, 20(%[src_uv])            \n"  // V11 | U11 | V10 | U10
    "lw              $t7, 24(%[src_uv])            \n"  // V13 | U13 | V12 | U12
    "lw              $t8, 28(%[src_uv])            \n"  // V15 | U15 | V14 | U14
    "addiu           %[src_uv], %[src_uv], 32      \n"
    "precrq.qb.ph    $t9, $t1, $t0                 \n"  // V3 | V2 | V1 | V0
    "precr.qb.ph     $t0, $t1, $t0                 \n"  // U3 | U2 | U1 | U0
    "precrq.qb.ph    $t1, $t3, $t2                 \n"  // V7 | V6 | V5 | V4
    "precr.qb.ph     $t2, $t3, $t2                 \n"  // U7 | U6 | U5 | U4
    "precrq.qb.ph    $t3, $t6, $t5                 \n"  // V11 | V10 | V9 | V8
    "precr.qb.ph     $t5, $t6, $t5                 \n"  // U11 | U10 | U9 | U8
    "precrq.qb.ph    $t6, $t8, $t7                 \n"  // V15 | V14 | V13 | V12
    "precr.qb.ph     $t7, $t8, $t7                 \n"  // U15 | U14 | U13 | U12
    "sw              $t9, 0(%[dst_v])              \n"
    "sw              $t0, 0(%[dst_u])              \n"
    "sw              $t1, 4(%[dst_v])              \n"
    "sw              $t2, 4(%[dst_u])              \n"
    "sw              $t3, 8(%[dst_v])              \n"
    "sw              $t5, 8(%[dst_u])              \n"
    "sw              $t6, 12(%[dst_v])             \n"
    "sw              $t7, 12(%[dst_u])             \n"
    "addiu           %[dst_v], %[dst_v], 16        \n"
    "bgtz            $t4, 1b                       \n"
    " addiu          %[dst_u], %[dst_u], 16        \n"

    // Residual check, reached both after the main loop and when width < 16.
    "2:                                            \n"
    "beqz            %[width], 4f                  \n"
    " nop                                          \n"

    // Tail loop: one UV pair per iteration.
    "3:                                            \n"
    "lbu             $t0, 0(%[src_uv])             \n"
    "lbu             $t1, 1(%[src_uv])             \n"
    "addiu           %[src_uv], %[src_uv], 2       \n"
    "addiu           %[width], %[width], -1        \n"
    "sb              $t0, 0(%[dst_u])              \n"
    "sb              $t1, 0(%[dst_v])              \n"
    "addiu           %[dst_u], %[dst_u], 1         \n"
    "bgtz            %[width], 3b                  \n"
    " addiu          %[dst_v], %[dst_v], 1         \n"

    "4:                                            \n"
    ".set pop                                      \n"
    : [src_uv] "+r" (src_uv),
      [width] "+r" (width),
      [dst_u] "+r" (dst_u),
      [dst_v] "+r" (dst_v)
    :
    : "t0", "t1", "t2", "t3",
      "t4", "t5", "t6", "t7", "t8", "t9", "memory"
  );
}
|
||||
|
||||
// Writes the |width| bytes of |src| to |dst| in reverse order (MIPS DSPr2).
//
// |src| is first advanced to one past the last byte and then walked
// backwards: the main loop reverses 16 bytes per iteration (wsbh + rotr 16
// reverses the byte order of each word), the tail loop copies the remaining
// (width & 15) bytes one at a time.
//
// Fixes relative to the previous revision:
//  - The tail loop decremented its counter and then looped on "bgez", so it
//    ran residual + 1 times and read/wrote one byte past the row. It now
//    loops on "bgtz".
//  - With fewer than 16 bytes the code jumped straight into the tail loop,
//    which always runs at least once, so width == 0 still copied one byte.
//    The short-width path now goes through the residual check first.
//  - A "memory" clobber was added because the block reads and writes memory
//    that is not named by an operand.
void MirrorRow_DSPR2(const uint8* src, uint8* dst, int width) {
  __asm__ __volatile__ (
    ".set push                             \n"
    ".set noreorder                        \n"

    "srl       $t4, %[width], 4            \n"  // multiples of 16
    "andi      $t5, %[width], 0xf          \n"  // residual
    "blez      $t4, 2f                     \n"
    " addu     %[src], %[src], %[width]    \n"  // src += width

    // Main loop: 16 bytes per iteration, read from the end of the row.
    "1:                                    \n"
    "lw        $t0, -16(%[src])            \n"  // |3|2|1|0|
    "lw        $t1, -12(%[src])            \n"  // |7|6|5|4|
    "lw        $t2, -8(%[src])             \n"  // |11|10|9|8|
    "lw        $t3, -4(%[src])             \n"  // |15|14|13|12|
    "wsbh      $t0, $t0                    \n"  // |2|3|0|1|
    "wsbh      $t1, $t1                    \n"  // |6|7|4|5|
    "wsbh      $t2, $t2                    \n"  // |10|11|8|9|
    "wsbh      $t3, $t3                    \n"  // |14|15|12|13|
    "rotr      $t0, $t0, 16                \n"  // |0|1|2|3|
    "rotr      $t1, $t1, 16                \n"  // |4|5|6|7|
    "rotr      $t2, $t2, 16                \n"  // |8|9|10|11|
    "rotr      $t3, $t3, 16                \n"  // |12|13|14|15|
    "addiu     %[src], %[src], -16         \n"
    "addiu     $t4, $t4, -1                \n"
    "sw        $t3, 0(%[dst])              \n"  // |15|14|13|12|
    "sw        $t2, 4(%[dst])              \n"  // |11|10|9|8|
    "sw        $t1, 8(%[dst])              \n"  // |7|6|5|4|
    "sw        $t0, 12(%[dst])             \n"  // |3|2|1|0|
    "bgtz      $t4, 1b                     \n"
    " addiu    %[dst], %[dst], 16          \n"

    // Residual check, reached both after the main loop and when width < 16.
    "2:                                    \n"
    "beqz      $t5, 4f                     \n"
    " nop                                  \n"

    // Tail loop: one byte per iteration, exactly $t5 iterations.
    "3:                                    \n"
    "lbu       $t0, -1(%[src])             \n"
    "addiu     $t5, $t5, -1                \n"
    "addiu     %[src], %[src], -1          \n"
    "sb        $t0, 0(%[dst])              \n"
    "bgtz      $t5, 3b                     \n"
    " addiu    %[dst], %[dst], 1           \n"

    "4:                                    \n"
    ".set pop                              \n"
    : [src] "+r" (src), [dst] "+r" (dst)
    : [width] "r" (width)
    : "t0", "t1", "t2", "t3", "t4", "t5", "memory"
  );
}
|
||||
|
||||
// Reverses a row of |width| interleaved UV byte pairs from |src_uv| and
// de-interleaves it into |dst_u| and |dst_v| (MIPS DSPr2).
//
// |src_uv| is first advanced by 2 * width bytes and then walked backwards:
// the main loop handles 16 pairs (32 source bytes) per iteration and stores
// with swr/swl pairs; the tail loop handles the remaining (width & 15) pairs
// one byte at a time.
//
// Fixes relative to the previous revision:
//  - $t6 is written by the main loop but was missing from the clobber list.
//  - With fewer than 16 pairs the code jumped straight into the tail loop,
//    which always runs at least once, so width == 0 still copied one pair.
//    The short-width path now goes through the residual check first; the
//    redundant "b 2f" to the immediately following label is gone.
//  - A "memory" clobber was added because the block reads and writes memory
//    that is not named by an operand.
void MirrorUVRow_DSPR2(const uint8* src_uv, uint8* dst_u, uint8* dst_v,
                       int width) {
  int x;  // number of 16-pair blocks left
  int y;  // number of residual pairs left
  __asm__ __volatile__ (
    ".set push                                    \n"
    ".set noreorder                               \n"

    "addu            $t4, %[width], %[width]      \n"  // 2 bytes per pair
    "srl             %[x], %[width], 4            \n"
    "andi            %[y], %[width], 0xf          \n"
    "blez            %[x], 2f                     \n"
    " addu           %[src_uv], %[src_uv], $t4    \n"  // src_uv += 2 * width

    // Main loop: 16 UV pairs per iteration, read from the end of the row.
    "1:                                           \n"
    "lw              $t0, -32(%[src_uv])          \n"  // |3|2|1|0|
    "lw              $t1, -28(%[src_uv])          \n"  // |7|6|5|4|
    "lw              $t2, -24(%[src_uv])          \n"  // |11|10|9|8|
    "lw              $t3, -20(%[src_uv])          \n"  // |15|14|13|12|
    "lw              $t4, -16(%[src_uv])          \n"  // |19|18|17|16|
    "lw              $t6, -12(%[src_uv])          \n"  // |23|22|21|20|
    "lw              $t7, -8(%[src_uv])           \n"  // |27|26|25|24|
    "lw              $t8, -4(%[src_uv])           \n"  // |31|30|29|28|

    "rotr            $t0, $t0, 16                 \n"  // |1|0|3|2|
    "rotr            $t1, $t1, 16                 \n"  // |5|4|7|6|
    "rotr            $t2, $t2, 16                 \n"  // |9|8|11|10|
    "rotr            $t3, $t3, 16                 \n"  // |13|12|15|14|
    "rotr            $t4, $t4, 16                 \n"  // |17|16|19|18|
    "rotr            $t6, $t6, 16                 \n"  // |21|20|23|22|
    "rotr            $t7, $t7, 16                 \n"  // |25|24|27|26|
    "rotr            $t8, $t8, 16                 \n"  // |29|28|31|30|
    "precr.qb.ph     $t9, $t0, $t1                \n"  // |0|2|4|6|
    "precrq.qb.ph    $t5, $t0, $t1                \n"  // |1|3|5|7|
    "precr.qb.ph     $t0, $t2, $t3                \n"  // |8|10|12|14|
    "precrq.qb.ph    $t1, $t2, $t3                \n"  // |9|11|13|15|
    "precr.qb.ph     $t2, $t4, $t6                \n"  // |16|18|20|22|
    "precrq.qb.ph    $t3, $t4, $t6                \n"  // |17|19|21|23|
    "precr.qb.ph     $t4, $t7, $t8                \n"  // |24|26|28|30|
    "precrq.qb.ph    $t6, $t7, $t8                \n"  // |25|27|29|31|
    "addiu           %[src_uv], %[src_uv], -32    \n"
    "addiu           %[x], %[x], -1               \n"
    "swr             $t4, 0(%[dst_u])             \n"
    "swl             $t4, 3(%[dst_u])             \n"  // |30|28|26|24|
    "swr             $t6, 0(%[dst_v])             \n"
    "swl             $t6, 3(%[dst_v])             \n"  // |31|29|27|25|
    "swr             $t2, 4(%[dst_u])             \n"
    "swl             $t2, 7(%[dst_u])             \n"  // |22|20|18|16|
    "swr             $t3, 4(%[dst_v])             \n"
    "swl             $t3, 7(%[dst_v])             \n"  // |23|21|19|17|
    "swr             $t0, 8(%[dst_u])             \n"
    "swl             $t0, 11(%[dst_u])            \n"  // |14|12|10|8|
    "swr             $t1, 8(%[dst_v])             \n"
    "swl             $t1, 11(%[dst_v])            \n"  // |15|13|11|9|
    "swr             $t9, 12(%[dst_u])            \n"
    "swl             $t9, 15(%[dst_u])            \n"  // |6|4|2|0|
    "swr             $t5, 12(%[dst_v])            \n"
    "swl             $t5, 15(%[dst_v])            \n"  // |7|5|3|1|
    "addiu           %[dst_v], %[dst_v], 16       \n"
    "bgtz            %[x], 1b                     \n"
    " addiu          %[dst_u], %[dst_u], 16       \n"

    // Residual check, reached both after the main loop and when width < 16.
    "2:                                           \n"
    "beqz            %[y], 4f                     \n"
    " nop                                         \n"

    // Tail loop: one UV pair per iteration.
    "3:                                           \n"
    "lbu             $t0, -2(%[src_uv])           \n"
    "lbu             $t1, -1(%[src_uv])           \n"
    "addiu           %[src_uv], %[src_uv], -2     \n"
    "addiu           %[y], %[y], -1               \n"
    "sb              $t0, 0(%[dst_u])             \n"
    "sb              $t1, 0(%[dst_v])             \n"
    "addiu           %[dst_u], %[dst_u], 1        \n"
    "bgtz            %[y], 3b                     \n"
    " addiu          %[dst_v], %[dst_v], 1        \n"

    "4:                                           \n"
    ".set pop                                     \n"
    : [src_uv] "+r" (src_uv),
      [dst_u] "+r" (dst_u),
      [dst_v] "+r" (dst_v),
      [x] "=&r" (x),
      [y] "=&r" (y)
    : [width] "r" (width)
    : "t0", "t1", "t2", "t3", "t4",
      "t5", "t6", "t7", "t8", "t9", "memory"
  );
}
|
||||
|
||||
// Convert (4 Y and 2 VU) I422 and arrange RGB values into
|
||||
// t5 = | 0 | B0 | 0 | b0 |
|
||||
// t4 = | 0 | B1 | 0 | b1 |
|
||||
// t9 = | 0 | G0 | 0 | g0 |
|
||||
// t8 = | 0 | G1 | 0 | g1 |
|
||||
// t2 = | 0 | R0 | 0 | r0 |
|
||||
// t1 = | 0 | R1 | 0 | r1 |
|
||||
// YUVTORGB is an inline-assembly fragment meant to be pasted inside an asm
// statement that provides the named operands y_buf, u_buf and v_buf.
// It reads registers $s0, $s1, $s2, $s3, $s4 and $s5 without writing them, so
// the surrounding asm must load them first. It writes $t0-$t6, $t8 and $t9.
// It advances u_buf and v_buf by 2 bytes each; y_buf is read but not advanced.
#define YUVTORGB \
|
||||
"lw $t0, 0(%[y_buf]) \n" \
|
||||
"lhu $t1, 0(%[u_buf]) \n" \
|
||||
"lhu $t2, 0(%[v_buf]) \n" \
|
||||
"preceu.ph.qbr $t1, $t1 \n" \
|
||||
"preceu.ph.qbr $t2, $t2 \n" \
|
||||
"preceu.ph.qbra $t3, $t0 \n" \
|
||||
"preceu.ph.qbla $t0, $t0 \n" \
|
||||
"subu.ph $t1, $t1, $s5 \n" \
|
||||
"subu.ph $t2, $t2, $s5 \n" \
|
||||
"subu.ph $t3, $t3, $s4 \n" \
|
||||
"subu.ph $t0, $t0, $s4 \n" \
|
||||
"mul.ph $t3, $t3, $s0 \n" \
|
||||
"mul.ph $t0, $t0, $s0 \n" \
|
||||
"shll.ph $t4, $t1, 0x7 \n" \
|
||||
"subu.ph $t4, $t4, $t1 \n" \
|
||||
"mul.ph $t6, $t1, $s1 \n" \
|
||||
"mul.ph $t1, $t2, $s2 \n" \
|
||||
"addq_s.ph $t5, $t4, $t3 \n" \
|
||||
"addq_s.ph $t4, $t4, $t0 \n" \
|
||||
"shra.ph $t5, $t5, 6 \n" \
|
||||
"shra.ph $t4, $t4, 6 \n" \
|
||||
"addiu %[u_buf], 2 \n" \
|
||||
"addiu %[v_buf], 2 \n" \
|
||||
"addu.ph $t6, $t6, $t1 \n" \
|
||||
"mul.ph $t1, $t2, $s3 \n" \
|
||||
"addu.ph $t9, $t6, $t3 \n" \
|
||||
"addu.ph $t8, $t6, $t0 \n" \
|
||||
"shra.ph $t9, $t9, 6 \n" \
|
||||
"shra.ph $t8, $t8, 6 \n" \
|
||||
"addu.ph $t2, $t1, $t3 \n" \
|
||||
"addu.ph $t1, $t1, $t0 \n" \
|
||||
"shra.ph $t2, $t2, 6 \n" \
|
||||
"shra.ph $t1, $t1, 6 \n" \
|
||||
"subu.ph $t5, $t5, $s5 \n" \
|
||||
"subu.ph $t4, $t4, $s5 \n" \
|
||||
"subu.ph $t9, $t9, $s5 \n" \
|
||||
"subu.ph $t8, $t8, $s5 \n" \
|
||||
"subu.ph $t2, $t2, $s5 \n" \
|
||||
"subu.ph $t1, $t1, $s5 \n" \
|
||||
"shll_s.ph $t5, $t5, 8 \n" \
|
||||
"shll_s.ph $t4, $t4, 8 \n" \
|
||||
"shll_s.ph $t9, $t9, 8 \n" \
|
||||
"shll_s.ph $t8, $t8, 8 \n" \
|
||||
"shll_s.ph $t2, $t2, 8 \n" \
|
||||
"shll_s.ph $t1, $t1, 8 \n" \
|
||||
"shra.ph $t5, $t5, 8 \n" \
|
||||
"shra.ph $t4, $t4, 8 \n" \
|
||||
"shra.ph $t9, $t9, 8 \n" \
|
||||
"shra.ph $t8, $t8, 8 \n" \
|
||||
"shra.ph $t2, $t2, 8 \n" \
|
||||
"shra.ph $t1, $t1, 8 \n" \
|
||||
"addu.ph $t5, $t5, $s5 \n" \
|
||||
"addu.ph $t4, $t4, $s5 \n" \
|
||||
"addu.ph $t9, $t9, $s5 \n" \
|
||||
"addu.ph $t8, $t8, $s5 \n" \
|
||||
"addu.ph $t2, $t2, $s5 \n" \
|
||||
"addu.ph $t1, $t1, $s5 \n"
|
||||
|
||||
// TODO(fbarchard): accept yuv conversion constants.
|
||||
void I422ToARGBRow_DSPR2(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
const struct YuvConstants* yuvconstants,
|
||||
int width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"beqz %[width], 2f \n"
|
||||
" repl.ph $s0, 74 \n" // |YG|YG| = |74|74|
|
||||
"repl.ph $s1, -25 \n" // |UG|UG| = |-25|-25|
|
||||
"repl.ph $s2, -52 \n" // |VG|VG| = |-52|-52|
|
||||
"repl.ph $s3, 102 \n" // |VR|VR| = |102|102|
|
||||
"repl.ph $s4, 16 \n" // |0|16|0|16|
|
||||
"repl.ph $s5, 128 \n" // |128|128| // clipping
|
||||
"lui $s6, 0xff00 \n"
|
||||
"ori $s6, 0xff00 \n" // |ff|00|ff|00|ff|
|
||||
|
||||
"1: \n"
|
||||
YUVTORGB
|
||||
// Arranging into argb format
|
||||
"precr.qb.ph $t4, $t8, $t4 \n" // |G1|g1|B1|b1|
|
||||
"precr.qb.ph $t5, $t9, $t5 \n" // |G0|g0|B0|b0|
|
||||
"addiu %[width], -4 \n"
|
||||
"precrq.qb.ph $t8, $t4, $t5 \n" // |G1|B1|G0|B0|
|
||||
"precr.qb.ph $t9, $t4, $t5 \n" // |g1|b1|g0|b0|
|
||||
"precr.qb.ph $t2, $t1, $t2 \n" // |R1|r1|R0|r0|
|
||||
|
||||
"addiu %[y_buf], 4 \n"
|
||||
"preceu.ph.qbla $t1, $t2 \n" // |0 |R1|0 |R0|
|
||||
"preceu.ph.qbra $t2, $t2 \n" // |0 |r1|0 |r0|
|
||||
"or $t1, $t1, $s6 \n" // |ff|R1|ff|R0|
|
||||
"or $t2, $t2, $s6 \n" // |ff|r1|ff|r0|
|
||||
"precrq.ph.w $t0, $t2, $t9 \n" // |ff|r1|g1|b1|
|
||||
"precrq.ph.w $t3, $t1, $t8 \n" // |ff|R1|G1|B1|
|
||||
"sll $t9, $t9, 16 \n"
|
||||
"sll $t8, $t8, 16 \n"
|
||||
"packrl.ph $t2, $t2, $t9 \n" // |ff|r0|g0|b0|
|
||||
"packrl.ph $t1, $t1, $t8 \n" // |ff|R0|G0|B0|
|
||||
// Store results.
|
||||
"sw $t2, 0(%[rgb_buf]) \n"
|
||||
"sw $t0, 4(%[rgb_buf]) \n"
|
||||
"sw $t1, 8(%[rgb_buf]) \n"
|
||||
"sw $t3, 12(%[rgb_buf]) \n"
|
||||
"bnez %[width], 1b \n"
|
||||
" addiu %[rgb_buf], 16 \n"
|
||||
"2: \n"
|
||||
".set pop \n"
|
||||
:[y_buf] "+r" (y_buf),
|
||||
[u_buf] "+r" (u_buf),
|
||||
[v_buf] "+r" (v_buf),
|
||||
[width] "+r" (width),
|
||||
[rgb_buf] "+r" (rgb_buf)
|
||||
:
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9",
|
||||
"s0", "s1", "s2", "s3",
|
||||
"s4", "s5", "s6"
|
||||
);
|
||||
}
|
||||
|
||||
// Bilinear filter 8x2 -> 8x1
|
||||
// Blends two source rows into one destination row (MIPS DSPr2).
//
// For every byte: dst = (row0 * (256 - source_y_fraction) +
//                        row1 * source_y_fraction) >> 8,
// where row0 starts at |src_ptr| and row1 at |src_ptr| + |src_stride|.
// The loop processes 8 bytes per iteration with word loads and stores, so
// |dst_width| is effectively rounded up to a multiple of 8.
//
// Fixes relative to the previous revision:
//  - The loop body always ran at least once; dst_width <= 0 now skips it.
//  - |src_stride| was passed to the asm as an operand although no instruction
//    references it; it is only needed to compute src_ptr1 in C.
//  - A "memory" clobber was added because the block reads and writes memory
//    that is not named by an operand.
void InterpolateRow_DSPR2(uint8* dst_ptr, const uint8* src_ptr,
                          ptrdiff_t src_stride, int dst_width,
                          int source_y_fraction) {
  int y0_fraction = 256 - source_y_fraction;  // weight of the first row
  const uint8* src_ptr1 = src_ptr + src_stride;  // second source row

  __asm__ __volatile__ (
    ".set push                                           \n"
    ".set noreorder                                      \n"

    "blez              %[dst_width], 2f                  \n"
    " nop                                                \n"

    // Replicate both weights into the two halfwords of a register.
    "replv.ph          $t0, %[y0_fraction]               \n"
    "replv.ph          $t1, %[source_y_fraction]         \n"

    "1:                                                  \n"
    "lw                $t2, 0(%[src_ptr])                \n"
    "lw                $t3, 0(%[src_ptr1])               \n"
    "lw                $t4, 4(%[src_ptr])                \n"
    "lw                $t5, 4(%[src_ptr1])               \n"
    "muleu_s.ph.qbl    $t6, $t2, $t0                     \n"
    "muleu_s.ph.qbr    $t7, $t2, $t0                     \n"
    "muleu_s.ph.qbl    $t8, $t3, $t1                     \n"
    "muleu_s.ph.qbr    $t9, $t3, $t1                     \n"
    "muleu_s.ph.qbl    $t2, $t4, $t0                     \n"
    "muleu_s.ph.qbr    $t3, $t4, $t0                     \n"
    "muleu_s.ph.qbl    $t4, $t5, $t1                     \n"
    "muleu_s.ph.qbr    $t5, $t5, $t1                     \n"
    "addq.ph           $t6, $t6, $t8                     \n"
    "addq.ph           $t7, $t7, $t9                     \n"
    "addq.ph           $t2, $t2, $t4                     \n"
    "addq.ph           $t3, $t3, $t5                     \n"
    "shra.ph           $t6, $t6, 8                       \n"
    "shra.ph           $t7, $t7, 8                       \n"
    "shra.ph           $t2, $t2, 8                       \n"
    "shra.ph           $t3, $t3, 8                       \n"
    "precr.qb.ph       $t6, $t6, $t7                     \n"
    "precr.qb.ph       $t2, $t2, $t3                     \n"
    "addiu             %[src_ptr], %[src_ptr], 8         \n"
    "addiu             %[src_ptr1], %[src_ptr1], 8       \n"
    "addiu             %[dst_width], %[dst_width], -8    \n"
    "sw                $t6, 0(%[dst_ptr])                \n"
    "sw                $t2, 4(%[dst_ptr])                \n"
    "bgtz              %[dst_width], 1b                  \n"
    " addiu            %[dst_ptr], %[dst_ptr], 8         \n"

    "2:                                                  \n"
    ".set pop                                            \n"
    : [dst_ptr] "+r" (dst_ptr),
      [src_ptr1] "+r" (src_ptr1),
      [src_ptr] "+r" (src_ptr),
      [dst_width] "+r" (dst_width)
    : [source_y_fraction] "r" (source_y_fraction),
      [y0_fraction] "r" (y0_fraction)
    : "t0", "t1", "t2", "t3", "t4", "t5",
      "t6", "t7", "t8", "t9", "memory"
  );
}
|
||||
#endif // __mips_dsp_rev >= 2
|
||||
|
||||
#endif // defined(__mips__)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
File diff suppressed because it is too large
Load Diff
+3455
-4
File diff suppressed because it is too large
Load Diff
+2112
-2262
File diff suppressed because it is too large
Load Diff
+2111
-2036
File diff suppressed because it is too large
Load Diff
+1954
-1989
File diff suppressed because it is too large
Load Diff
+599
-449
File diff suppressed because it is too large
Load Diff
+477
-123
@@ -8,6 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <string.h> // For memset/memcpy
|
||||
|
||||
#include "libyuv/scale.h"
|
||||
#include "libyuv/scale_row.h"
|
||||
|
||||
@@ -20,184 +22,533 @@ extern "C" {
|
||||
|
||||
// Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols
|
||||
#define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \
|
||||
void NAMEANY(uint8* dst_ptr, const uint8* src_ptr, \
|
||||
int dst_width, int x, int dx) { \
|
||||
int n = dst_width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \
|
||||
} \
|
||||
TERP_C(dst_ptr + n * BPP, src_ptr, \
|
||||
dst_width & MASK, x + n * dx, dx); \
|
||||
}
|
||||
void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \
|
||||
int dx) { \
|
||||
int r = dst_width & MASK; \
|
||||
int n = dst_width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \
|
||||
} \
|
||||
TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \
|
||||
}
|
||||
|
||||
#ifdef HAS_SCALEFILTERCOLS_NEON
|
||||
CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7)
|
||||
#endif
|
||||
#ifdef HAS_SCALEFILTERCOLS_MSA
|
||||
CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBCOLS_NEON
|
||||
CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBCOLS_MSA
|
||||
CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBCOLS_MMI
|
||||
CANY(ScaleARGBCols_Any_MMI, ScaleARGBCols_MMI, ScaleARGBCols_C, 4, 0)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBFILTERCOLS_NEON
|
||||
CANY(ScaleARGBFilterCols_Any_NEON, ScaleARGBFilterCols_NEON,
|
||||
ScaleARGBFilterCols_C, 4, 3)
|
||||
CANY(ScaleARGBFilterCols_Any_NEON,
|
||||
ScaleARGBFilterCols_NEON,
|
||||
ScaleARGBFilterCols_C,
|
||||
4,
|
||||
3)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBFILTERCOLS_MSA
|
||||
CANY(ScaleARGBFilterCols_Any_MSA,
|
||||
ScaleARGBFilterCols_MSA,
|
||||
ScaleARGBFilterCols_C,
|
||||
4,
|
||||
7)
|
||||
#endif
|
||||
#undef CANY
|
||||
|
||||
// Fixed scale down.
|
||||
// Mask may be non-power of 2, so use MOD
|
||||
#define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
|
||||
uint8* dst_ptr, int dst_width) { \
|
||||
int r = (int)((unsigned int)dst_width % (MASK + 1)); \
|
||||
int n = dst_width - r; \
|
||||
if (n > 0) { \
|
||||
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
|
||||
} \
|
||||
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
|
||||
dst_ptr + n * BPP, r); \
|
||||
}
|
||||
void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
|
||||
int dst_width) { \
|
||||
int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \
|
||||
int n = dst_width - r; \
|
||||
if (n > 0) { \
|
||||
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
|
||||
} \
|
||||
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
|
||||
dst_ptr + n * BPP, r); \
|
||||
}
|
||||
|
||||
// Fixed scale down for odd source width. Used by I420Blend subsampling.
|
||||
// Since dst_width is (width + 1) / 2, this function scales one less pixel
|
||||
// and copies the last pixel.
|
||||
#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, \
|
||||
uint8* dst_ptr, int dst_width) { \
|
||||
int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); \
|
||||
int n = dst_width - r; \
|
||||
if (n > 0) { \
|
||||
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
|
||||
} \
|
||||
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
|
||||
dst_ptr + n * BPP, r); \
|
||||
}
|
||||
void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \
|
||||
int dst_width) { \
|
||||
int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \
|
||||
int n = (dst_width - 1) - r; \
|
||||
if (n > 0) { \
|
||||
SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \
|
||||
} \
|
||||
SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \
|
||||
dst_ptr + n * BPP, r + 1); \
|
||||
}
|
||||
|
||||
#ifdef HAS_SCALEROWDOWN2_SSSE3
|
||||
SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15)
|
||||
SDANY(ScaleRowDown2Linear_Any_SSSE3, ScaleRowDown2Linear_SSSE3,
|
||||
ScaleRowDown2Linear_C, 2, 1, 15)
|
||||
SDANY(ScaleRowDown2Box_Any_SSSE3, ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C,
|
||||
2, 1, 15)
|
||||
SDODD(ScaleRowDown2Box_Odd_SSSE3, ScaleRowDown2Box_SSSE3,
|
||||
ScaleRowDown2Box_Odd_C, 2, 1, 15)
|
||||
SDANY(ScaleRowDown2Linear_Any_SSSE3,
|
||||
ScaleRowDown2Linear_SSSE3,
|
||||
ScaleRowDown2Linear_C,
|
||||
2,
|
||||
1,
|
||||
15)
|
||||
SDANY(ScaleRowDown2Box_Any_SSSE3,
|
||||
ScaleRowDown2Box_SSSE3,
|
||||
ScaleRowDown2Box_C,
|
||||
2,
|
||||
1,
|
||||
15)
|
||||
SDODD(ScaleRowDown2Box_Odd_SSSE3,
|
||||
ScaleRowDown2Box_SSSE3,
|
||||
ScaleRowDown2Box_Odd_C,
|
||||
2,
|
||||
1,
|
||||
15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN2_AVX2
|
||||
SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31)
|
||||
SDANY(ScaleRowDown2Linear_Any_AVX2, ScaleRowDown2Linear_AVX2,
|
||||
ScaleRowDown2Linear_C, 2, 1, 31)
|
||||
SDANY(ScaleRowDown2Box_Any_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C,
|
||||
2, 1, 31)
|
||||
SDODD(ScaleRowDown2Box_Odd_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_Odd_C,
|
||||
2, 1, 31)
|
||||
SDANY(ScaleRowDown2Linear_Any_AVX2,
|
||||
ScaleRowDown2Linear_AVX2,
|
||||
ScaleRowDown2Linear_C,
|
||||
2,
|
||||
1,
|
||||
31)
|
||||
SDANY(ScaleRowDown2Box_Any_AVX2,
|
||||
ScaleRowDown2Box_AVX2,
|
||||
ScaleRowDown2Box_C,
|
||||
2,
|
||||
1,
|
||||
31)
|
||||
SDODD(ScaleRowDown2Box_Odd_AVX2,
|
||||
ScaleRowDown2Box_AVX2,
|
||||
ScaleRowDown2Box_Odd_C,
|
||||
2,
|
||||
1,
|
||||
31)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN2_NEON
|
||||
SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15)
|
||||
SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON,
|
||||
ScaleRowDown2Linear_C, 2, 1, 15)
|
||||
SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON,
|
||||
ScaleRowDown2Box_C, 2, 1, 15)
|
||||
SDODD(ScaleRowDown2Box_Odd_NEON, ScaleRowDown2Box_NEON,
|
||||
ScaleRowDown2Box_Odd_C, 2, 1, 15)
|
||||
SDANY(ScaleRowDown2Linear_Any_NEON,
|
||||
ScaleRowDown2Linear_NEON,
|
||||
ScaleRowDown2Linear_C,
|
||||
2,
|
||||
1,
|
||||
15)
|
||||
SDANY(ScaleRowDown2Box_Any_NEON,
|
||||
ScaleRowDown2Box_NEON,
|
||||
ScaleRowDown2Box_C,
|
||||
2,
|
||||
1,
|
||||
15)
|
||||
SDODD(ScaleRowDown2Box_Odd_NEON,
|
||||
ScaleRowDown2Box_NEON,
|
||||
ScaleRowDown2Box_Odd_C,
|
||||
2,
|
||||
1,
|
||||
15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN2_MSA
|
||||
SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31)
|
||||
SDANY(ScaleRowDown2Linear_Any_MSA,
|
||||
ScaleRowDown2Linear_MSA,
|
||||
ScaleRowDown2Linear_C,
|
||||
2,
|
||||
1,
|
||||
31)
|
||||
SDANY(ScaleRowDown2Box_Any_MSA,
|
||||
ScaleRowDown2Box_MSA,
|
||||
ScaleRowDown2Box_C,
|
||||
2,
|
||||
1,
|
||||
31)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN2_MMI
|
||||
SDANY(ScaleRowDown2_Any_MMI, ScaleRowDown2_MMI, ScaleRowDown2_C, 2, 1, 7)
|
||||
SDANY(ScaleRowDown2Linear_Any_MMI,
|
||||
ScaleRowDown2Linear_MMI,
|
||||
ScaleRowDown2Linear_C,
|
||||
2,
|
||||
1,
|
||||
7)
|
||||
SDANY(ScaleRowDown2Box_Any_MMI,
|
||||
ScaleRowDown2Box_MMI,
|
||||
ScaleRowDown2Box_C,
|
||||
2,
|
||||
1,
|
||||
7)
|
||||
SDODD(ScaleRowDown2Box_Odd_MMI,
|
||||
ScaleRowDown2Box_MMI,
|
||||
ScaleRowDown2Box_Odd_C,
|
||||
2,
|
||||
1,
|
||||
7)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN4_SSSE3
|
||||
SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7)
|
||||
SDANY(ScaleRowDown4Box_Any_SSSE3, ScaleRowDown4Box_SSSE3, ScaleRowDown4Box_C,
|
||||
4, 1, 7)
|
||||
SDANY(ScaleRowDown4Box_Any_SSSE3,
|
||||
ScaleRowDown4Box_SSSE3,
|
||||
ScaleRowDown4Box_C,
|
||||
4,
|
||||
1,
|
||||
7)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN4_AVX2
|
||||
SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15)
|
||||
SDANY(ScaleRowDown4Box_Any_AVX2, ScaleRowDown4Box_AVX2, ScaleRowDown4Box_C,
|
||||
4, 1, 15)
|
||||
SDANY(ScaleRowDown4Box_Any_AVX2,
|
||||
ScaleRowDown4Box_AVX2,
|
||||
ScaleRowDown4Box_C,
|
||||
4,
|
||||
1,
|
||||
15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN4_NEON
|
||||
SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7)
|
||||
SDANY(ScaleRowDown4Box_Any_NEON, ScaleRowDown4Box_NEON, ScaleRowDown4Box_C,
|
||||
4, 1, 7)
|
||||
SDANY(ScaleRowDown4Box_Any_NEON,
|
||||
ScaleRowDown4Box_NEON,
|
||||
ScaleRowDown4Box_C,
|
||||
4,
|
||||
1,
|
||||
7)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN4_MSA
|
||||
SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15)
|
||||
SDANY(ScaleRowDown4Box_Any_MSA,
|
||||
ScaleRowDown4Box_MSA,
|
||||
ScaleRowDown4Box_C,
|
||||
4,
|
||||
1,
|
||||
15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN4_MMI
|
||||
SDANY(ScaleRowDown4_Any_MMI, ScaleRowDown4_MMI, ScaleRowDown4_C, 4, 1, 7)
|
||||
SDANY(ScaleRowDown4Box_Any_MMI,
|
||||
ScaleRowDown4Box_MMI,
|
||||
ScaleRowDown4Box_C,
|
||||
4,
|
||||
1,
|
||||
7)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN34_SSSE3
|
||||
SDANY(ScaleRowDown34_Any_SSSE3, ScaleRowDown34_SSSE3,
|
||||
ScaleRowDown34_C, 4 / 3, 1, 23)
|
||||
SDANY(ScaleRowDown34_0_Box_Any_SSSE3, ScaleRowDown34_0_Box_SSSE3,
|
||||
ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
|
||||
SDANY(ScaleRowDown34_1_Box_Any_SSSE3, ScaleRowDown34_1_Box_SSSE3,
|
||||
ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
|
||||
SDANY(ScaleRowDown34_Any_SSSE3,
|
||||
ScaleRowDown34_SSSE3,
|
||||
ScaleRowDown34_C,
|
||||
4 / 3,
|
||||
1,
|
||||
23)
|
||||
SDANY(ScaleRowDown34_0_Box_Any_SSSE3,
|
||||
ScaleRowDown34_0_Box_SSSE3,
|
||||
ScaleRowDown34_0_Box_C,
|
||||
4 / 3,
|
||||
1,
|
||||
23)
|
||||
SDANY(ScaleRowDown34_1_Box_Any_SSSE3,
|
||||
ScaleRowDown34_1_Box_SSSE3,
|
||||
ScaleRowDown34_1_Box_C,
|
||||
4 / 3,
|
||||
1,
|
||||
23)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN34_NEON
|
||||
SDANY(ScaleRowDown34_Any_NEON, ScaleRowDown34_NEON,
|
||||
ScaleRowDown34_C, 4 / 3, 1, 23)
|
||||
SDANY(ScaleRowDown34_0_Box_Any_NEON, ScaleRowDown34_0_Box_NEON,
|
||||
ScaleRowDown34_0_Box_C, 4 / 3, 1, 23)
|
||||
SDANY(ScaleRowDown34_1_Box_Any_NEON, ScaleRowDown34_1_Box_NEON,
|
||||
ScaleRowDown34_1_Box_C, 4 / 3, 1, 23)
|
||||
SDANY(ScaleRowDown34_Any_NEON,
|
||||
ScaleRowDown34_NEON,
|
||||
ScaleRowDown34_C,
|
||||
4 / 3,
|
||||
1,
|
||||
23)
|
||||
SDANY(ScaleRowDown34_0_Box_Any_NEON,
|
||||
ScaleRowDown34_0_Box_NEON,
|
||||
ScaleRowDown34_0_Box_C,
|
||||
4 / 3,
|
||||
1,
|
||||
23)
|
||||
SDANY(ScaleRowDown34_1_Box_Any_NEON,
|
||||
ScaleRowDown34_1_Box_NEON,
|
||||
ScaleRowDown34_1_Box_C,
|
||||
4 / 3,
|
||||
1,
|
||||
23)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN34_MSA
|
||||
SDANY(ScaleRowDown34_Any_MSA,
|
||||
ScaleRowDown34_MSA,
|
||||
ScaleRowDown34_C,
|
||||
4 / 3,
|
||||
1,
|
||||
47)
|
||||
SDANY(ScaleRowDown34_0_Box_Any_MSA,
|
||||
ScaleRowDown34_0_Box_MSA,
|
||||
ScaleRowDown34_0_Box_C,
|
||||
4 / 3,
|
||||
1,
|
||||
47)
|
||||
SDANY(ScaleRowDown34_1_Box_Any_MSA,
|
||||
ScaleRowDown34_1_Box_MSA,
|
||||
ScaleRowDown34_1_Box_C,
|
||||
4 / 3,
|
||||
1,
|
||||
47)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN38_SSSE3
|
||||
SDANY(ScaleRowDown38_Any_SSSE3, ScaleRowDown38_SSSE3,
|
||||
ScaleRowDown38_C, 8 / 3, 1, 11)
|
||||
SDANY(ScaleRowDown38_3_Box_Any_SSSE3, ScaleRowDown38_3_Box_SSSE3,
|
||||
ScaleRowDown38_3_Box_C, 8 / 3, 1, 5)
|
||||
SDANY(ScaleRowDown38_2_Box_Any_SSSE3, ScaleRowDown38_2_Box_SSSE3,
|
||||
ScaleRowDown38_2_Box_C, 8 / 3, 1, 5)
|
||||
SDANY(ScaleRowDown38_Any_SSSE3,
|
||||
ScaleRowDown38_SSSE3,
|
||||
ScaleRowDown38_C,
|
||||
8 / 3,
|
||||
1,
|
||||
11)
|
||||
SDANY(ScaleRowDown38_3_Box_Any_SSSE3,
|
||||
ScaleRowDown38_3_Box_SSSE3,
|
||||
ScaleRowDown38_3_Box_C,
|
||||
8 / 3,
|
||||
1,
|
||||
5)
|
||||
SDANY(ScaleRowDown38_2_Box_Any_SSSE3,
|
||||
ScaleRowDown38_2_Box_SSSE3,
|
||||
ScaleRowDown38_2_Box_C,
|
||||
8 / 3,
|
||||
1,
|
||||
5)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN38_NEON
|
||||
SDANY(ScaleRowDown38_Any_NEON, ScaleRowDown38_NEON,
|
||||
ScaleRowDown38_C, 8 / 3, 1, 11)
|
||||
SDANY(ScaleRowDown38_3_Box_Any_NEON, ScaleRowDown38_3_Box_NEON,
|
||||
ScaleRowDown38_3_Box_C, 8 / 3, 1, 11)
|
||||
SDANY(ScaleRowDown38_2_Box_Any_NEON, ScaleRowDown38_2_Box_NEON,
|
||||
ScaleRowDown38_2_Box_C, 8 / 3, 1, 11)
|
||||
SDANY(ScaleRowDown38_Any_NEON,
|
||||
ScaleRowDown38_NEON,
|
||||
ScaleRowDown38_C,
|
||||
8 / 3,
|
||||
1,
|
||||
11)
|
||||
SDANY(ScaleRowDown38_3_Box_Any_NEON,
|
||||
ScaleRowDown38_3_Box_NEON,
|
||||
ScaleRowDown38_3_Box_C,
|
||||
8 / 3,
|
||||
1,
|
||||
11)
|
||||
SDANY(ScaleRowDown38_2_Box_Any_NEON,
|
||||
ScaleRowDown38_2_Box_NEON,
|
||||
ScaleRowDown38_2_Box_C,
|
||||
8 / 3,
|
||||
1,
|
||||
11)
|
||||
#endif
|
||||
#ifdef HAS_SCALEROWDOWN38_MSA
|
||||
SDANY(ScaleRowDown38_Any_MSA,
|
||||
ScaleRowDown38_MSA,
|
||||
ScaleRowDown38_C,
|
||||
8 / 3,
|
||||
1,
|
||||
11)
|
||||
SDANY(ScaleRowDown38_3_Box_Any_MSA,
|
||||
ScaleRowDown38_3_Box_MSA,
|
||||
ScaleRowDown38_3_Box_C,
|
||||
8 / 3,
|
||||
1,
|
||||
11)
|
||||
SDANY(ScaleRowDown38_2_Box_Any_MSA,
|
||||
ScaleRowDown38_2_Box_MSA,
|
||||
ScaleRowDown38_2_Box_C,
|
||||
8 / 3,
|
||||
1,
|
||||
11)
|
||||
#endif
|
||||
|
||||
#ifdef HAS_SCALEARGBROWDOWN2_SSE2
|
||||
SDANY(ScaleARGBRowDown2_Any_SSE2, ScaleARGBRowDown2_SSE2,
|
||||
ScaleARGBRowDown2_C, 2, 4, 3)
|
||||
SDANY(ScaleARGBRowDown2Linear_Any_SSE2, ScaleARGBRowDown2Linear_SSE2,
|
||||
ScaleARGBRowDown2Linear_C, 2, 4, 3)
|
||||
SDANY(ScaleARGBRowDown2Box_Any_SSE2, ScaleARGBRowDown2Box_SSE2,
|
||||
ScaleARGBRowDown2Box_C, 2, 4, 3)
|
||||
SDANY(ScaleARGBRowDown2_Any_SSE2,
|
||||
ScaleARGBRowDown2_SSE2,
|
||||
ScaleARGBRowDown2_C,
|
||||
2,
|
||||
4,
|
||||
3)
|
||||
SDANY(ScaleARGBRowDown2Linear_Any_SSE2,
|
||||
ScaleARGBRowDown2Linear_SSE2,
|
||||
ScaleARGBRowDown2Linear_C,
|
||||
2,
|
||||
4,
|
||||
3)
|
||||
SDANY(ScaleARGBRowDown2Box_Any_SSE2,
|
||||
ScaleARGBRowDown2Box_SSE2,
|
||||
ScaleARGBRowDown2Box_C,
|
||||
2,
|
||||
4,
|
||||
3)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBROWDOWN2_NEON
|
||||
SDANY(ScaleARGBRowDown2_Any_NEON, ScaleARGBRowDown2_NEON,
|
||||
ScaleARGBRowDown2_C, 2, 4, 7)
|
||||
SDANY(ScaleARGBRowDown2Linear_Any_NEON, ScaleARGBRowDown2Linear_NEON,
|
||||
ScaleARGBRowDown2Linear_C, 2, 4, 7)
|
||||
SDANY(ScaleARGBRowDown2Box_Any_NEON, ScaleARGBRowDown2Box_NEON,
|
||||
ScaleARGBRowDown2Box_C, 2, 4, 7)
|
||||
SDANY(ScaleARGBRowDown2_Any_NEON,
|
||||
ScaleARGBRowDown2_NEON,
|
||||
ScaleARGBRowDown2_C,
|
||||
2,
|
||||
4,
|
||||
7)
|
||||
SDANY(ScaleARGBRowDown2Linear_Any_NEON,
|
||||
ScaleARGBRowDown2Linear_NEON,
|
||||
ScaleARGBRowDown2Linear_C,
|
||||
2,
|
||||
4,
|
||||
7)
|
||||
SDANY(ScaleARGBRowDown2Box_Any_NEON,
|
||||
ScaleARGBRowDown2Box_NEON,
|
||||
ScaleARGBRowDown2Box_C,
|
||||
2,
|
||||
4,
|
||||
7)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBROWDOWN2_MSA
|
||||
SDANY(ScaleARGBRowDown2_Any_MSA,
|
||||
ScaleARGBRowDown2_MSA,
|
||||
ScaleARGBRowDown2_C,
|
||||
2,
|
||||
4,
|
||||
3)
|
||||
SDANY(ScaleARGBRowDown2Linear_Any_MSA,
|
||||
ScaleARGBRowDown2Linear_MSA,
|
||||
ScaleARGBRowDown2Linear_C,
|
||||
2,
|
||||
4,
|
||||
3)
|
||||
SDANY(ScaleARGBRowDown2Box_Any_MSA,
|
||||
ScaleARGBRowDown2Box_MSA,
|
||||
ScaleARGBRowDown2Box_C,
|
||||
2,
|
||||
4,
|
||||
3)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBROWDOWN2_MMI
|
||||
SDANY(ScaleARGBRowDown2_Any_MMI,
|
||||
ScaleARGBRowDown2_MMI,
|
||||
ScaleARGBRowDown2_C,
|
||||
2,
|
||||
4,
|
||||
1)
|
||||
SDANY(ScaleARGBRowDown2Linear_Any_MMI,
|
||||
ScaleARGBRowDown2Linear_MMI,
|
||||
ScaleARGBRowDown2Linear_C,
|
||||
2,
|
||||
4,
|
||||
1)
|
||||
SDANY(ScaleARGBRowDown2Box_Any_MMI,
|
||||
ScaleARGBRowDown2Box_MMI,
|
||||
ScaleARGBRowDown2Box_C,
|
||||
2,
|
||||
4,
|
||||
1)
|
||||
#endif
|
||||
#undef SDANY
|
||||
|
||||
// Scale down by even scale factor.
|
||||
#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, ptrdiff_t src_stride, int src_stepx, \
|
||||
uint8* dst_ptr, int dst_width) { \
|
||||
int r = (int)((unsigned int)dst_width % (MASK + 1)); \
|
||||
int n = dst_width - r; \
|
||||
if (n > 0) { \
|
||||
SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
|
||||
} \
|
||||
SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, \
|
||||
src_stepx, dst_ptr + n * BPP, r); \
|
||||
}
|
||||
#define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \
|
||||
void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \
|
||||
uint8_t* dst_ptr, int dst_width) { \
|
||||
int r = dst_width & MASK; \
|
||||
int n = dst_width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \
|
||||
} \
|
||||
SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \
|
||||
dst_ptr + n * BPP, r); \
|
||||
}
|
||||
|
||||
#ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2
|
||||
SDAANY(ScaleARGBRowDownEven_Any_SSE2, ScaleARGBRowDownEven_SSE2,
|
||||
ScaleARGBRowDownEven_C, 4, 3)
|
||||
SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, ScaleARGBRowDownEvenBox_SSE2,
|
||||
ScaleARGBRowDownEvenBox_C, 4, 3)
|
||||
SDAANY(ScaleARGBRowDownEven_Any_SSE2,
|
||||
ScaleARGBRowDownEven_SSE2,
|
||||
ScaleARGBRowDownEven_C,
|
||||
4,
|
||||
3)
|
||||
SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2,
|
||||
ScaleARGBRowDownEvenBox_SSE2,
|
||||
ScaleARGBRowDownEvenBox_C,
|
||||
4,
|
||||
3)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBROWDOWNEVEN_NEON
|
||||
SDAANY(ScaleARGBRowDownEven_Any_NEON, ScaleARGBRowDownEven_NEON,
|
||||
ScaleARGBRowDownEven_C, 4, 3)
|
||||
SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, ScaleARGBRowDownEvenBox_NEON,
|
||||
ScaleARGBRowDownEvenBox_C, 4, 3)
|
||||
SDAANY(ScaleARGBRowDownEven_Any_NEON,
|
||||
ScaleARGBRowDownEven_NEON,
|
||||
ScaleARGBRowDownEven_C,
|
||||
4,
|
||||
3)
|
||||
SDAANY(ScaleARGBRowDownEvenBox_Any_NEON,
|
||||
ScaleARGBRowDownEvenBox_NEON,
|
||||
ScaleARGBRowDownEvenBox_C,
|
||||
4,
|
||||
3)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBROWDOWNEVEN_MSA
|
||||
SDAANY(ScaleARGBRowDownEven_Any_MSA,
|
||||
ScaleARGBRowDownEven_MSA,
|
||||
ScaleARGBRowDownEven_C,
|
||||
4,
|
||||
3)
|
||||
SDAANY(ScaleARGBRowDownEvenBox_Any_MSA,
|
||||
ScaleARGBRowDownEvenBox_MSA,
|
||||
ScaleARGBRowDownEvenBox_C,
|
||||
4,
|
||||
3)
|
||||
#endif
|
||||
#ifdef HAS_SCALEARGBROWDOWNEVEN_MMI
|
||||
SDAANY(ScaleARGBRowDownEven_Any_MMI,
|
||||
ScaleARGBRowDownEven_MMI,
|
||||
ScaleARGBRowDownEven_C,
|
||||
4,
|
||||
1)
|
||||
SDAANY(ScaleARGBRowDownEvenBox_Any_MMI,
|
||||
ScaleARGBRowDownEvenBox_MMI,
|
||||
ScaleARGBRowDownEvenBox_C,
|
||||
4,
|
||||
1)
|
||||
#endif
|
||||
|
||||
#ifdef SASIMDONLY
|
||||
// This also works and uses memcpy and SIMD instead of C, but is slower on ARM
|
||||
|
||||
// Add rows box filter scale down. Using macro from row_any
|
||||
#define SAROW(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \
|
||||
void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int width) { \
|
||||
SIMD_ALIGNED(uint16_t dst_temp[32]); \
|
||||
SIMD_ALIGNED(uint8_t src_temp[32]); \
|
||||
memset(dst_temp, 0, 32 * 2); /* for msan */ \
|
||||
int r = width & MASK; \
|
||||
int n = width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
ANY_SIMD(src_ptr, dst_ptr, n); \
|
||||
} \
|
||||
memcpy(src_temp, src_ptr + n * SBPP, r * SBPP); \
|
||||
memcpy(dst_temp, dst_ptr + n * BPP, r * BPP); \
|
||||
ANY_SIMD(src_temp, dst_temp, MASK + 1); \
|
||||
memcpy(dst_ptr + n * BPP, dst_temp, r * BPP); \
|
||||
}
|
||||
|
||||
#ifdef HAS_SCALEADDROW_SSE2
|
||||
SAROW(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, 1, 2, 15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEADDROW_AVX2
|
||||
SAROW(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, 1, 2, 31)
|
||||
#endif
|
||||
#ifdef HAS_SCALEADDROW_NEON
|
||||
SAROW(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, 1, 2, 15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEADDROW_MSA
|
||||
SAROW(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, 1, 2, 15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEADDROW_MMI
|
||||
SAROW(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, 1, 2, 7)
|
||||
#endif
|
||||
#undef SAANY
|
||||
|
||||
#else
|
||||
|
||||
// Add rows box filter scale down.
|
||||
#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
|
||||
void NAMEANY(const uint8* src_ptr, uint16* dst_ptr, int src_width) { \
|
||||
int n = src_width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
|
||||
} \
|
||||
SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
|
||||
}
|
||||
#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK) \
|
||||
void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \
|
||||
int n = src_width & ~MASK; \
|
||||
if (n > 0) { \
|
||||
SCALEADDROW_SIMD(src_ptr, dst_ptr, n); \
|
||||
} \
|
||||
SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK); \
|
||||
}
|
||||
|
||||
#ifdef HAS_SCALEADDROW_SSE2
|
||||
SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
|
||||
@@ -208,14 +559,17 @@ SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
|
||||
#ifdef HAS_SCALEADDROW_NEON
|
||||
SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEADDROW_MSA
|
||||
SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
|
||||
#endif
|
||||
#ifdef HAS_SCALEADDROW_MMI
|
||||
SAANY(ScaleAddRow_Any_MMI, ScaleAddRow_MMI, ScaleAddRow_C, 7)
|
||||
#endif
|
||||
#undef SAANY
|
||||
|
||||
#endif // SASIMDONLY
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+435
-211
@@ -30,20 +30,31 @@ static __inline int Abs(int v) {
|
||||
// ScaleARGB ARGB, 1/2
|
||||
// This is an optimized version for scaling down a ARGB to 1/2 of
|
||||
// its original size.
|
||||
static void ScaleARGBDown2(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
static void ScaleARGBDown2(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8_t* src_argb,
|
||||
uint8_t* dst_argb,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
int row_stride = src_stride * (dy >> 16);
|
||||
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) =
|
||||
filtering == kFilterNone ? ScaleARGBRowDown2_C :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C :
|
||||
ScaleARGBRowDown2Box_C);
|
||||
assert(dx == 65536 * 2); // Test scale factor of 2.
|
||||
void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
|
||||
uint8_t* dst_argb, int dst_width) =
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_C
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C
|
||||
: ScaleARGBRowDown2Box_C);
|
||||
(void)src_width;
|
||||
(void)src_height;
|
||||
(void)dx;
|
||||
assert(dx == 65536 * 2); // Test scale factor of 2.
|
||||
assert((dy & 0x1ffff) == 0); // Test vertical scale is multiple of 2.
|
||||
// Advance to odd row, even column.
|
||||
if (filtering == kFilterBilinear) {
|
||||
@@ -54,25 +65,65 @@ static void ScaleARGBDown2(int src_width, int src_height,
|
||||
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_SSE2 :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2 :
|
||||
ScaleARGBRowDown2Box_Any_SSE2);
|
||||
ScaleARGBRowDown2 =
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_Any_SSE2
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2
|
||||
: ScaleARGBRowDown2Box_Any_SSE2);
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_SSE2 :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2 :
|
||||
ScaleARGBRowDown2Box_SSE2);
|
||||
ScaleARGBRowDown2 =
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_SSE2
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2
|
||||
: ScaleARGBRowDown2Box_SSE2);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_NEON :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON :
|
||||
ScaleARGBRowDown2Box_Any_NEON);
|
||||
ScaleARGBRowDown2 =
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_Any_NEON
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON
|
||||
: ScaleARGBRowDown2Box_Any_NEON);
|
||||
if (IS_ALIGNED(dst_width, 8)) {
|
||||
ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_NEON :
|
||||
(filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON :
|
||||
ScaleARGBRowDown2Box_NEON);
|
||||
ScaleARGBRowDown2 =
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_NEON
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_NEON
|
||||
: ScaleARGBRowDown2Box_NEON);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ScaleARGBRowDown2 =
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_Any_MSA
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA
|
||||
: ScaleARGBRowDown2Box_Any_MSA);
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBRowDown2 =
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_MSA
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA
|
||||
: ScaleARGBRowDown2Box_MSA);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
ScaleARGBRowDown2 =
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_Any_MMI
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MMI
|
||||
: ScaleARGBRowDown2Box_Any_MMI);
|
||||
if (IS_ALIGNED(dst_width, 2)) {
|
||||
ScaleARGBRowDown2 =
|
||||
filtering == kFilterNone
|
||||
? ScaleARGBRowDown2_MMI
|
||||
: (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MMI
|
||||
: ScaleARGBRowDown2Box_MMI);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -90,21 +141,32 @@ static void ScaleARGBDown2(int src_width, int src_height,
|
||||
// ScaleARGB ARGB, 1/4
|
||||
// This is an optimized version for scaling down a ARGB to 1/4 of
|
||||
// its original size.
|
||||
static void ScaleARGBDown4Box(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy) {
|
||||
static void ScaleARGBDown4Box(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8_t* src_argb,
|
||||
uint8_t* dst_argb,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy) {
|
||||
int j;
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (dst_width * 2 * 4 + 31) & ~31;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
int row_stride = src_stride * (dy >> 16);
|
||||
void (*ScaleARGBRowDown2)(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
uint8* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C;
|
||||
void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
|
||||
uint8_t* dst_argb, int dst_width) =
|
||||
ScaleARGBRowDown2Box_C;
|
||||
// Advance to odd row, even column.
|
||||
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
|
||||
assert(dx == 65536 * 4); // Test scale factor of 4.
|
||||
(void)src_width;
|
||||
(void)src_height;
|
||||
(void)dx;
|
||||
assert(dx == 65536 * 4); // Test scale factor of 4.
|
||||
assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4.
|
||||
#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
@@ -125,8 +187,8 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
|
||||
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2);
|
||||
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride,
|
||||
row + kRowSize, dst_width * 2);
|
||||
ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize,
|
||||
dst_width * 2);
|
||||
ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width);
|
||||
src_argb += row_stride;
|
||||
dst_argb += dst_stride;
|
||||
@@ -137,38 +199,67 @@ static void ScaleARGBDown4Box(int src_width, int src_height,
|
||||
// ScaleARGB ARGB Even
|
||||
// This is an optimized version for scaling down a ARGB to even
|
||||
// multiple of its original size.
|
||||
static void ScaleARGBDownEven(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
static void ScaleARGBDownEven(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8_t* src_argb,
|
||||
uint8_t* dst_argb,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
int col_step = dx >> 16;
|
||||
int row_stride = (dy >> 16) * src_stride;
|
||||
void (*ScaleARGBRowDownEven)(const uint8* src_argb, ptrdiff_t src_stride,
|
||||
int src_step, uint8* dst_argb, int dst_width) =
|
||||
void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride,
|
||||
int src_step, uint8_t* dst_argb, int dst_width) =
|
||||
filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C;
|
||||
(void)src_width;
|
||||
(void)src_height;
|
||||
assert(IS_ALIGNED(src_width, 2));
|
||||
assert(IS_ALIGNED(src_height, 2));
|
||||
src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2)) {
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 :
|
||||
ScaleARGBRowDownEven_Any_SSE2;
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2
|
||||
: ScaleARGBRowDownEven_Any_SSE2;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 :
|
||||
ScaleARGBRowDownEven_SSE2;
|
||||
ScaleARGBRowDownEven =
|
||||
filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_NEON)
|
||||
if (TestCpuFlag(kCpuHasNEON)) {
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON :
|
||||
ScaleARGBRowDownEven_Any_NEON;
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_NEON
|
||||
: ScaleARGBRowDownEven_Any_NEON;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON :
|
||||
ScaleARGBRowDownEven_NEON;
|
||||
ScaleARGBRowDownEven =
|
||||
filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA
|
||||
: ScaleARGBRowDownEven_Any_MSA;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBRowDownEven =
|
||||
filtering ? ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBROWDOWNEVEN_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MMI
|
||||
: ScaleARGBRowDownEven_Any_MMI;
|
||||
if (IS_ALIGNED(dst_width, 2)) {
|
||||
ScaleARGBRowDownEven =
|
||||
filtering ? ScaleARGBRowDownEvenBox_MMI : ScaleARGBRowDownEven_MMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -184,25 +275,32 @@ static void ScaleARGBDownEven(int src_width, int src_height,
|
||||
}
|
||||
|
||||
// Scale ARGB down with bilinear interpolation.
|
||||
static void ScaleARGBBilinearDown(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
static void ScaleARGBBilinearDown(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8_t* src_argb,
|
||||
uint8_t* dst_argb,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
|
||||
InterpolateRow_C;
|
||||
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
(src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C;
|
||||
int64 xlast = x + (int64)(dst_width - 1) * dx;
|
||||
int64 xl = (dx >= 0) ? x : xlast;
|
||||
int64 xr = (dx >= 0) ? xlast : x;
|
||||
int64_t xlast = x + (int64_t)(dst_width - 1) * dx;
|
||||
int64_t xl = (dx >= 0) ? x : xlast;
|
||||
int64_t xr = (dx >= 0) ? xlast : x;
|
||||
int clip_src_width;
|
||||
xl = (xl >> 16) & ~3; // Left edge aligned.
|
||||
xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
|
||||
xl = (xl >> 16) & ~3; // Left edge aligned.
|
||||
xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels.
|
||||
xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel.
|
||||
if (xr > src_width) {
|
||||
xr = src_width;
|
||||
@@ -234,12 +332,11 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) &&
|
||||
IS_ALIGNED(src_argb, 4) && IS_ALIGNED(src_stride, 4)) {
|
||||
InterpolateRow = InterpolateRow_Any_DSPR2;
|
||||
if (IS_ALIGNED(clip_src_width, 4)) {
|
||||
InterpolateRow = InterpolateRow_DSPR2;
|
||||
#if defined(HAS_INTERPOLATEROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
InterpolateRow = InterpolateRow_Any_MSA;
|
||||
if (IS_ALIGNED(clip_src_width, 32)) {
|
||||
InterpolateRow = InterpolateRow_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
@@ -255,6 +352,14 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
|
||||
ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
|
||||
if (IS_ALIGNED(dst_width, 8)) {
|
||||
ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
// TODO(fbarchard): Consider not allocating row buffer for kFilterLinear.
|
||||
// Allocate a row of ARGB.
|
||||
@@ -267,7 +372,7 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
|
||||
}
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
int yi = y >> 16;
|
||||
const uint8* src = src_argb + yi * src_stride;
|
||||
const uint8_t* src = src_argb + yi * src_stride;
|
||||
if (filtering == kFilterLinear) {
|
||||
ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx);
|
||||
} else {
|
||||
@@ -286,18 +391,25 @@ static void ScaleARGBBilinearDown(int src_width, int src_height,
|
||||
}
|
||||
|
||||
// Scale ARGB up with bilinear interpolation.
|
||||
static void ScaleARGBBilinearUp(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
static void ScaleARGBBilinearUp(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8_t* src_argb,
|
||||
uint8_t* dst_argb,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
|
||||
InterpolateRow_C;
|
||||
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
|
||||
const int max_y = (src_height - 1) << 16;
|
||||
#if defined(HAS_INTERPOLATEROW_SSSE3)
|
||||
@@ -324,15 +436,25 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride, 4)) {
|
||||
InterpolateRow = InterpolateRow_DSPR2;
|
||||
#if defined(HAS_INTERPOLATEROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
InterpolateRow = InterpolateRow_Any_MSA;
|
||||
if (IS_ALIGNED(dst_width, 8)) {
|
||||
InterpolateRow = InterpolateRow_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
InterpolateRow = InterpolateRow_Any_MMI;
|
||||
if (IS_ALIGNED(dst_width, 2)) {
|
||||
InterpolateRow = InterpolateRow_MMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (src_width >= 32768) {
|
||||
ScaleARGBFilterCols = filtering ?
|
||||
ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
|
||||
ScaleARGBFilterCols =
|
||||
filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
|
||||
}
|
||||
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
|
||||
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
|
||||
@@ -347,6 +469,14 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
|
||||
if (filtering && TestCpuFlag(kCpuHasMSA)) {
|
||||
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
|
||||
if (IS_ALIGNED(dst_width, 8)) {
|
||||
ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBCOLS_SSE2)
|
||||
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
|
||||
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
|
||||
@@ -359,6 +489,22 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
|
||||
ScaleARGBFilterCols = ScaleARGBCols_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBCOLS_MSA)
|
||||
if (!filtering && TestCpuFlag(kCpuHasMSA)) {
|
||||
ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBFilterCols = ScaleARGBCols_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBCOLS_MMI)
|
||||
if (!filtering && TestCpuFlag(kCpuHasMMI)) {
|
||||
ScaleARGBFilterCols = ScaleARGBCols_Any_MMI;
|
||||
if (IS_ALIGNED(dst_width, 1)) {
|
||||
ScaleARGBFilterCols = ScaleARGBCols_MMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
|
||||
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
|
||||
@@ -366,6 +512,11 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
|
||||
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBCOLSUP2_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBFilterCols = ScaleARGBColsUp2_MMI;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -375,13 +526,13 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
|
||||
|
||||
{
|
||||
int yi = y >> 16;
|
||||
const uint8* src = src_argb + yi * src_stride;
|
||||
const uint8_t* src = src_argb + yi * src_stride;
|
||||
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (dst_width * 4 + 31) & ~31;
|
||||
align_buffer_64(row, kRowSize * 2);
|
||||
|
||||
uint8* rowptr = row;
|
||||
uint8_t* rowptr = row;
|
||||
int rowstride = kRowSize;
|
||||
int lasty = yi;
|
||||
|
||||
@@ -423,24 +574,27 @@ static void ScaleARGBBilinearUp(int src_width, int src_height,
|
||||
|
||||
#ifdef YUVSCALEUP
|
||||
// Scale YUV to ARGB up with bilinear interpolation.
|
||||
static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
static void ScaleYUVToARGBBilinearUp(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride_y,
|
||||
int src_stride_u,
|
||||
int src_stride_v,
|
||||
int dst_stride_argb,
|
||||
const uint8* src_y,
|
||||
const uint8* src_u,
|
||||
const uint8* src_v,
|
||||
uint8* dst_argb,
|
||||
int x, int dx, int y, int dy,
|
||||
const uint8_t* src_y,
|
||||
const uint8_t* src_u,
|
||||
const uint8_t* src_v,
|
||||
uint8_t* dst_argb,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy,
|
||||
enum FilterMode filtering) {
|
||||
int j;
|
||||
void (*I422ToARGBRow)(const uint8* y_buf,
|
||||
const uint8* u_buf,
|
||||
const uint8* v_buf,
|
||||
uint8* rgb_buf,
|
||||
int width) = I422ToARGBRow_C;
|
||||
void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf,
|
||||
const uint8_t* v_buf, uint8_t* rgb_buf, int width) =
|
||||
I422ToARGBRow_C;
|
||||
#if defined(HAS_I422TOARGBROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_SSSE3;
|
||||
@@ -465,19 +619,18 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_I422TOARGBROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) && IS_ALIGNED(src_width, 4) &&
|
||||
IS_ALIGNED(src_y, 4) && IS_ALIGNED(src_stride_y, 4) &&
|
||||
IS_ALIGNED(src_u, 2) && IS_ALIGNED(src_stride_u, 2) &&
|
||||
IS_ALIGNED(src_v, 2) && IS_ALIGNED(src_stride_v, 2) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
I422ToARGBRow = I422ToARGBRow_DSPR2;
|
||||
#if defined(HAS_I422TOARGBROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
I422ToARGBRow = I422ToARGBRow_Any_MSA;
|
||||
if (IS_ALIGNED(src_width, 8)) {
|
||||
I422ToARGBRow = I422ToARGBRow_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*InterpolateRow)(uint8* dst_argb, const uint8* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width, int source_y_fraction) =
|
||||
InterpolateRow_C;
|
||||
void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
ptrdiff_t src_stride, int dst_width,
|
||||
int source_y_fraction) = InterpolateRow_C;
|
||||
#if defined(HAS_INTERPOLATEROW_SSSE3)
|
||||
if (TestCpuFlag(kCpuHasSSSE3)) {
|
||||
InterpolateRow = InterpolateRow_Any_SSSE3;
|
||||
@@ -502,19 +655,21 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_INTERPOLATEROW_DSPR2)
|
||||
if (TestCpuFlag(kCpuHasDSPR2) &&
|
||||
IS_ALIGNED(dst_argb, 4) && IS_ALIGNED(dst_stride_argb, 4)) {
|
||||
InterpolateRow = InterpolateRow_DSPR2;
|
||||
#if defined(HAS_INTERPOLATEROW_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
InterpolateRow = InterpolateRow_Any_MSA;
|
||||
if (IS_ALIGNED(dst_width, 8)) {
|
||||
InterpolateRow = InterpolateRow_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void (*ScaleARGBFilterCols)(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C;
|
||||
if (src_width >= 32768) {
|
||||
ScaleARGBFilterCols = filtering ?
|
||||
ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
|
||||
ScaleARGBFilterCols =
|
||||
filtering ? ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
|
||||
}
|
||||
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
|
||||
if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
|
||||
@@ -529,6 +684,14 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
|
||||
if (filtering && TestCpuFlag(kCpuHasMSA)) {
|
||||
ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
|
||||
if (IS_ALIGNED(dst_width, 8)) {
|
||||
ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBCOLS_SSE2)
|
||||
if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
|
||||
ScaleARGBFilterCols = ScaleARGBCols_SSE2;
|
||||
@@ -541,6 +704,22 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
ScaleARGBFilterCols = ScaleARGBCols_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBCOLS_MSA)
|
||||
if (!filtering && TestCpuFlag(kCpuHasMSA)) {
|
||||
ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBFilterCols = ScaleARGBCols_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBCOLS_MMI)
|
||||
if (!filtering && TestCpuFlag(kCpuHasMMI)) {
|
||||
ScaleARGBFilterCols = ScaleARGBCols_Any_MMI;
|
||||
if (IS_ALIGNED(dst_width, 1)) {
|
||||
ScaleARGBFilterCols = ScaleARGBCols_MMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
|
||||
ScaleARGBFilterCols = ScaleARGBColsUp2_C;
|
||||
@@ -548,6 +727,11 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
|
||||
ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBCOLSUP2_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBFilterCols = ScaleARGBColsUp2_MMI;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -558,9 +742,9 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
const int kYShift = 1; // Shift Y by 1 to convert Y plane to UV coordinate.
|
||||
int yi = y >> 16;
|
||||
int uv_yi = yi >> kYShift;
|
||||
const uint8* src_row_y = src_y + yi * src_stride_y;
|
||||
const uint8* src_row_u = src_u + uv_yi * src_stride_u;
|
||||
const uint8* src_row_v = src_v + uv_yi * src_stride_v;
|
||||
const uint8_t* src_row_y = src_y + yi * src_stride_y;
|
||||
const uint8_t* src_row_u = src_u + uv_yi * src_stride_u;
|
||||
const uint8_t* src_row_v = src_v + uv_yi * src_stride_v;
|
||||
|
||||
// Allocate 2 rows of ARGB.
|
||||
const int kRowSize = (dst_width * 4 + 31) & ~31;
|
||||
@@ -569,7 +753,7 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
// Allocate 1 row of ARGB for source conversion.
|
||||
align_buffer_64(argb_row, src_width * 4);
|
||||
|
||||
uint8* rowptr = row;
|
||||
uint8_t* rowptr = row;
|
||||
int rowstride = kRowSize;
|
||||
int lasty = yi;
|
||||
|
||||
@@ -635,15 +819,23 @@ static void ScaleYUVToARGBBilinearUp(int src_width, int src_height,
|
||||
// of x and dx is the integer part of the source position and
|
||||
// the lower 16 bits are the fixed decimal part.
|
||||
|
||||
static void ScaleARGBSimple(int src_width, int src_height,
|
||||
int dst_width, int dst_height,
|
||||
int src_stride, int dst_stride,
|
||||
const uint8* src_argb, uint8* dst_argb,
|
||||
int x, int dx, int y, int dy) {
|
||||
static void ScaleARGBSimple(int src_width,
|
||||
int src_height,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int src_stride,
|
||||
int dst_stride,
|
||||
const uint8_t* src_argb,
|
||||
uint8_t* dst_argb,
|
||||
int x,
|
||||
int dx,
|
||||
int y,
|
||||
int dy) {
|
||||
int j;
|
||||
void (*ScaleARGBCols)(uint8* dst_argb, const uint8* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb,
|
||||
int dst_width, int x, int dx) =
|
||||
(src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C;
|
||||
(void)src_height;
|
||||
#if defined(HAS_SCALEARGBCOLS_SSE2)
|
||||
if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
|
||||
ScaleARGBCols = ScaleARGBCols_SSE2;
|
||||
@@ -656,6 +848,22 @@ static void ScaleARGBSimple(int src_width, int src_height,
|
||||
ScaleARGBCols = ScaleARGBCols_NEON;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBCOLS_MSA)
|
||||
if (TestCpuFlag(kCpuHasMSA)) {
|
||||
ScaleARGBCols = ScaleARGBCols_Any_MSA;
|
||||
if (IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBCols = ScaleARGBCols_MSA;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBCOLS_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI)) {
|
||||
ScaleARGBCols = ScaleARGBCols_Any_MMI;
|
||||
if (IS_ALIGNED(dst_width, 1)) {
|
||||
ScaleARGBCols = ScaleARGBCols_MMI;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
if (src_width * 2 == dst_width && x < 0x8000) {
|
||||
ScaleARGBCols = ScaleARGBColsUp2_C;
|
||||
@@ -663,12 +871,17 @@ static void ScaleARGBSimple(int src_width, int src_height,
|
||||
if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
|
||||
ScaleARGBCols = ScaleARGBColsUp2_SSE2;
|
||||
}
|
||||
#endif
|
||||
#if defined(HAS_SCALEARGBCOLSUP2_MMI)
|
||||
if (TestCpuFlag(kCpuHasMMI) && IS_ALIGNED(dst_width, 4)) {
|
||||
ScaleARGBCols = ScaleARGBColsUp2_MMI;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
for (j = 0; j < dst_height; ++j) {
|
||||
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride,
|
||||
dst_width, x, dx);
|
||||
ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x,
|
||||
dx);
|
||||
dst_argb += dst_stride;
|
||||
y += dy;
|
||||
}
|
||||
@@ -677,11 +890,18 @@ static void ScaleARGBSimple(int src_width, int src_height,
|
||||
// ScaleARGB a ARGB.
|
||||
// This function in turn calls a scaling function
|
||||
// suitable for handling the desired resolutions.
|
||||
static void ScaleARGB(const uint8* src, int src_stride,
|
||||
int src_width, int src_height,
|
||||
uint8* dst, int dst_stride,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
static void ScaleARGB(const uint8_t* src,
|
||||
int src_stride,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8_t* dst,
|
||||
int dst_stride,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int clip_x,
|
||||
int clip_y,
|
||||
int clip_width,
|
||||
int clip_height,
|
||||
enum FilterMode filtering) {
|
||||
// Initial source x/y coordinate and step values as 16.16 fixed point.
|
||||
int x = 0;
|
||||
@@ -690,8 +910,7 @@ static void ScaleARGB(const uint8* src, int src_stride,
|
||||
int dy = 0;
|
||||
// ARGB does not support box filter yet, but allow the user to pass it.
|
||||
// Simplify filtering when possible.
|
||||
filtering = ScaleFilterReduce(src_width, src_height,
|
||||
dst_width, dst_height,
|
||||
filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height,
|
||||
filtering);
|
||||
|
||||
// Negative src_height means invert the image.
|
||||
@@ -700,17 +919,17 @@ static void ScaleARGB(const uint8* src, int src_stride,
|
||||
src = src + (src_height - 1) * src_stride;
|
||||
src_stride = -src_stride;
|
||||
}
|
||||
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering,
|
||||
&x, &y, &dx, &dy);
|
||||
ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y,
|
||||
&dx, &dy);
|
||||
src_width = Abs(src_width);
|
||||
if (clip_x) {
|
||||
int64 clipf = (int64)(clip_x) * dx;
|
||||
int64_t clipf = (int64_t)(clip_x)*dx;
|
||||
x += (clipf & 0xffff);
|
||||
src += (clipf >> 16) * 4;
|
||||
dst += clip_x * 4;
|
||||
}
|
||||
if (clip_y) {
|
||||
int64 clipf = (int64)(clip_y) * dy;
|
||||
int64_t clipf = (int64_t)(clip_y)*dy;
|
||||
y += (clipf & 0xffff);
|
||||
src += (clipf >> 16) * src_stride;
|
||||
dst += clip_y * dst_stride;
|
||||
@@ -725,24 +944,20 @@ static void ScaleARGB(const uint8* src, int src_stride,
|
||||
if (!(dx & 0x10000) && !(dy & 0x10000)) {
|
||||
if (dx == 0x20000) {
|
||||
// Optimized 1/2 downsample.
|
||||
ScaleARGBDown2(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy, filtering);
|
||||
ScaleARGBDown2(src_width, src_height, clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
||||
filtering);
|
||||
return;
|
||||
}
|
||||
if (dx == 0x40000 && filtering == kFilterBox) {
|
||||
// Optimized 1/4 box downsample.
|
||||
ScaleARGBDown4Box(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy);
|
||||
ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst, x, dx, y, dy);
|
||||
return;
|
||||
}
|
||||
ScaleARGBDownEven(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy, filtering);
|
||||
ScaleARGBDownEven(src_width, src_height, clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
||||
filtering);
|
||||
return;
|
||||
}
|
||||
// Optimized odd scale down. ie 3, 5, 7, 9x.
|
||||
@@ -759,96 +974,105 @@ static void ScaleARGB(const uint8* src, int src_stride,
|
||||
}
|
||||
if (dx == 0x10000 && (x & 0xffff) == 0) {
|
||||
// Arbitrary scale vertically, but unscaled vertically.
|
||||
ScalePlaneVertical(src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, y, dy, 4, filtering);
|
||||
ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
|
||||
dst_stride, src, dst, x, y, dy, 4, filtering);
|
||||
return;
|
||||
}
|
||||
if (filtering && dy < 65536) {
|
||||
ScaleARGBBilinearUp(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy, filtering);
|
||||
ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
||||
filtering);
|
||||
return;
|
||||
}
|
||||
if (filtering) {
|
||||
ScaleARGBBilinearDown(src_width, src_height,
|
||||
clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy, filtering);
|
||||
ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst, x, dx, y, dy,
|
||||
filtering);
|
||||
return;
|
||||
}
|
||||
ScaleARGBSimple(src_width, src_height, clip_width, clip_height,
|
||||
src_stride, dst_stride, src, dst,
|
||||
x, dx, y, dy);
|
||||
ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
|
||||
dst_stride, src, dst, x, dx, y, dy);
|
||||
}
|
||||
|
||||
LIBYUV_API
|
||||
int ARGBScaleClip(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
int ARGBScaleClip(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int clip_x,
|
||||
int clip_y,
|
||||
int clip_width,
|
||||
int clip_height,
|
||||
enum FilterMode filtering) {
|
||||
if (!src_argb || src_width == 0 || src_height == 0 ||
|
||||
!dst_argb || dst_width <= 0 || dst_height <= 0 ||
|
||||
clip_x < 0 || clip_y < 0 ||
|
||||
if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb ||
|
||||
dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 ||
|
||||
clip_width > 32768 || clip_height > 32768 ||
|
||||
(clip_x + clip_width) > dst_width ||
|
||||
(clip_y + clip_height) > dst_height) {
|
||||
return -1;
|
||||
}
|
||||
ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
|
||||
dst_argb, dst_stride_argb, dst_width, dst_height,
|
||||
clip_x, clip_y, clip_width, clip_height, filtering);
|
||||
ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
|
||||
dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
|
||||
clip_height, filtering);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Scale an ARGB image.
|
||||
LIBYUV_API
|
||||
int ARGBScale(const uint8* src_argb, int src_stride_argb,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
int dst_width, int dst_height,
|
||||
int ARGBScale(const uint8_t* src_argb,
|
||||
int src_stride_argb,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
enum FilterMode filtering) {
|
||||
if (!src_argb || src_width == 0 || src_height == 0 ||
|
||||
src_width > 32768 || src_height > 32768 ||
|
||||
!dst_argb || dst_width <= 0 || dst_height <= 0) {
|
||||
if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 ||
|
||||
src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
|
||||
return -1;
|
||||
}
|
||||
ScaleARGB(src_argb, src_stride_argb, src_width, src_height,
|
||||
dst_argb, dst_stride_argb, dst_width, dst_height,
|
||||
0, 0, dst_width, dst_height, filtering);
|
||||
ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
|
||||
dst_stride_argb, dst_width, dst_height, 0, 0, dst_width, dst_height,
|
||||
filtering);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Scale with YUV conversion to ARGB and clipping.
|
||||
LIBYUV_API
|
||||
int YUVToARGBScaleClip(const uint8* src_y, int src_stride_y,
|
||||
const uint8* src_u, int src_stride_u,
|
||||
const uint8* src_v, int src_stride_v,
|
||||
uint32 src_fourcc,
|
||||
int src_width, int src_height,
|
||||
uint8* dst_argb, int dst_stride_argb,
|
||||
uint32 dst_fourcc,
|
||||
int dst_width, int dst_height,
|
||||
int clip_x, int clip_y, int clip_width, int clip_height,
|
||||
int YUVToARGBScaleClip(const uint8_t* src_y,
|
||||
int src_stride_y,
|
||||
const uint8_t* src_u,
|
||||
int src_stride_u,
|
||||
const uint8_t* src_v,
|
||||
int src_stride_v,
|
||||
uint32_t src_fourcc,
|
||||
int src_width,
|
||||
int src_height,
|
||||
uint8_t* dst_argb,
|
||||
int dst_stride_argb,
|
||||
uint32_t dst_fourcc,
|
||||
int dst_width,
|
||||
int dst_height,
|
||||
int clip_x,
|
||||
int clip_y,
|
||||
int clip_width,
|
||||
int clip_height,
|
||||
enum FilterMode filtering) {
|
||||
uint8* argb_buffer = (uint8*)malloc(src_width * src_height * 4);
|
||||
uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
|
||||
int r;
|
||||
I420ToARGB(src_y, src_stride_y,
|
||||
src_u, src_stride_u,
|
||||
src_v, src_stride_v,
|
||||
argb_buffer, src_width * 4,
|
||||
src_width, src_height);
|
||||
(void)src_fourcc; // TODO(fbarchard): implement and/or assert.
|
||||
(void)dst_fourcc;
|
||||
I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
|
||||
argb_buffer, src_width * 4, src_width, src_height);
|
||||
|
||||
r = ARGBScaleClip(argb_buffer, src_width * 4,
|
||||
src_width, src_height,
|
||||
dst_argb, dst_stride_argb,
|
||||
dst_width, dst_height,
|
||||
clip_x, clip_y, clip_width, clip_height,
|
||||
filtering);
|
||||
r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height, dst_argb,
|
||||
dst_stride_argb, dst_width, dst_height, clip_x, clip_y,
|
||||
clip_width, clip_height, filtering);
|
||||
free(argb_buffer);
|
||||
return r;
|
||||
}
|
||||
|
||||
+495
-321
File diff suppressed because it is too large
Load Diff
+1166
-1114
File diff suppressed because it is too large
Load Diff
@@ -1,644 +0,0 @@
|
||||
/*
|
||||
* Copyright 2012 The LibYuv Project Authors. All rights reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "libyuv/basic_types.h"
|
||||
#include "libyuv/row.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
namespace libyuv {
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// This module is for GCC MIPS DSPR2
|
||||
#if !defined(LIBYUV_DISABLE_MIPS) && \
|
||||
defined(__mips_dsp) && (__mips_dsp_rev >= 2) && \
|
||||
(_MIPS_SIM == _MIPS_SIM_ABI32)
|
||||
|
||||
void ScaleRowDown2_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
__asm__ __volatile__(
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
"srl $t9, %[dst_width], 4 \n" // iterations -> by 16
|
||||
"beqz $t9, 2f \n"
|
||||
" nop \n"
|
||||
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
|
||||
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
|
||||
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
|
||||
"lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
|
||||
"lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
|
||||
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
|
||||
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
|
||||
// TODO(fbarchard): Use odd pixels instead of even.
|
||||
"precr.qb.ph $t8, $t1, $t0 \n" // |6|4|2|0|
|
||||
"precr.qb.ph $t0, $t3, $t2 \n" // |14|12|10|8|
|
||||
"precr.qb.ph $t1, $t5, $t4 \n" // |22|20|18|16|
|
||||
"precr.qb.ph $t2, $t7, $t6 \n" // |30|28|26|24|
|
||||
"addiu %[src_ptr], %[src_ptr], 32 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"sw $t8, 0(%[dst]) \n"
|
||||
"sw $t0, 4(%[dst]) \n"
|
||||
"sw $t1, 8(%[dst]) \n"
|
||||
"sw $t2, 12(%[dst]) \n"
|
||||
"bgtz $t9, 1b \n"
|
||||
" addiu %[dst], %[dst], 16 \n"
|
||||
|
||||
"2: \n"
|
||||
"andi $t9, %[dst_width], 0xf \n" // residue
|
||||
"beqz $t9, 3f \n"
|
||||
" nop \n"
|
||||
|
||||
"21: \n"
|
||||
"lbu $t0, 0(%[src_ptr]) \n"
|
||||
"addiu %[src_ptr], %[src_ptr], 2 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"sb $t0, 0(%[dst]) \n"
|
||||
"bgtz $t9, 21b \n"
|
||||
" addiu %[dst], %[dst], 1 \n"
|
||||
|
||||
"3: \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst] "+r" (dst)
|
||||
: [dst_width] "r" (dst_width)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9"
|
||||
);
|
||||
}
|
||||
|
||||
void ScaleRowDown2Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
const uint8* t = src_ptr + src_stride;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
"srl $t9, %[dst_width], 3 \n" // iterations -> step 8
|
||||
"bltz $t9, 2f \n"
|
||||
" nop \n"
|
||||
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
|
||||
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
|
||||
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
|
||||
"lw $t4, 0(%[t]) \n" // |19|18|17|16|
|
||||
"lw $t5, 4(%[t]) \n" // |23|22|21|20|
|
||||
"lw $t6, 8(%[t]) \n" // |27|26|25|24|
|
||||
"lw $t7, 12(%[t]) \n" // |31|30|29|28|
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"srl $t8, $t0, 16 \n" // |X|X|3|2|
|
||||
"ins $t0, $t4, 16, 16 \n" // |17|16|1|0|
|
||||
"ins $t4, $t8, 0, 16 \n" // |19|18|3|2|
|
||||
"raddu.w.qb $t0, $t0 \n" // |17+16+1+0|
|
||||
"raddu.w.qb $t4, $t4 \n" // |19+18+3+2|
|
||||
"shra_r.w $t0, $t0, 2 \n" // |t0+2|>>2
|
||||
"shra_r.w $t4, $t4, 2 \n" // |t4+2|>>2
|
||||
"srl $t8, $t1, 16 \n" // |X|X|7|6|
|
||||
"ins $t1, $t5, 16, 16 \n" // |21|20|5|4|
|
||||
"ins $t5, $t8, 0, 16 \n" // |22|23|7|6|
|
||||
"raddu.w.qb $t1, $t1 \n" // |21+20+5+4|
|
||||
"raddu.w.qb $t5, $t5 \n" // |23+22+7+6|
|
||||
"shra_r.w $t1, $t1, 2 \n" // |t1+2|>>2
|
||||
"shra_r.w $t5, $t5, 2 \n" // |t5+2|>>2
|
||||
"srl $t8, $t2, 16 \n" // |X|X|11|10|
|
||||
"ins $t2, $t6, 16, 16 \n" // |25|24|9|8|
|
||||
"ins $t6, $t8, 0, 16 \n" // |27|26|11|10|
|
||||
"raddu.w.qb $t2, $t2 \n" // |25+24+9+8|
|
||||
"raddu.w.qb $t6, $t6 \n" // |27+26+11+10|
|
||||
"shra_r.w $t2, $t2, 2 \n" // |t2+2|>>2
|
||||
"shra_r.w $t6, $t6, 2 \n" // |t5+2|>>2
|
||||
"srl $t8, $t3, 16 \n" // |X|X|15|14|
|
||||
"ins $t3, $t7, 16, 16 \n" // |29|28|13|12|
|
||||
"ins $t7, $t8, 0, 16 \n" // |31|30|15|14|
|
||||
"raddu.w.qb $t3, $t3 \n" // |29+28+13+12|
|
||||
"raddu.w.qb $t7, $t7 \n" // |31+30+15+14|
|
||||
"shra_r.w $t3, $t3, 2 \n" // |t3+2|>>2
|
||||
"shra_r.w $t7, $t7, 2 \n" // |t7+2|>>2
|
||||
"addiu %[src_ptr], %[src_ptr], 16 \n"
|
||||
"addiu %[t], %[t], 16 \n"
|
||||
"sb $t0, 0(%[dst]) \n"
|
||||
"sb $t4, 1(%[dst]) \n"
|
||||
"sb $t1, 2(%[dst]) \n"
|
||||
"sb $t5, 3(%[dst]) \n"
|
||||
"sb $t2, 4(%[dst]) \n"
|
||||
"sb $t6, 5(%[dst]) \n"
|
||||
"sb $t3, 6(%[dst]) \n"
|
||||
"sb $t7, 7(%[dst]) \n"
|
||||
"bgtz $t9, 1b \n"
|
||||
" addiu %[dst], %[dst], 8 \n"
|
||||
|
||||
"2: \n"
|
||||
"andi $t9, %[dst_width], 0x7 \n" // x = residue
|
||||
"beqz $t9, 3f \n"
|
||||
" nop \n"
|
||||
|
||||
"21: \n"
|
||||
"lwr $t1, 0(%[src_ptr]) \n"
|
||||
"lwl $t1, 3(%[src_ptr]) \n"
|
||||
"lwr $t2, 0(%[t]) \n"
|
||||
"lwl $t2, 3(%[t]) \n"
|
||||
"srl $t8, $t1, 16 \n"
|
||||
"ins $t1, $t2, 16, 16 \n"
|
||||
"ins $t2, $t8, 0, 16 \n"
|
||||
"raddu.w.qb $t1, $t1 \n"
|
||||
"raddu.w.qb $t2, $t2 \n"
|
||||
"shra_r.w $t1, $t1, 2 \n"
|
||||
"shra_r.w $t2, $t2, 2 \n"
|
||||
"sb $t1, 0(%[dst]) \n"
|
||||
"sb $t2, 1(%[dst]) \n"
|
||||
"addiu %[src_ptr], %[src_ptr], 4 \n"
|
||||
"addiu $t9, $t9, -2 \n"
|
||||
"addiu %[t], %[t], 4 \n"
|
||||
"bgtz $t9, 21b \n"
|
||||
" addiu %[dst], %[dst], 2 \n"
|
||||
|
||||
"3: \n"
|
||||
".set pop \n"
|
||||
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst] "+r" (dst), [t] "+r" (t)
|
||||
: [dst_width] "r" (dst_width)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9"
|
||||
);
|
||||
}
|
||||
|
||||
// Point-samples one row down to 1/4 width: output byte i is a copy of source
// byte 4 * i. src_stride is not referenced; only the row at src_ptr is read.
//   src_ptr    - source row; advanced inside the asm as it is consumed.
//   dst        - destination row; receives dst_width bytes.
//   dst_width  - number of output bytes to produce.
// The main loop emits 8 output bytes per pass from 32 source bytes; the
// residue loop then copies the remaining (dst_width & 7) bytes one at a time.
// The block runs under ".set noreorder", so the instruction written right
// after each branch (shown with a leading space) is that branch's delay slot.
// NOTE(review): the main loop uses whole-word lw/sw, so src_ptr and dst
// presumably must be 4-byte aligned - confirm against the caller.
void ScaleRowDown4_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
// $t9 = dst_width / 8 = number of main-loop passes; skip the loop if zero.
"srl $t9, %[dst_width], 3 \n"
|
||||
"beqz $t9, 2f \n"
|
||||
" nop \n"
|
||||
|
||||
"1: \n"
|
||||
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
|
||||
"lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
|
||||
"lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
|
||||
"lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
|
||||
"lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
|
||||
"lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
|
||||
"lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
|
||||
// First precr pass keeps every 2nd byte, second pass keeps every 4th.
"precr.qb.ph $t1, $t2, $t1 \n" // |6|4|2|0|
|
||||
"precr.qb.ph $t2, $t4, $t3 \n" // |14|12|10|8|
|
||||
"precr.qb.ph $t5, $t6, $t5 \n" // |22|20|18|16|
|
||||
"precr.qb.ph $t6, $t8, $t7 \n" // |30|28|26|24|
|
||||
"precr.qb.ph $t1, $t2, $t1 \n" // |12|8|4|0|
|
||||
"precr.qb.ph $t5, $t6, $t5 \n" // |28|24|20|16|
|
||||
"addiu %[src_ptr], %[src_ptr], 32 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"sw $t1, 0(%[dst]) \n"
|
||||
"sw $t5, 4(%[dst]) \n"
|
||||
"bgtz $t9, 1b \n"
|
||||
" addiu %[dst], %[dst], 8 \n"
|
||||
|
||||
"2: \n"
|
||||
"andi $t9, %[dst_width], 7 \n" // residue
|
||||
"beqz $t9, 3f \n"
|
||||
" nop \n"
|
||||
|
||||
// Residue loop: one output byte per pass, source advances by 4.
"21: \n"
|
||||
"lbu $t1, 0(%[src_ptr]) \n"
|
||||
"addiu %[src_ptr], %[src_ptr], 4 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"sb $t1, 0(%[dst]) \n"
|
||||
"bgtz $t9, 21b \n"
|
||||
" addiu %[dst], %[dst], 1 \n"
|
||||
|
||||
"3: \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst] "+r" (dst)
|
||||
: [dst_width] "r" (dst_width)
|
||||
: "t1", "t2", "t3", "t4", "t5",
|
||||
"t6", "t7", "t8", "t9"
|
||||
);
|
||||
}
|
||||
|
||||
// Box-filters four rows down to 1/4 width: each output byte is the rounded
// average (16-byte sum, then shra_r.w by 4) of a 4x4 block of source bytes
// taken from the rows at src_ptr, src_ptr + src_stride,
// src_ptr + 2 * src_stride and src_ptr + 3 * src_stride.
//   src_ptr    - first of the four source rows.
//   src_stride - byte distance between consecutive source rows.
//   dst        - destination row.
//   dst_width  - number of output bytes requested.
// The main loop produces two output bytes per pass (dst_width >> 1 passes);
// an odd dst_width is finished by the single-output tail after the loop.
// The |n| labels in the load comments number the 16 bytes of each 4x4 block
// row by row; they are not byte offsets within a row.
// NOTE(review): label 1 is entered without testing $t9, so the two-output
// loop body runs at least once even when dst_width < 2 - confirm callers
// never pass such widths.
// NOTE(review): all loads are whole-word lw, so the row pointers presumably
// must be 4-byte aligned - confirm against the caller.
void ScaleRowDown4Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
intptr_t stride = src_stride;
|
||||
const uint8* s1 = src_ptr + stride;
|
||||
const uint8* s2 = s1 + stride;
|
||||
const uint8* s3 = s2 + stride;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
// $t9 = number of two-output passes, $t8 = 1 if one output is left over.
"srl $t9, %[dst_width], 1 \n"
|
||||
"andi $t8, %[dst_width], 1 \n"
|
||||
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t1, 0(%[s1]) \n" // |7|6|5|4|
|
||||
"lw $t2, 0(%[s2]) \n" // |11|10|9|8|
|
||||
"lw $t3, 0(%[s3]) \n" // |15|14|13|12|
|
||||
"lw $t4, 4(%[src_ptr]) \n" // |19|18|17|16|
|
||||
"lw $t5, 4(%[s1]) \n" // |23|22|21|20|
|
||||
"lw $t6, 4(%[s2]) \n" // |27|26|25|24|
|
||||
"lw $t7, 4(%[s3]) \n" // |31|30|29|28|
|
||||
"raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
|
||||
"raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
|
||||
"raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
|
||||
"raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
|
||||
"raddu.w.qb $t4, $t4 \n" // |19 + 18 + 17 + 16|
|
||||
"raddu.w.qb $t5, $t5 \n" // |23 + 22 + 21 + 20|
|
||||
"raddu.w.qb $t6, $t6 \n" // |27 + 26 + 25 + 24|
|
||||
"raddu.w.qb $t7, $t7 \n" // |31 + 30 + 29 + 28|
|
||||
// $t0 = sum of the first 4x4 block, $t4 = sum of the second 4x4 block.
"add $t0, $t0, $t1 \n"
|
||||
"add $t1, $t2, $t3 \n"
|
||||
"add $t0, $t0, $t1 \n"
|
||||
"add $t4, $t4, $t5 \n"
|
||||
"add $t6, $t6, $t7 \n"
|
||||
"add $t4, $t4, $t6 \n"
|
||||
// Rounding shift by 4 divides each 16-byte sum by 16.
"shra_r.w $t0, $t0, 4 \n"
|
||||
"shra_r.w $t4, $t4, 4 \n"
|
||||
"sb $t0, 0(%[dst]) \n"
|
||||
"sb $t4, 1(%[dst]) \n"
|
||||
"addiu %[src_ptr], %[src_ptr], 8 \n"
|
||||
"addiu %[s1], %[s1], 8 \n"
|
||||
"addiu %[s2], %[s2], 8 \n"
|
||||
"addiu %[s3], %[s3], 8 \n"
|
||||
"addiu $t9, $t9, -1 \n"
|
||||
"bgtz $t9, 1b \n"
|
||||
" addiu %[dst], %[dst], 2 \n"
|
||||
// Tail: one more 4x4 block when dst_width is odd.
"beqz $t8, 2f \n"
|
||||
" nop \n"
|
||||
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t1, 0(%[s1]) \n" // |7|6|5|4|
|
||||
"lw $t2, 0(%[s2]) \n" // |11|10|9|8|
|
||||
"lw $t3, 0(%[s3]) \n" // |15|14|13|12|
|
||||
"raddu.w.qb $t0, $t0 \n" // |3 + 2 + 1 + 0|
|
||||
"raddu.w.qb $t1, $t1 \n" // |7 + 6 + 5 + 4|
|
||||
"raddu.w.qb $t2, $t2 \n" // |11 + 10 + 9 + 8|
|
||||
"raddu.w.qb $t3, $t3 \n" // |15 + 14 + 13 + 12|
|
||||
"add $t0, $t0, $t1 \n"
|
||||
"add $t1, $t2, $t3 \n"
|
||||
"add $t0, $t0, $t1 \n"
|
||||
"shra_r.w $t0, $t0, 4 \n"
|
||||
"sb $t0, 0(%[dst]) \n"
|
||||
|
||||
"2: \n"
|
||||
".set pop \n"
|
||||
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst] "+r" (dst),
|
||||
[s1] "+r" (s1),
|
||||
[s2] "+r" (s2),
|
||||
[s3] "+r" (s3)
|
||||
: [dst_width] "r" (dst_width)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6","t7", "t8", "t9"
|
||||
);
|
||||
}
|
||||
|
||||
// Point-samples one row down to 3/4 width: of every 4 source bytes, bytes
// 0, 1 and 3 are kept and byte 2 is dropped. src_stride is not referenced.
//   src_ptr    - source row; 32 bytes are read per loop pass.
//   dst        - destination row; 24 bytes are written per loop pass.
//   dst_width  - number of output bytes; decremented by 24 per pass.
// NOTE(review): there is no entry check and the loop exits only when
// dst_width reaches exactly zero (bnez), so dst_width presumably must be a
// positive multiple of 24 - confirm against the caller.
// NOTE(review): whole-word lw/sw are used, so src_ptr and dst presumably
// must be 4-byte aligned - confirm against the caller.
void ScaleRowDown34_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"1: \n"
|
||||
"lw $t1, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t2, 4(%[src_ptr]) \n" // |7|6|5|4|
|
||||
"lw $t3, 8(%[src_ptr]) \n" // |11|10|9|8|
|
||||
"lw $t4, 12(%[src_ptr]) \n" // |15|14|13|12|
|
||||
"lw $t5, 16(%[src_ptr]) \n" // |19|18|17|16|
|
||||
"lw $t6, 20(%[src_ptr]) \n" // |23|22|21|20|
|
||||
"lw $t7, 24(%[src_ptr]) \n" // |27|26|25|24|
|
||||
"lw $t8, 28(%[src_ptr]) \n" // |31|30|29|28|
|
||||
"precrq.qb.ph $t0, $t2, $t4 \n" // |7|5|15|13|
|
||||
"precrq.qb.ph $t9, $t6, $t8 \n" // |23|21|31|29|
|
||||
"addiu %[dst_width], %[dst_width], -24 \n"
|
||||
"ins $t1, $t1, 8, 16 \n" // |3|1|0|X|
|
||||
"ins $t4, $t0, 8, 16 \n" // |X|15|13|12|
|
||||
"ins $t5, $t5, 8, 16 \n" // |19|17|16|X|
|
||||
"ins $t8, $t9, 8, 16 \n" // |X|31|29|28|
|
||||
"addiu %[src_ptr], %[src_ptr], 32 \n"
|
||||
"packrl.ph $t0, $t3, $t0 \n" // |9|8|7|5|
|
||||
"packrl.ph $t9, $t7, $t9 \n" // |25|24|23|21|
|
||||
"prepend $t1, $t2, 8 \n" // |4|3|1|0|
|
||||
"prepend $t3, $t4, 24 \n" // |15|13|12|11|
|
||||
"prepend $t5, $t6, 8 \n" // |20|19|17|16|
|
||||
"prepend $t7, $t8, 24 \n" // |31|29|28|27|
|
||||
// Six output words = 24 output bytes for this pass.
"sw $t1, 0(%[dst]) \n"
|
||||
"sw $t0, 4(%[dst]) \n"
|
||||
"sw $t3, 8(%[dst]) \n"
|
||||
"sw $t5, 12(%[dst]) \n"
|
||||
"sw $t9, 16(%[dst]) \n"
|
||||
"sw $t7, 20(%[dst]) \n"
|
||||
"bnez %[dst_width], 1b \n"
|
||||
" addiu %[dst], %[dst], 24 \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst] "+r" (dst),
|
||||
[dst_width] "+r" (dst_width)
|
||||
:
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5",
|
||||
"t6","t7", "t8", "t9"
|
||||
);
|
||||
}
|
||||
|
||||
// Filters two rows down to 3/4 width, weighting the row at src_ptr (S) 3:1
// over the row at src_ptr + src_stride (T). Each loop pass reads 4 bytes from
// each row and writes 3 output bytes.
// Horizontal step, applied to each row (all shifts are rounding shra_r):
//   h0 = (3 * p0 + p1) >> 2,  h1 = (p1 + p2) >> 1,  h2 = (p2 + 3 * p3) >> 2
// Vertical step: d[i] = (3 * hS[i] + hT[i]) >> 2.
//   src_ptr    - upper source row; advanced by 4 per pass.
//   src_stride - byte offset from src_ptr to the second row (used by lwx).
//   d          - destination row; advanced by 3 per pass.
//   dst_width  - number of output bytes; decremented by 3 per pass.
// The loop has no entry check and repeats while dst_width > 0 after the
// decrement, so it always runs at least once.
// NOTE(review): dst_width presumably must be a positive multiple of 3, else
// the last pass writes past the requested width - confirm against the caller.
// NOTE(review): label "3:" is not referenced by any branch in this block.
void ScaleRowDown34_0_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* d, int dst_width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"repl.ph $t3, 3 \n" // 0x00030003
|
||||
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
|
||||
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
|
||||
"rotr $t2, $t0, 8 \n" // |S0|S3|S2|S1|
|
||||
"rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
|
||||
"muleu_s.ph.qbl $t4, $t2, $t3 \n" // |S0*3|S3*3|
|
||||
"muleu_s.ph.qbl $t5, $t6, $t3 \n" // |T0*3|T3*3|
|
||||
"andi $t0, $t2, 0xFFFF \n" // |0|0|S2|S1|
|
||||
"andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
|
||||
// Middle output: $t0 = (S1 + S2 + 1) >> 1, $t1 = (T1 + T2 + 1) >> 1.
"raddu.w.qb $t0, $t0 \n"
|
||||
"raddu.w.qb $t1, $t1 \n"
|
||||
"shra_r.w $t0, $t0, 1 \n"
|
||||
"shra_r.w $t1, $t1, 1 \n"
|
||||
"preceu.ph.qbr $t2, $t2 \n" // |0|S2|0|S1|
|
||||
"preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
|
||||
"rotr $t2, $t2, 16 \n" // |0|S1|0|S2|
|
||||
"rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
|
||||
// Outer outputs as halfword pairs: |p1 + 3*p0|p2 + 3*p3| for S and T.
"addu.ph $t2, $t2, $t4 \n"
|
||||
"addu.ph $t6, $t6, $t5 \n"
|
||||
// $t0 = 3 * (S middle value), built as (x << 1) + x.
"sll $t5, $t0, 1 \n"
|
||||
"add $t0, $t5, $t0 \n"
|
||||
"shra_r.ph $t2, $t2, 2 \n"
|
||||
"shra_r.ph $t6, $t6, 2 \n"
|
||||
// $t4 = 3 * (S outer values), per halfword.
"shll.ph $t4, $t2, 1 \n"
|
||||
"addq.ph $t4, $t4, $t2 \n"
|
||||
"addu $t0, $t0, $t1 \n"
|
||||
"addiu %[src_ptr], %[src_ptr], 4 \n"
|
||||
"shra_r.w $t0, $t0, 2 \n"
|
||||
"addu.ph $t6, $t6, $t4 \n"
|
||||
"shra_r.ph $t6, $t6, 2 \n"
|
||||
// High halfword of $t6 is output 0, low halfword is output 2.
"srl $t1, $t6, 16 \n"
|
||||
"addiu %[dst_width], %[dst_width], -3 \n"
|
||||
"sb $t1, 0(%[d]) \n"
|
||||
"sb $t0, 1(%[d]) \n"
|
||||
"sb $t6, 2(%[d]) \n"
|
||||
"bgtz %[dst_width], 1b \n"
|
||||
" addiu %[d], %[d], 3 \n"
|
||||
"3: \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[src_stride] "+r" (src_stride),
|
||||
[d] "+r" (d),
|
||||
[dst_width] "+r" (dst_width)
|
||||
:
|
||||
: "t0", "t1", "t2", "t3",
|
||||
"t4", "t5", "t6"
|
||||
);
|
||||
}
|
||||
|
||||
// Filters two rows down to 3/4 width, averaging the row at src_ptr (S) and
// the row at src_ptr + src_stride (T) with equal weight. Each loop pass reads
// 4 bytes from each row and writes 3 output bytes.
// Horizontal step, applied to each row (all shifts are rounding shra_r):
//   h0 = (3 * p0 + p1) >> 2,  h1 = (p1 + p2) >> 1,  h2 = (p2 + 3 * p3) >> 2
// Vertical step: d[i] = (hS[i] + hT[i]) >> 1.
//   src_ptr    - upper source row; advanced by 4 per pass.
//   src_stride - byte offset from src_ptr to the second row (used by lwx).
//   d          - destination row; advanced by 3 per pass.
//   dst_width  - number of output bytes; decremented by 3 per pass.
// The loop has no entry check and repeats while dst_width > 0 after the
// decrement, so it always runs at least once.
// NOTE(review): dst_width presumably must be a positive multiple of 3, else
// the last pass writes past the requested width - confirm against the caller.
// NOTE(review): label "3:" is not referenced by any branch in this block.
void ScaleRowDown34_1_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* d, int dst_width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
"repl.ph $t2, 3 \n" // 0x00030003
|
||||
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
|
||||
"lwx $t1, %[src_stride](%[src_ptr]) \n" // |T3|T2|T1|T0|
|
||||
"rotr $t4, $t0, 8 \n" // |S0|S3|S2|S1|
|
||||
"rotr $t6, $t1, 8 \n" // |T0|T3|T2|T1|
|
||||
"muleu_s.ph.qbl $t3, $t4, $t2 \n" // |S0*3|S3*3|
|
||||
"muleu_s.ph.qbl $t5, $t6, $t2 \n" // |T0*3|T3*3|
|
||||
"andi $t0, $t4, 0xFFFF \n" // |0|0|S2|S1|
|
||||
"andi $t1, $t6, 0xFFFF \n" // |0|0|T2|T1|
|
||||
// Middle output: $t0 = (S1 + S2 + 1) >> 1, $t1 = (T1 + T2 + 1) >> 1.
"raddu.w.qb $t0, $t0 \n"
|
||||
"raddu.w.qb $t1, $t1 \n"
|
||||
"shra_r.w $t0, $t0, 1 \n"
|
||||
"shra_r.w $t1, $t1, 1 \n"
|
||||
"preceu.ph.qbr $t4, $t4 \n" // |0|S2|0|S1|
|
||||
"preceu.ph.qbr $t6, $t6 \n" // |0|T2|0|T1|
|
||||
"rotr $t4, $t4, 16 \n" // |0|S1|0|S2|
|
||||
"rotr $t6, $t6, 16 \n" // |0|T1|0|T2|
|
||||
// Outer outputs as halfword pairs: |p1 + 3*p0|p2 + 3*p3| for S and T.
"addu.ph $t4, $t4, $t3 \n"
|
||||
"addu.ph $t6, $t6, $t5 \n"
|
||||
"shra_r.ph $t6, $t6, 2 \n"
|
||||
"shra_r.ph $t4, $t4, 2 \n"
|
||||
// Vertical average of the two rows, per halfword.
"addu.ph $t6, $t6, $t4 \n"
|
||||
"addiu %[src_ptr], %[src_ptr], 4 \n"
|
||||
"shra_r.ph $t6, $t6, 1 \n"
|
||||
"addu $t0, $t0, $t1 \n"
|
||||
"addiu %[dst_width], %[dst_width], -3 \n"
|
||||
"shra_r.w $t0, $t0, 1 \n"
|
||||
// High halfword of $t6 is output 0, low halfword is output 2.
"srl $t1, $t6, 16 \n"
|
||||
"sb $t1, 0(%[d]) \n"
|
||||
"sb $t0, 1(%[d]) \n"
|
||||
"sb $t6, 2(%[d]) \n"
|
||||
"bgtz %[dst_width], 1b \n"
|
||||
" addiu %[d], %[d], 3 \n"
|
||||
"3: \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[src_stride] "+r" (src_stride),
|
||||
[d] "+r" (d),
|
||||
[dst_width] "+r" (dst_width)
|
||||
:
|
||||
: "t0", "t1", "t2", "t3",
|
||||
"t4", "t5", "t6"
|
||||
);
|
||||
}
|
||||
|
||||
// Point-samples one row down to 3/8 width: each loop pass reads 32 source
// bytes and writes 12 output bytes taken from source offsets
// 0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30. src_stride is not referenced.
//   src_ptr    - source row; advanced by 32 per pass.
//   dst        - destination row; advanced by 12 per pass.
//   dst_width  - number of output bytes; decremented by 12 per pass.
// The loop has no entry check; it repeats while at least 12 outputs remain
// after the decrement ($t8 = remaining - 12 >= 0), so it runs at least once.
// NOTE(review): widths that are not a multiple of 12 leave a remainder
// unwritten by this routine - confirm how the caller handles that.
// NOTE(review): whole-word lw/sw are used, so src_ptr and dst presumably
// must be 4-byte aligned - confirm against the caller.
void ScaleRowDown38_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst, int dst_width) {
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |3|2|1|0|
|
||||
"lw $t1, 4(%[src_ptr]) \n" // |7|6|5|4|
|
||||
"lw $t2, 8(%[src_ptr]) \n" // |11|10|9|8|
|
||||
"lw $t3, 12(%[src_ptr]) \n" // |15|14|13|12|
|
||||
"lw $t4, 16(%[src_ptr]) \n" // |19|18|17|16|
|
||||
"lw $t5, 20(%[src_ptr]) \n" // |23|22|21|20|
|
||||
"lw $t6, 24(%[src_ptr]) \n" // |27|26|25|24|
|
||||
"lw $t7, 28(%[src_ptr]) \n" // |31|30|29|28|
|
||||
"wsbh $t0, $t0 \n" // |2|3|0|1|
|
||||
"wsbh $t6, $t6 \n" // |26|27|24|25|
|
||||
"srl $t0, $t0, 8 \n" // |X|2|3|0|
|
||||
"srl $t3, $t3, 16 \n" // |X|X|15|14|
|
||||
"srl $t5, $t5, 16 \n" // |X|X|23|22|
|
||||
"srl $t7, $t7, 16 \n" // |X|X|31|30|
|
||||
"ins $t1, $t2, 24, 8 \n" // |8|6|5|4|
|
||||
"ins $t6, $t5, 0, 8 \n" // |26|27|24|22|
|
||||
"ins $t1, $t0, 0, 16 \n" // |8|6|3|0|
|
||||
"ins $t6, $t7, 24, 8 \n" // |30|27|24|22|
|
||||
"prepend $t2, $t3, 24 \n" // |X|15|14|11|
|
||||
"ins $t4, $t4, 16, 8 \n" // |19|16|17|X|
|
||||
"ins $t4, $t2, 0, 16 \n" // |19|16|14|11|
|
||||
"addiu %[src_ptr], %[src_ptr], 32 \n"
|
||||
"addiu %[dst_width], %[dst_width], -12 \n"
|
||||
// $t8 >= 0 means another full group of 12 outputs is still needed.
"addiu $t8,%[dst_width], -12 \n"
|
||||
"sw $t1, 0(%[dst]) \n"
|
||||
"sw $t4, 4(%[dst]) \n"
|
||||
"sw $t6, 8(%[dst]) \n"
|
||||
"bgez $t8, 1b \n"
|
||||
" addiu %[dst], %[dst], 12 \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst] "+r" (dst),
|
||||
[dst_width] "+r" (dst_width)
|
||||
:
|
||||
: "t0", "t1", "t2", "t3", "t4",
|
||||
"t5", "t6", "t7", "t8"
|
||||
);
|
||||
}
|
||||
|
||||
// Box-filters two rows down to 3/8 width. Each loop pass reads 8 bytes from
// the row at src_ptr (S) and 8 from the row at src_ptr + src_stride (T), and
// writes 3 output bytes:
//   dst_ptr[0] = ((S0+S1+S2 + T0+T1+T2) * 0x2AAA) >> 16   (3x2 box)
//   dst_ptr[1] = ((S3+S4+S5 + T3+T4+T5) * 0x2AAA) >> 16   (3x2 box)
//   dst_ptr[2] =  (S6+S7 + T6+T7) >> 2                    (2x2 box)
// 0x2AAA is 65536 / 6 truncated, so multiply-then-shift-by-16 approximates a
// divide by 6. None of the three results is rounded (plain srl).
//   src_ptr    - upper source row; advanced by 8 per pass.
//   src_stride - byte distance from src_ptr to the second row.
//   dst_ptr    - destination row; advanced by 3 per pass.
//   dst_width  - number of output bytes; decremented by 3 per pass.
// The loop has no entry check and repeats while dst_width > 0 after the
// decrement, so it always runs at least once.
// NOTE(review): dst_width presumably must be a positive multiple of 3, else
// the last pass writes past the requested width - confirm against the caller.
void ScaleRowDown38_2_Box_DSPR2(const uint8* src_ptr, ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
intptr_t stride = src_stride;
|
||||
const uint8* t = src_ptr + stride;
|
||||
const int c = 0x2AAA;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
|
||||
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
|
||||
"lw $t2, 0(%[t]) \n" // |T3|T2|T1|T0|
|
||||
"lw $t3, 4(%[t]) \n" // |T7|T6|T5|T4|
|
||||
"rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
|
||||
"packrl.ph $t4, $t1, $t3 \n" // |S7|S6|T7|T6|
|
||||
"packrl.ph $t5, $t3, $t1 \n" // |T5|T4|S5|S4|
|
||||
"raddu.w.qb $t4, $t4 \n" // S7+S6+T7+T6
|
||||
"raddu.w.qb $t5, $t5 \n" // T5+T4+S5+S4
|
||||
"precrq.qb.ph $t6, $t0, $t2 \n" // |S3|S1|T3|T1|
|
||||
"precrq.qb.ph $t6, $t6, $t6 \n" // |S3|T3|S3|T3|
|
||||
"srl $t4, $t4, 2 \n" // t4 / 4
|
||||
"srl $t6, $t6, 16 \n" // |0|0|S3|T3|
|
||||
"raddu.w.qb $t6, $t6 \n" // 0+0+S3+T3
|
||||
// $t6 = sum of columns 3..5 over both rows.
"addu $t6, $t5, $t6 \n"
|
||||
"mul $t6, $t6, %[c] \n" // t6 * 0x2AAA
|
||||
"sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
|
||||
"sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
|
||||
"raddu.w.qb $t0, $t0 \n" // S2+S1+S0+0
|
||||
"raddu.w.qb $t2, $t2 \n" // T2+T1+T0+0
|
||||
"addu $t0, $t0, $t2 \n"
|
||||
"mul $t0, $t0, %[c] \n" // t0 * 0x2AAA
|
||||
"addiu %[src_ptr], %[src_ptr], 8 \n"
|
||||
"addiu %[t], %[t], 8 \n"
|
||||
"addiu %[dst_width], %[dst_width], -3 \n"
|
||||
// dst_ptr is advanced before the stores, hence the negative offsets below.
"addiu %[dst_ptr], %[dst_ptr], 3 \n"
|
||||
"srl $t6, $t6, 16 \n"
|
||||
"srl $t0, $t0, 16 \n"
|
||||
"sb $t4, -1(%[dst_ptr]) \n"
|
||||
"sb $t6, -2(%[dst_ptr]) \n"
|
||||
"bgtz %[dst_width], 1b \n"
|
||||
" sb $t0, -3(%[dst_ptr]) \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst_ptr] "+r" (dst_ptr),
|
||||
[t] "+r" (t),
|
||||
[dst_width] "+r" (dst_width)
|
||||
: [c] "r" (c)
|
||||
: "t0", "t1", "t2", "t3", "t4", "t5", "t6"
|
||||
);
|
||||
}
|
||||
|
||||
// Box-filters three rows down to 3/8 width. Each loop pass reads 8 bytes from
// each of the rows at src_ptr (S), src_ptr + src_stride (T) and
// src_ptr + 2 * src_stride (R), and writes 3 output bytes:
//   dst_ptr[0] = (sum of columns 0..2 over S,T,R * 0x1C71) >> 16   (3x3 box)
//   dst_ptr[1] = (sum of columns 3..5 over S,T,R * 0x1C71) >> 16   (3x3 box)
//   dst_ptr[2] = (sum of columns 6..7 over S,T,R * 0x2AAA) >> 16   (2x3 box)
// 0x1C71 is 65536 / 9 truncated and 0x2AAA is 65536 / 6 truncated, so each
// multiply-then-shift-by-16 approximates a divide by 9 or 6 without rounding.
//   src_ptr    - first source row; advanced by 8 per pass.
//   src_stride - byte distance between consecutive source rows.
//   dst_ptr    - destination row; advanced by 3 per pass.
//   dst_width  - number of output bytes; decremented by 3 per pass.
// The loop has no entry check and repeats while dst_width > 0 after the
// decrement, so it always runs at least once.
// NOTE(review): dst_width presumably must be a positive multiple of 3, else
// the last pass writes past the requested width - confirm against the caller.
void ScaleRowDown38_3_Box_DSPR2(const uint8* src_ptr,
|
||||
ptrdiff_t src_stride,
|
||||
uint8* dst_ptr, int dst_width) {
|
||||
intptr_t stride = src_stride;
|
||||
const uint8* s1 = src_ptr + stride;
|
||||
stride += stride;
|
||||
const uint8* s2 = src_ptr + stride;
|
||||
const int c1 = 0x1C71;
|
||||
const int c2 = 0x2AAA;
|
||||
|
||||
__asm__ __volatile__ (
|
||||
".set push \n"
|
||||
".set noreorder \n"
|
||||
|
||||
"1: \n"
|
||||
"lw $t0, 0(%[src_ptr]) \n" // |S3|S2|S1|S0|
|
||||
"lw $t1, 4(%[src_ptr]) \n" // |S7|S6|S5|S4|
|
||||
"lw $t2, 0(%[s1]) \n" // |T3|T2|T1|T0|
|
||||
"lw $t3, 4(%[s1]) \n" // |T7|T6|T5|T4|
|
||||
"lw $t4, 0(%[s2]) \n" // |R3|R2|R1|R0|
|
||||
"lw $t5, 4(%[s2]) \n" // |R7|R6|R5|R4|
|
||||
"rotr $t1, $t1, 16 \n" // |S5|S4|S7|S6|
|
||||
"packrl.ph $t6, $t1, $t3 \n" // |S7|S6|T7|T6|
|
||||
"raddu.w.qb $t6, $t6 \n" // S7+S6+T7+T6
|
||||
"packrl.ph $t7, $t3, $t1 \n" // |T5|T4|S5|S4|
|
||||
"raddu.w.qb $t7, $t7 \n" // T5+T4+S5+S4
|
||||
"sll $t8, $t5, 16 \n" // |R5|R4|0|0|
|
||||
"raddu.w.qb $t8, $t8 \n" // R5+R4
|
||||
"addu $t7, $t7, $t8 \n"
|
||||
"srl $t8, $t5, 16 \n" // |0|0|R7|R6|
|
||||
"raddu.w.qb $t8, $t8 \n" // R7 + R6
|
||||
// $t6 = sum of columns 6..7 over all three rows (6 bytes).
"addu $t6, $t6, $t8 \n"
|
||||
"mul $t6, $t6, %[c2] \n" // t6 * 0x2AAA
|
||||
"precrq.qb.ph $t8, $t0, $t2 \n" // |S3|S1|T3|T1|
|
||||
"precrq.qb.ph $t8, $t8, $t4 \n" // |S3|T3|R3|R1|
|
||||
"srl $t8, $t8, 8 \n" // |0|S3|T3|R3|
|
||||
"raddu.w.qb $t8, $t8 \n" // S3 + T3 + R3
|
||||
// $t7 = sum of columns 3..5 over all three rows (9 bytes).
"addu $t7, $t7, $t8 \n"
|
||||
"mul $t7, $t7, %[c1] \n" // t7 * 0x1C71
|
||||
"sll $t0, $t0, 8 \n" // |S2|S1|S0|0|
|
||||
"sll $t2, $t2, 8 \n" // |T2|T1|T0|0|
|
||||
"sll $t4, $t4, 8 \n" // |R2|R1|R0|0|
|
||||
"raddu.w.qb $t0, $t0 \n"
|
||||
"raddu.w.qb $t2, $t2 \n"
|
||||
"raddu.w.qb $t4, $t4 \n"
|
||||
// $t0 = sum of columns 0..2 over all three rows (9 bytes).
"addu $t0, $t0, $t2 \n"
|
||||
"addu $t0, $t0, $t4 \n"
|
||||
"mul $t0, $t0, %[c1] \n" // t0 * 0x1C71
|
||||
"addiu %[src_ptr], %[src_ptr], 8 \n"
|
||||
"addiu %[s1], %[s1], 8 \n"
|
||||
"addiu %[s2], %[s2], 8 \n"
|
||||
"addiu %[dst_width], %[dst_width], -3 \n"
|
||||
// dst_ptr is advanced before the stores, hence the negative offsets below.
"addiu %[dst_ptr], %[dst_ptr], 3 \n"
|
||||
"srl $t6, $t6, 16 \n"
|
||||
"srl $t7, $t7, 16 \n"
|
||||
"srl $t0, $t0, 16 \n"
|
||||
"sb $t6, -1(%[dst_ptr]) \n"
|
||||
"sb $t7, -2(%[dst_ptr]) \n"
|
||||
"bgtz %[dst_width], 1b \n"
|
||||
" sb $t0, -3(%[dst_ptr]) \n"
|
||||
".set pop \n"
|
||||
: [src_ptr] "+r" (src_ptr),
|
||||
[dst_ptr] "+r" (dst_ptr),
|
||||
[s1] "+r" (s1),
|
||||
[s2] "+r" (s2),
|
||||
[dst_width] "+r" (dst_width)
|
||||
: [c1] "r" (c1), [c2] "r" (c2)
|
||||
: "t0", "t1", "t2", "t3", "t4",
|
||||
"t5", "t6", "t7", "t8"
|
||||
);
|
||||
}
|
||||
|
||||
#endif // defined(__mips_dsp) && (__mips_dsp_rev >= 2)
|
||||
|
||||
#ifdef __cplusplus
|
||||
} // extern "C"
|
||||
} // namespace libyuv
|
||||
#endif
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user