diff --git a/WORKSPACE b/WORKSPACE index 4d6ed65..987d26c 100755 --- a/WORKSPACE +++ b/WORKSPACE @@ -18,8 +18,19 @@ new_http_archive( new_http_archive( name = "jpeg_archive", - url = "http://www.ijg.org/files/jpegsrc.v9b.tar.gz", - sha256 = "240fd398da741669bf3c90366f58452ea59041cacc741a489b99f2f6a0bad052", - strip_prefix = "jpeg-9b", + url = "https://github.com/libjpeg-turbo/libjpeg-turbo/archive/1.5.1.zip", + sha256 = "0444be83e5bf455eafcab3ff0521fd9ef3b3ff1cbccf273a7a5d4ff2eb4e241d", + strip_prefix = "libjpeg-turbo-1.5.1", build_file = "jpeg.BUILD", ) + +new_http_archive( + name = "nasm", + urls = [ + "http://bazel-mirror.storage.googleapis.com/www.nasm.us/pub/nasm/releasebuilds/2.12.02/nasm-2.12.02.tar.bz2", + "http://www.nasm.us/pub/nasm/releasebuilds/2.12.02/nasm-2.12.02.tar.bz2", + ], + sha256 = "00b0891c678c065446ca59bcee64719d0096d54d6886e6e472aeee2e170ae324", + strip_prefix = "nasm-2.12.02", + build_file = "nasm.BUILD", + ) diff --git a/butteraugli/Makefile b/butteraugli/Makefile index 76b3a9b..c9a45fe 100755 --- a/butteraugli/Makefile +++ b/butteraugli/Makefile @@ -1,6 +1,7 @@ LDLIBS += -lpng -ljpeg CXXFLAGS += -std=c++11 -I.. LINK.o = $(LINK.cc) +CPPFLAGS += $(shell echo 'void* my_alloc1() __attribute__((assume_aligned(16)));' | $(CXX) -x c++ - -c -o /dev/null -Werror 2>/dev/null && echo -DHAS_ASSUME_ALIGNED) all: butteraugli.o butteraugli_main.o butteraugli diff --git a/butteraugli/butteraugli.h b/butteraugli/butteraugli.h index 31824b8..844fd8b 100755 --- a/butteraugli/butteraugli.h +++ b/butteraugli/butteraugli.h @@ -123,10 +123,11 @@ bool ButteraugliAdaptiveQuantization(size_t xsize, size_t ysize, #define BUTTERAUGLI_RESTRICT __restrict__ #endif -#ifdef _MSC_VER -#define BUTTERAUGLI_CACHE_ALIGNED_RETURN /* not supported */ -#else + +#ifdef HAS_ASSUME_ALIGNED #define BUTTERAUGLI_CACHE_ALIGNED_RETURN __attribute__((assume_aligned(64))) +#else +#define BUTTERAUGLI_CACHE_ALIGNED_RETURN /* not supported */ #endif // Alias for unchangeable, non-aliased pointers. 
T is a pointer type, diff --git a/jpeg.BUILD b/jpeg.BUILD index 92c9ddc..cbc1e86 100755 --- a/jpeg.BUILD +++ b/jpeg.BUILD @@ -1,13 +1,40 @@ # Description: -# The Independent JPEG Group's JPEG runtime library. +# libjpeg-turbo is a drop in replacement for jpeglib optimized with SIMD. -licenses(["notice"]) # custom notice-style license, see LICENSE +licenses(["notice"]) # custom notice-style license, see LICENSE.md + +exports_files(["LICENSE.md"]) + +libjpegturbo_nocopts = "-[W]error" + +libjpegturbo_copts = select({ + ":android": [ + "-O2", + "-fPIE", + "-w", + ], + ":windows": [ + "/Ox", + "/w14711", # function 'function' selected for inline expansion + "/w14710", # 'function' : function not inlined + ], + "//conditions:default": [ + "-O3", + "-w", + ], +}) + select({ + ":armeabi-v7a": [ + "-D__ARM_NEON__", + "-march=armv7-a", + "-mfloat-abi=softfp", + "-fprefetch-loop-arrays", + ], + "//conditions:default": [], +}) cc_library( name = "jpeg", srcs = [ - "cderror.h", - "cdjpeg.h", "jaricom.c", "jcapimin.c", "jcapistd.c", @@ -16,13 +43,16 @@ cc_library( "jccolor.c", "jcdctmgr.c", "jchuff.c", + "jchuff.h", "jcinit.c", "jcmainct.c", "jcmarker.c", "jcmaster.c", "jcomapi.c", "jconfig.h", + "jconfigint.h", "jcparam.c", + "jcphuff.c", "jcprepct.c", "jcsample.c", "jctrans.c", @@ -32,17 +62,23 @@ cc_library( "jdatadst.c", "jdatasrc.c", "jdcoefct.c", + "jdcoefct.h", "jdcolor.c", "jdct.h", "jddctmgr.c", "jdhuff.c", + "jdhuff.h", "jdinput.c", "jdmainct.c", + "jdmainct.h", "jdmarker.c", "jdmaster.c", + "jdmaster.h", "jdmerge.c", + "jdphuff.c", "jdpostct.c", "jdsample.c", + "jdsample.h", "jdtrans.c", "jerror.c", "jfdctflt.c", @@ -51,39 +87,330 @@ cc_library( "jidctflt.c", "jidctfst.c", "jidctint.c", + "jidctred.c", "jinclude.h", "jmemmgr.c", "jmemnobs.c", "jmemsys.h", - "jmorecfg.h", + "jpeg_nbits_table.h", + "jpegcomp.h", "jquant1.c", "jquant2.c", "jutils.c", "jversion.h", - "transupp.h", ], hdrs = [ + "jccolext.c", # should have been named .inc + "jdcol565.c", # should have 
been named .inc + "jdcolext.c", # should have been named .inc + "jdmrg565.c", # should have been named .inc + "jdmrgext.c", # should have been named .inc "jerror.h", + "jmorecfg.h", "jpegint.h", "jpeglib.h", + "jstdhuff.c", # should have been named .inc ], - includes = ["."], + copts = libjpegturbo_copts, + nocopts = libjpegturbo_nocopts, visibility = ["//visibility:public"], + deps = select({ + ":k8": [":simd_x86_64"], + ":armeabi-v7a": [":simd_armv7a"], + ":arm64-v8a": [":simd_armv8a"], + "//conditions:default": [":simd_none"], + }), +) + +cc_library( + name = "simd_x86_64", + srcs = [ + "jchuff.h", + "jconfig.h", + "jdct.h", + "jerror.h", + "jinclude.h", + "jmorecfg.h", + "jpegint.h", + "jpeglib.h", + "jsimd.h", + "jsimddct.h", + "simd/jccolor-sse2-64.o", + "simd/jcgray-sse2-64.o", + "simd/jchuff-sse2-64.o", + "simd/jcsample-sse2-64.o", + "simd/jdcolor-sse2-64.o", + "simd/jdmerge-sse2-64.o", + "simd/jdsample-sse2-64.o", + "simd/jfdctflt-sse-64.o", + "simd/jfdctfst-sse2-64.o", + "simd/jfdctint-sse2-64.o", + "simd/jidctflt-sse2-64.o", + "simd/jidctfst-sse2-64.o", + "simd/jidctint-sse2-64.o", + "simd/jidctred-sse2-64.o", + "simd/jquantf-sse2-64.o", + "simd/jquanti-sse2-64.o", + "simd/jsimd.h", + "simd/jsimd_x86_64.c", + ], + copts = libjpegturbo_copts, + linkstatic = 1, + nocopts = libjpegturbo_nocopts, +) + +genrule( + name = "simd_x86_64_assemblage23", + srcs = [ + "simd/jccolext-sse2-64.asm", + "simd/jccolor-sse2-64.asm", + "simd/jcgray-sse2-64.asm", + "simd/jcgryext-sse2-64.asm", + "simd/jchuff-sse2-64.asm", + "simd/jcolsamp.inc", + "simd/jcsample-sse2-64.asm", + "simd/jdcolext-sse2-64.asm", + "simd/jdcolor-sse2-64.asm", + "simd/jdct.inc", + "simd/jdmerge-sse2-64.asm", + "simd/jdmrgext-sse2-64.asm", + "simd/jdsample-sse2-64.asm", + "simd/jfdctflt-sse-64.asm", + "simd/jfdctfst-sse2-64.asm", + "simd/jfdctint-sse2-64.asm", + "simd/jidctflt-sse2-64.asm", + "simd/jidctfst-sse2-64.asm", + "simd/jidctint-sse2-64.asm", + "simd/jidctred-sse2-64.asm", + 
"simd/jpeg_nbits_table.inc", + "simd/jquantf-sse2-64.asm", + "simd/jquanti-sse2-64.asm", + "simd/jsimdcfg.inc", + "simd/jsimdext.inc", + ], + outs = [ + "simd/jccolor-sse2-64.o", + "simd/jcgray-sse2-64.o", + "simd/jchuff-sse2-64.o", + "simd/jcsample-sse2-64.o", + "simd/jdcolor-sse2-64.o", + "simd/jdmerge-sse2-64.o", + "simd/jdsample-sse2-64.o", + "simd/jfdctflt-sse-64.o", + "simd/jfdctfst-sse2-64.o", + "simd/jfdctint-sse2-64.o", + "simd/jidctflt-sse2-64.o", + "simd/jidctfst-sse2-64.o", + "simd/jidctint-sse2-64.o", + "simd/jidctred-sse2-64.o", + "simd/jquantf-sse2-64.o", + "simd/jquanti-sse2-64.o", + ], + cmd = "for out in $(OUTS); do\n" + + " $(location @nasm//:nasm) -f elf64" + + " -DELF -DPIC -DRGBX_FILLER_0XFF -D__x86_64__ -DARCH_X86_64" + + " -I $$(dirname $(location simd/jdct.inc))/" + + " -I $$(dirname $(location simd/jsimdcfg.inc))/" + + " -o $$out" + + " $$(dirname $(location simd/jdct.inc))/$$(basename $${out%.o}.asm)\n" + + "done", + tools = ["@nasm//:nasm"], +) + +cc_library( + name = "simd_armv7a", + srcs = [ + "jchuff.h", + "jconfig.h", + "jdct.h", + "jinclude.h", + "jmorecfg.h", + "jpeglib.h", + "jsimd.h", + "jsimddct.h", + "simd/jsimd.h", + "simd/jsimd_arm.c", + "simd/jsimd_arm_neon.S", + ], + copts = libjpegturbo_copts, + nocopts = libjpegturbo_nocopts, +) + +cc_library( + name = "simd_armv8a", + srcs = [ + "jchuff.h", + "jconfig.h", + "jdct.h", + "jinclude.h", + "jmorecfg.h", + "jpeglib.h", + "jsimd.h", + "jsimddct.h", + "simd/jsimd.h", + "simd/jsimd_arm64.c", + "simd/jsimd_arm64_neon.S", + ], + copts = libjpegturbo_copts, + nocopts = libjpegturbo_nocopts, +) + +cc_library( + name = "simd_none", + srcs = [ + "jchuff.h", + "jconfig.h", + "jdct.h", + "jerror.h", + "jinclude.h", + "jmorecfg.h", + "jpegint.h", + "jpeglib.h", + "jsimd.h", + "jsimd_none.c", + "jsimddct.h", + ], + copts = libjpegturbo_copts, + nocopts = libjpegturbo_nocopts, ) genrule( name = "configure", outs = ["jconfig.h"], - cmd = "cat <$@\n" + - "#define HAVE_PROTOTYPES 1\n" + - 
"#define HAVE_UNSIGNED_CHAR 1\n" + - "#define HAVE_UNSIGNED_SHORT 1\n" + + cmd = "cat <<'EOF' >$@\n" + + "#define JPEG_LIB_VERSION 62\n" + + "#define LIBJPEG_TURBO_VERSION 1.5.1\n" + + "#define LIBJPEG_TURBO_VERSION_NUMBER 1005001\n" + + "#define C_ARITH_CODING_SUPPORTED 1\n" + + "#define D_ARITH_CODING_SUPPORTED 1\n" + + "#define BITS_IN_JSAMPLE 8\n" + + "#define HAVE_LOCALE_H 1\n" + "#define HAVE_STDDEF_H 1\n" + "#define HAVE_STDLIB_H 1\n" + - "#ifdef WIN32\n" + + "#define HAVE_UNSIGNED_CHAR 1\n" + + "#define HAVE_UNSIGNED_SHORT 1\n" + + "#define MEM_SRCDST_SUPPORTED 1\n" + + "#define NEED_SYS_TYPES_H 1\n" + + select({ + ":k8": "#define WITH_SIMD 1\n", + ":armeabi-v7a": "#define WITH_SIMD 1\n", + ":arm64-v8a": "#define WITH_SIMD 1\n", + "//conditions:default": "", + }) + + "EOF", +) + +genrule( + name = "configure_internal", + outs = ["jconfigint.h"], + cmd = "cat <<'EOF' >$@\n" + + "#define BUILD \"20161115\"\n" + + "#ifdef _MSC_VER /* Windows */\n" + "#define INLINE __inline\n" + "#else\n" + - "#define INLINE __inline__\n" + + "#define INLINE inline __attribute__((always_inline))\n" + "#endif\n" + - "EOF\n", + "#define PACKAGE_NAME \"libjpeg-turbo\"\n" + + "#define VERSION \"1.5.1\"\n" + + "#if (__WORDSIZE==64 && !defined(__native_client__)) || defined(_WIN64)\n" + + "#define SIZEOF_SIZE_T 8\n" + + "#else\n" + + "#define SIZEOF_SIZE_T 4\n" + + "#endif\n" + + "EOF", +) + +# jiminy cricket the way this file is generated is completely outrageous +genrule( + name = "configure_simd", + outs = ["simd/jsimdcfg.inc"], + cmd = "cat <<'EOF' >$@\n" + + "%define DCTSIZE 8\n" + + "%define DCTSIZE2 64\n" + + "%define RGB_RED 0\n" + + "%define RGB_GREEN 1\n" + + "%define RGB_BLUE 2\n" + + "%define RGB_PIXELSIZE 3\n" + + "%define EXT_RGB_RED 0\n" + + "%define EXT_RGB_GREEN 1\n" + + "%define EXT_RGB_BLUE 2\n" + + "%define EXT_RGB_PIXELSIZE 3\n" + + "%define EXT_RGBX_RED 0\n" + + "%define EXT_RGBX_GREEN 1\n" + + "%define EXT_RGBX_BLUE 2\n" + + "%define EXT_RGBX_PIXELSIZE 4\n" + 
+ "%define EXT_BGR_RED 2\n" + + "%define EXT_BGR_GREEN 1\n" + + "%define EXT_BGR_BLUE 0\n" + + "%define EXT_BGR_PIXELSIZE 3\n" + + "%define EXT_BGRX_RED 2\n" + + "%define EXT_BGRX_GREEN 1\n" + + "%define EXT_BGRX_BLUE 0\n" + + "%define EXT_BGRX_PIXELSIZE 4\n" + + "%define EXT_XBGR_RED 3\n" + + "%define EXT_XBGR_GREEN 2\n" + + "%define EXT_XBGR_BLUE 1\n" + + "%define EXT_XBGR_PIXELSIZE 4\n" + + "%define EXT_XRGB_RED 1\n" + + "%define EXT_XRGB_GREEN 2\n" + + "%define EXT_XRGB_BLUE 3\n" + + "%define EXT_XRGB_PIXELSIZE 4\n" + + "%define RGBX_FILLER_0XFF 1\n" + + "%define JSAMPLE byte ; unsigned char\n" + + "%define SIZEOF_JSAMPLE SIZEOF_BYTE ; sizeof(JSAMPLE)\n" + + "%define CENTERJSAMPLE 128\n" + + "%define JCOEF word ; short\n" + + "%define SIZEOF_JCOEF SIZEOF_WORD ; sizeof(JCOEF)\n" + + "%define JDIMENSION dword ; unsigned int\n" + + "%define SIZEOF_JDIMENSION SIZEOF_DWORD ; sizeof(JDIMENSION)\n" + + "%define JSAMPROW POINTER ; JSAMPLE * (jpeglib.h)\n" + + "%define JSAMPARRAY POINTER ; JSAMPROW * (jpeglib.h)\n" + + "%define JSAMPIMAGE POINTER ; JSAMPARRAY * (jpeglib.h)\n" + + "%define JCOEFPTR POINTER ; JCOEF * (jpeglib.h)\n" + + "%define SIZEOF_JSAMPROW SIZEOF_POINTER ; sizeof(JSAMPROW)\n" + + "%define SIZEOF_JSAMPARRAY SIZEOF_POINTER ; sizeof(JSAMPARRAY)\n" + + "%define SIZEOF_JSAMPIMAGE SIZEOF_POINTER ; sizeof(JSAMPIMAGE)\n" + + "%define SIZEOF_JCOEFPTR SIZEOF_POINTER ; sizeof(JCOEFPTR)\n" + + "%define DCTELEM word ; short\n" + + "%define SIZEOF_DCTELEM SIZEOF_WORD ; sizeof(DCTELEM)\n" + + "%define float FP32 ; float\n" + + "%define SIZEOF_FAST_FLOAT SIZEOF_FP32 ; sizeof(float)\n" + + "%define ISLOW_MULT_TYPE word ; must be short\n" + + "%define SIZEOF_ISLOW_MULT_TYPE SIZEOF_WORD ; sizeof(ISLOW_MULT_TYPE)\n" + + "%define IFAST_MULT_TYPE word ; must be short\n" + + "%define SIZEOF_IFAST_MULT_TYPE SIZEOF_WORD ; sizeof(IFAST_MULT_TYPE)\n" + + "%define IFAST_SCALE_BITS 2 ; fractional bits in scale factors\n" + + "%define FLOAT_MULT_TYPE FP32 ; must be float\n" + + 
"%define SIZEOF_FLOAT_MULT_TYPE SIZEOF_FP32 ; sizeof(FLOAT_MULT_TYPE)\n" + + "%define JSIMD_NONE 0x00\n" + + "%define JSIMD_MMX 0x01\n" + + "%define JSIMD_3DNOW 0x02\n" + + "%define JSIMD_SSE 0x04\n" + + "%define JSIMD_SSE2 0x08\n" + + "EOF", +) + +config_setting( + name = "k8", + values = {"cpu": "k8"}, +) + +config_setting( + name = "android", + values = {"crosstool_top": "//external:android/crosstool"}, +) + +config_setting( + name = "armeabi-v7a", + values = {"android_cpu": "armeabi-v7a"}, +) + +config_setting( + name = "arm64-v8a", + values = {"android_cpu": "arm64-v8a"}, +) + +config_setting( + name = "windows", + values = {"cpu": "x64_windows_msvc"}, ) diff --git a/nasm.BUILD b/nasm.BUILD new file mode 100644 index 0000000..67df7ae --- /dev/null +++ b/nasm.BUILD @@ -0,0 +1,115 @@ +# Description: +# NASM is a portable assembler in the Intel/Microsoft tradition. + +licenses(["notice"]) # BSD 2-clause + +exports_files(["LICENSE"]) + +cc_binary( + name = "nasm", + srcs = [ + "assemble.c", + "assemble.h", + "compiler.h", + "crc64.c", + "directiv.c", + "directiv.h", + "disp8.c", + "disp8.h", + "eval.c", + "eval.h", + "exprlib.c", + "float.c", + "float.h", + "hashtbl.c", + "hashtbl.h", + "iflag.c", + "iflag.h", + "iflaggen.h", + "ilog2.c", + "insns.h", + "insnsa.c", + "insnsb.c", + "insnsi.h", + "labels.c", + "labels.h", + "lib/strlcpy.c", + "listing.c", + "listing.h", + "macros.c", + "md5.h", + "md5c.c", + "nasm.c", + "nasm.h", + "nasmlib.c", + "nasmlib.h", + "opflags.h", + "output/codeview.c", + "output/dwarf.h", + "output/elf.h", + "output/nulldbg.c", + "output/nullout.c", + "output/outaout.c", + "output/outas86.c", + "output/outbin.c", + "output/outcoff.c", + "output/outdbg.c", + "output/outelf.c", + "output/outelf.h", + "output/outelf32.c", + "output/outelf64.c", + "output/outelfx32.c", + "output/outform.c", + "output/outform.h", + "output/outieee.c", + "output/outlib.c", + "output/outlib.h", + "output/outmacho.c", + "output/outobj.c", + "output/outrdf2.c", + 
"output/pecoff.h", + "output/stabs.h", + "parser.c", + "parser.h", + "pptok.c", + "pptok.h", + "preproc.c", + "preproc.h", + "preproc-nop.c", + "quote.c", + "quote.h", + "raa.c", + "raa.h", + "rbtree.c", + "rbtree.h", + "rdoff/rdoff.h", + "realpath.c", + "regflags.c", + "regs.h", + "regvals.c", + "saa.c", + "saa.h", + "srcfile.c", + "stdscan.c", + "stdscan.h", + "strfunc.c", + "tables.h", + "tokens.h", + "tokhash.c", + "ver.c", + "version.h", + ], + copts = select({ + ":windows": [], + "//conditions:default": [ + "-w", + "-std=c99", + ], + }), + visibility = ["@jpeg_archive//:__pkg__"], +) + +config_setting( + name = "windows", + values = {"cpu": "x64_windows_msvc"}, +)