aboutsummaryrefslogtreecommitdiffstats
path: root/package/libs/zlib/patches
diff options
context:
space:
mode:
authorDaniel Engberg <daniel.engberg.lists@pyret.net>2017-09-05 20:38:07 +0200
committerHauke Mehrtens <hauke@hauke-m.de>2018-01-02 17:11:12 +0100
commit3acecba5209984e43397bc0eaa96822ccacb5712 (patch)
tree9c15d5ce905137a93bf9db84ff384c47ff0c8b8e /package/libs/zlib/patches
parent383e8aeec703fee957f30892487d7bb746c1a212 (diff)
downloadupstream-3acecba5209984e43397bc0eaa96822ccacb5712.tar.gz
upstream-3acecba5209984e43397bc0eaa96822ccacb5712.tar.bz2
upstream-3acecba5209984e43397bc0eaa96822ccacb5712.zip
package/libs/zlib: Add ARM and NEON optimizations
This adds two optimizations for ARM: NEON optimized Adler(-)32 checksum algorithm (ARMv7 and newer NEON CPUs) ARM(v7+) specific optimization for inflate I've also connected inflate optimization to the build using the following source as template. https://github.com/mirror/chromium/commit/0397489124ce7e6aced020f8b85f5034c7d5f49b#diff-a62ad2db6c83dbc205d34bb9a8884f16 Additional info: https://codereview.chromium.org/2676493007/ https://codereview.chromium.org/2722063002/ Sources: https://github.com/madler/zlib/pull/251 (only the first commit) https://github.com/madler/zlib/pull/256 Signed-off-by: Daniel Engberg <daniel.engberg.lists@pyret.net>
Diffstat (limited to 'package/libs/zlib/patches')
-rw-r--r--package/libs/zlib/patches/001-neon-implementation-of-adler32.patch253
-rw-r--r--package/libs/zlib/patches/002-arm-specific-optimisations-for-inflate.patch2411
-rw-r--r--package/libs/zlib/patches/003-attach-sourcefiles-in-patch-002-to-buildsystem.patch100
3 files changed, 2764 insertions, 0 deletions
diff --git a/package/libs/zlib/patches/001-neon-implementation-of-adler32.patch b/package/libs/zlib/patches/001-neon-implementation-of-adler32.patch
new file mode 100644
index 0000000000..843ef45c7d
--- /dev/null
+++ b/package/libs/zlib/patches/001-neon-implementation-of-adler32.patch
@@ -0,0 +1,253 @@
+From d2f06cd65d7ac39c6dd6761eef162abc946b155b Mon Sep 17 00:00:00 2001
+From: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
+Date: Tue, 11 Apr 2017 17:13:02 -0700
+Subject: [PATCH] NEON implementation for Adler32
+
+The checksum is calculated in the uncompressed PNG data
+and can be made much faster by using SIMD.
+
+Tests in ARMv8 yielded an improvement of about 3x
+(e.g. walltime was 350ms x 125ms for a 4096x4096 bytes
+executed 30 times). That results in at least 18% improvement
+in image decoding in Chromium.
+
+Further details at:
+https://bugs.chromium.org/p/chromium/issues/detail?id=688601
+---
+ CMakeLists.txt | 29 +++++++---
+ adler32.c | 5 ++
+ contrib/README.contrib | 3 +
+ contrib/arm/neon_adler32.c | 137 +++++++++++++++++++++++++++++++++++++++++++++
+ 4 files changed, 166 insertions(+), 8 deletions(-)
+ create mode 100644 contrib/arm/neon_adler32.c
+
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 0fe939df..8e75f664 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -7,6 +7,7 @@ set(VERSION "1.2.11")
+
+ option(ASM686 "Enable building i686 assembly implementation")
+ option(AMD64 "Enable building amd64 assembly implementation")
++option(ARMv8 "Enable building ARM NEON intrinsics implementation")
+
+ set(INSTALL_BIN_DIR "${CMAKE_INSTALL_PREFIX}/bin" CACHE PATH "Installation directory for executables")
+ set(INSTALL_LIB_DIR "${CMAKE_INSTALL_PREFIX}/lib" CACHE PATH "Installation directory for libraries")
+@@ -132,14 +133,26 @@ endif()
+ if(CMAKE_COMPILER_IS_GNUCC)
+ if(ASM686)
+ set(ZLIB_ASMS contrib/asm686/match.S)
+- elseif (AMD64)
++ elseif(AMD64)
+ set(ZLIB_ASMS contrib/amd64/amd64-match.S)
+- endif ()
++ elseif(ARMv8)
++ set(ZLIB_ARMv8 contrib/arm/neon_adler32.c)
++ endif()
+
+- if(ZLIB_ASMS)
+- add_definitions(-DASMV)
+- set_source_files_properties(${ZLIB_ASMS} PROPERTIES LANGUAGE C COMPILE_FLAGS -DNO_UNDERLINE)
+- endif()
++ if(ZLIB_ASMS)
++ add_definitions(-DASMV)
++ set_source_files_properties(${ZLIB_ASMS} PROPERTIES LANGUAGE C COMPILE_FLAGS -DNO_UNDERLINE)
++ elseif(ZLIB_ARMv8)
++ add_definitions(-DARMv8)
++ set(COMPILER ${CMAKE_C_COMPILER})
++ # NEON is mandatory in ARMv8.
++ if(${COMPILER} MATCHES "aarch64")
++ set_source_files_properties(${ZLIB_ARMv8} PROPERTIES LANGUAGE C COMPILE_FLAGS -march=armv8-a)
++ # But it was optional for ARMv7.
++ elseif(${COMPILER} MATCHES "arm")
++ set_source_files_properties(${ZLIB_ARMv8} PROPERTIES LANGUAGE C COMPILE_FLAGS -mfpu=neon)
++ endif()
++ endif()
+ endif()
+
+ if(MSVC)
+@@ -183,8 +196,8 @@ if(MINGW)
+ set(ZLIB_DLL_SRCS ${CMAKE_CURRENT_BINARY_DIR}/zlib1rc.obj)
+ endif(MINGW)
+
+-add_library(zlib SHARED ${ZLIB_SRCS} ${ZLIB_ASMS} ${ZLIB_DLL_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS})
+-add_library(zlibstatic STATIC ${ZLIB_SRCS} ${ZLIB_ASMS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS})
++add_library(zlib SHARED ${ZLIB_SRCS} ${ZLIB_ASMS} ${ZLIB_ARMv8} ${ZLIB_DLL_SRCS} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS})
++add_library(zlibstatic STATIC ${ZLIB_SRCS} ${ZLIB_ASMS} ${ZLIB_ARMv8} ${ZLIB_PUBLIC_HDRS} ${ZLIB_PRIVATE_HDRS})
+ set_target_properties(zlib PROPERTIES DEFINE_SYMBOL ZLIB_DLL)
+ set_target_properties(zlib PROPERTIES SOVERSION 1)
+
+diff --git a/adler32.c b/adler32.c
+index d0be4380..45ebaa4b 100644
+--- a/adler32.c
++++ b/adler32.c
+@@ -136,7 +136,12 @@ uLong ZEXPORT adler32(adler, buf, len)
+ const Bytef *buf;
+ uInt len;
+ {
++#ifdef ARMv8
++# pragma message("Using NEON-ized Adler32.")
++ return NEON_adler32(adler, buf, len);
++#else
+ return adler32_z(adler, buf, len);
++#endif
+ }
+
+ /* ========================================================================= */
+diff --git a/contrib/README.contrib b/contrib/README.contrib
+index a411d5c3..3fd1d202 100644
+--- a/contrib/README.contrib
++++ b/contrib/README.contrib
+@@ -12,6 +12,9 @@ amd64/ by Mikhail Teterin <mi@ALDAN.algebra.com>
+ asm code for AMD64
+ See patch at http://www.freebsd.org/cgi/query-pr.cgi?pr=bin/96393
+
++arm/ by Adenilson Cavalcanti <cavalcantii@chromium.org>
++ ARM optimizations (NEON and ARMv8 code).
++
+ asm686/ by Brian Raiter <breadbox@muppetlabs.com>
+ asm code for Pentium and PPro/PII, using the AT&T (GNU as) syntax
+ See http://www.muppetlabs.com/~breadbox/software/assembly.html
+diff --git a/contrib/arm/neon_adler32.c b/contrib/arm/neon_adler32.c
+new file mode 100644
+index 00000000..f173a74f
+--- /dev/null
++++ b/contrib/arm/neon_adler32.c
+@@ -0,0 +1,137 @@
++/* Copyright (C) 1995-2011, 2016 Mark Adler
++ * Copyright (C) 2017 ARM Holdings Inc.
++ * Authors: Adenilson Cavalcanti <adenilson.cavalcanti@arm.com>
++ * Simon Hosie <simon.hosie@arm.com>
++ * This software is provided 'as-is', without any express or implied
++ * warranty. In no event will the authors be held liable for any damages
++ * arising from the use of this software.
++ * Permission is granted to anyone to use this software for any purpose,
++ * including commercial applications, and to alter it and redistribute it
++ * freely, subject to the following restrictions:
++ * 1. The origin of this software must not be misrepresented; you must not
++ * claim that you wrote the original software. If you use this software
++ * in a product, an acknowledgment in the product documentation would be
++ * appreciated but is not required.
++ * 2. Altered source versions must be plainly marked as such, and must not be
++ * misrepresented as being the original software.
++ * 3. This notice may not be removed or altered from any source distribution.
++ */
++
++#if (defined(__ARM_NEON__) || defined(__ARM_NEON))
++#include <arm_neon.h>
++
++static void NEON_accum32(uint32_t *s, const unsigned char *buf,
++ unsigned int len)
++{
++ static const uint8_t taps[32] = {
++ 32, 31, 30, 29, 28, 27, 26, 25,
++ 24, 23, 22, 21, 20, 19, 18, 17,
++ 16, 15, 14, 13, 12, 11, 10, 9,
++ 8, 7, 6, 5, 4, 3, 2, 1 };
++
++ uint32x2_t adacc2, s2acc2, as;
++ uint8x16_t t0 = vld1q_u8(taps), t1 = vld1q_u8(taps + 16);
++
++ uint32x4_t adacc = vdupq_n_u32(0), s2acc = vdupq_n_u32(0);
++ adacc = vsetq_lane_u32(s[0], adacc, 0);
++ s2acc = vsetq_lane_u32(s[1], s2acc, 0);
++
++ while (len >= 2) {
++ uint8x16_t d0 = vld1q_u8(buf), d1 = vld1q_u8(buf + 16);
++ uint16x8_t adler, sum2;
++ s2acc = vaddq_u32(s2acc, vshlq_n_u32(adacc, 5));
++ adler = vpaddlq_u8( d0);
++ adler = vpadalq_u8(adler, d1);
++ sum2 = vmull_u8( vget_low_u8(t0), vget_low_u8(d0));
++ sum2 = vmlal_u8(sum2, vget_high_u8(t0), vget_high_u8(d0));
++ sum2 = vmlal_u8(sum2, vget_low_u8(t1), vget_low_u8(d1));
++ sum2 = vmlal_u8(sum2, vget_high_u8(t1), vget_high_u8(d1));
++ adacc = vpadalq_u16(adacc, adler);
++ s2acc = vpadalq_u16(s2acc, sum2);
++ len -= 2;
++ buf += 32;
++ }
++
++ while (len > 0) {
++ uint8x16_t d0 = vld1q_u8(buf);
++ uint16x8_t adler, sum2;
++ s2acc = vaddq_u32(s2acc, vshlq_n_u32(adacc, 4));
++ adler = vpaddlq_u8(d0);
++ sum2 = vmull_u8( vget_low_u8(t1), vget_low_u8(d0));
++ sum2 = vmlal_u8(sum2, vget_high_u8(t1), vget_high_u8(d0));
++ adacc = vpadalq_u16(adacc, adler);
++ s2acc = vpadalq_u16(s2acc, sum2);
++ buf += 16;
++ len--;
++ }
++
++ adacc2 = vpadd_u32(vget_low_u32(adacc), vget_high_u32(adacc));
++ s2acc2 = vpadd_u32(vget_low_u32(s2acc), vget_high_u32(s2acc));
++ as = vpadd_u32(adacc2, s2acc2);
++ s[0] = vget_lane_u32(as, 0);
++ s[1] = vget_lane_u32(as, 1);
++}
++
++static void NEON_handle_tail(uint32_t *pair, const unsigned char *buf,
++ unsigned int len)
++{
++ /* Oldie K&R code integration. */
++ unsigned int i;
++ for (i = 0; i < len; ++i) {
++ pair[0] += buf[i];
++ pair[1] += pair[0];
++ }
++}
++
++extern unsigned long NEON_adler32(unsigned long adler, const unsigned char *buf,
++ const unsigned int len)
++{
++ /* initial Adler-32 value (deferred check for len == 1 speed) */
++ if (!buf)
++ return 1L;
++
++ /* The largest prime smaller than 65536. */
++ const uint32_t M_BASE = 65521;
++ /* This is the threshold where doing accumulation may overflow. */
++ const int M_NMAX = 5552;
++
++ unsigned long sum2;
++ uint32_t pair[2];
++ int n = M_NMAX;
++ unsigned int done = 0;
++ /* Oldie K&R code integration. */
++ unsigned int i;
++
++ /* Split Adler-32 into component sums, it can be supplied by
++ * the caller sites (e.g. in a PNG file).
++ */
++ sum2 = (adler >> 16) & 0xffff;
++ adler &= 0xffff;
++ pair[0] = adler;
++ pair[1] = sum2;
++
++ for (i = 0; i < len; i += n) {
++ if ((i + n) > len)
++ n = len - i;
++
++ if (n < 16)
++ break;
++
++ NEON_accum32(pair, buf + i, n / 16);
++ pair[0] %= M_BASE;
++ pair[1] %= M_BASE;
++
++ done += (n / 16) * 16;
++ }
++
++ /* Handle the tail elements. */
++ if (done < len) {
++ NEON_handle_tail(pair, (buf + done), len - done);
++ pair[0] %= M_BASE;
++ pair[1] %= M_BASE;
++ }
++
++ /* D = B * 65536 + A, see: https://en.wikipedia.org/wiki/Adler-32. */
++ return (pair[1] << 16) | pair[0];
++}
++#endif
diff --git a/package/libs/zlib/patches/002-arm-specific-optimisations-for-inflate.patch b/package/libs/zlib/patches/002-arm-specific-optimisations-for-inflate.patch
new file mode 100644
index 0000000000..475ed6f3e9
--- /dev/null
+++ b/package/libs/zlib/patches/002-arm-specific-optimisations-for-inflate.patch
@@ -0,0 +1,2411 @@
+From 6bac7a3e0ebcd3147294b73acb34606eba18ae7f Mon Sep 17 00:00:00 2001
+From: Simon Hosie <simon.hosie@arm.com>
+Date: Wed, 12 Apr 2017 12:52:33 -0700
+Subject: [PATCH 1/2] Prepare ARM-specific contrib directory.
+
+Change-Id: Id4cda552b39bfb39ab35ec499dbe122b43b6d1a1
+---
+ contrib/arm/inffast.c | 323 ++++++++++
+ contrib/arm/inflate.c | 1561 +++++++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 1884 insertions(+)
+ create mode 100644 contrib/arm/inffast.c
+ create mode 100644 contrib/arm/inflate.c
+
+diff --git a/contrib/arm/inffast.c b/contrib/arm/inffast.c
+new file mode 100644
+index 00000000..0dbd1dbc
+--- /dev/null
++++ b/contrib/arm/inffast.c
+@@ -0,0 +1,323 @@
++/* inffast.c -- fast decoding
++ * Copyright (C) 1995-2017 Mark Adler
++ * For conditions of distribution and use, see copyright notice in zlib.h
++ */
++
++#include "zutil.h"
++#include "inftrees.h"
++#include "inflate.h"
++#include "inffast.h"
++
++#ifdef ASMINF
++# pragma message("Assembler code may have bugs -- use at your own risk")
++#else
++
++/*
++ Decode literal, length, and distance codes and write out the resulting
++ literal and match bytes until either not enough input or output is
++ available, an end-of-block is encountered, or a data error is encountered.
++ When large enough input and output buffers are supplied to inflate(), for
++ example, a 16K input buffer and a 64K output buffer, more than 95% of the
++ inflate execution time is spent in this routine.
++
++ Entry assumptions:
++
++ state->mode == LEN
++ strm->avail_in >= 6
++ strm->avail_out >= 258
++ start >= strm->avail_out
++ state->bits < 8
++
++ On return, state->mode is one of:
++
++ LEN -- ran out of enough output space or enough available input
++ TYPE -- reached end of block code, inflate() to interpret next block
++ BAD -- error in block data
++
++ Notes:
++
++ - The maximum input bits used by a length/distance pair is 15 bits for the
++ length code, 5 bits for the length extra, 15 bits for the distance code,
++ and 13 bits for the distance extra. This totals 48 bits, or six bytes.
++ Therefore if strm->avail_in >= 6, then there is enough input to avoid
++ checking for available input while decoding.
++
++ - The maximum bytes that a single length/distance pair can output is 258
++ bytes, which is the maximum length that can be coded. inflate_fast()
++ requires strm->avail_out >= 258 for each loop to avoid checking for
++ output space.
++ */
++void ZLIB_INTERNAL inflate_fast(strm, start)
++z_streamp strm;
++unsigned start; /* inflate()'s starting value for strm->avail_out */
++{
++ struct inflate_state FAR *state;
++ z_const unsigned char FAR *in; /* local strm->next_in */
++ z_const unsigned char FAR *last; /* have enough input while in < last */
++ unsigned char FAR *out; /* local strm->next_out */
++ unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
++ unsigned char FAR *end; /* while out < end, enough space available */
++#ifdef INFLATE_STRICT
++ unsigned dmax; /* maximum distance from zlib header */
++#endif
++ unsigned wsize; /* window size or zero if not using window */
++ unsigned whave; /* valid bytes in the window */
++ unsigned wnext; /* window write index */
++ unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
++ unsigned long hold; /* local strm->hold */
++ unsigned bits; /* local strm->bits */
++ code const FAR *lcode; /* local strm->lencode */
++ code const FAR *dcode; /* local strm->distcode */
++ unsigned lmask; /* mask for first level of length codes */
++ unsigned dmask; /* mask for first level of distance codes */
++ code here; /* retrieved table entry */
++ unsigned op; /* code bits, operation, extra bits, or */
++ /* window position, window bytes to copy */
++ unsigned len; /* match length, unused bytes */
++ unsigned dist; /* match distance */
++ unsigned char FAR *from; /* where to copy match from */
++
++ /* copy state to local variables */
++ state = (struct inflate_state FAR *)strm->state;
++ in = strm->next_in;
++ last = in + (strm->avail_in - 5);
++ out = strm->next_out;
++ beg = out - (start - strm->avail_out);
++ end = out + (strm->avail_out - 257);
++#ifdef INFLATE_STRICT
++ dmax = state->dmax;
++#endif
++ wsize = state->wsize;
++ whave = state->whave;
++ wnext = state->wnext;
++ window = state->window;
++ hold = state->hold;
++ bits = state->bits;
++ lcode = state->lencode;
++ dcode = state->distcode;
++ lmask = (1U << state->lenbits) - 1;
++ dmask = (1U << state->distbits) - 1;
++
++ /* decode literals and length/distances until end-of-block or not enough
++ input data or output space */
++ do {
++ if (bits < 15) {
++ hold += (unsigned long)(*in++) << bits;
++ bits += 8;
++ hold += (unsigned long)(*in++) << bits;
++ bits += 8;
++ }
++ here = lcode[hold & lmask];
++ dolen:
++ op = (unsigned)(here.bits);
++ hold >>= op;
++ bits -= op;
++ op = (unsigned)(here.op);
++ if (op == 0) { /* literal */
++ Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
++ "inflate: literal '%c'\n" :
++ "inflate: literal 0x%02x\n", here.val));
++ *out++ = (unsigned char)(here.val);
++ }
++ else if (op & 16) { /* length base */
++ len = (unsigned)(here.val);
++ op &= 15; /* number of extra bits */
++ if (op) {
++ if (bits < op) {
++ hold += (unsigned long)(*in++) << bits;
++ bits += 8;
++ }
++ len += (unsigned)hold & ((1U << op) - 1);
++ hold >>= op;
++ bits -= op;
++ }
++ Tracevv((stderr, "inflate: length %u\n", len));
++ if (bits < 15) {
++ hold += (unsigned long)(*in++) << bits;
++ bits += 8;
++ hold += (unsigned long)(*in++) << bits;
++ bits += 8;
++ }
++ here = dcode[hold & dmask];
++ dodist:
++ op = (unsigned)(here.bits);
++ hold >>= op;
++ bits -= op;
++ op = (unsigned)(here.op);
++ if (op & 16) { /* distance base */
++ dist = (unsigned)(here.val);
++ op &= 15; /* number of extra bits */
++ if (bits < op) {
++ hold += (unsigned long)(*in++) << bits;
++ bits += 8;
++ if (bits < op) {
++ hold += (unsigned long)(*in++) << bits;
++ bits += 8;
++ }
++ }
++ dist += (unsigned)hold & ((1U << op) - 1);
++#ifdef INFLATE_STRICT
++ if (dist > dmax) {
++ strm->msg = (char *)"invalid distance too far back";
++ state->mode = BAD;
++ break;
++ }
++#endif
++ hold >>= op;
++ bits -= op;
++ Tracevv((stderr, "inflate: distance %u\n", dist));
++ op = (unsigned)(out - beg); /* max distance in output */
++ if (dist > op) { /* see if copy from window */
++ op = dist - op; /* distance back in window */
++ if (op > whave) {
++ if (state->sane) {
++ strm->msg =
++ (char *)"invalid distance too far back";
++ state->mode = BAD;
++ break;
++ }
++#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
++ if (len <= op - whave) {
++ do {
++ *out++ = 0;
++ } while (--len);
++ continue;
++ }
++ len -= op - whave;
++ do {
++ *out++ = 0;
++ } while (--op > whave);
++ if (op == 0) {
++ from = out - dist;
++ do {
++ *out++ = *from++;
++ } while (--len);
++ continue;
++ }
++#endif
++ }
++ from = window;
++ if (wnext == 0) { /* very common case */
++ from += wsize - op;
++ if (op < len) { /* some from window */
++ len -= op;
++ do {
++ *out++ = *from++;
++ } while (--op);
++ from = out - dist; /* rest from output */
++ }
++ }
++ else if (wnext < op) { /* wrap around window */
++ from += wsize + wnext - op;
++ op -= wnext;
++ if (op < len) { /* some from end of window */
++ len -= op;
++ do {
++ *out++ = *from++;
++ } while (--op);
++ from = window;
++ if (wnext < len) { /* some from start of window */
++ op = wnext;
++ len -= op;
++ do {
++ *out++ = *from++;
++ } while (--op);
++ from = out - dist; /* rest from output */
++ }
++ }
++ }
++ else { /* contiguous in window */
++ from += wnext - op;
++ if (op < len) { /* some from window */
++ len -= op;
++ do {
++ *out++ = *from++;
++ } while (--op);
++ from = out - dist; /* rest from output */
++ }
++ }
++ while (len > 2) {
++ *out++ = *from++;
++ *out++ = *from++;
++ *out++ = *from++;
++ len -= 3;
++ }
++ if (len) {
++ *out++ = *from++;
++ if (len > 1)
++ *out++ = *from++;
++ }
++ }
++ else {
++ from = out - dist; /* copy direct from output */
++ do { /* minimum length is three */
++ *out++ = *from++;
++ *out++ = *from++;
++ *out++ = *from++;
++ len -= 3;
++ } while (len > 2);
++ if (len) {
++ *out++ = *from++;
++ if (len > 1)
++ *out++ = *from++;
++ }
++ }
++ }
++ else if ((op & 64) == 0) { /* 2nd level distance code */
++ here = dcode[here.val + (hold & ((1U << op) - 1))];
++ goto dodist;
++ }
++ else {
++ strm->msg = (char *)"invalid distance code";
++ state->mode = BAD;
++ break;
++ }
++ }
++ else if ((op & 64) == 0) { /* 2nd level length code */
++ here = lcode[here.val + (hold & ((1U << op) - 1))];
++ goto dolen;
++ }
++ else if (op & 32) { /* end-of-block */
++ Tracevv((stderr, "inflate: end of block\n"));
++ state->mode = TYPE;
++ break;
++ }
++ else {
++ strm->msg = (char *)"invalid literal/length code";
++ state->mode = BAD;
++ break;
++ }
++ } while (in < last && out < end);
++
++ /* return unused bytes (on entry, bits < 8, so in won't go too far back) */
++ len = bits >> 3;
++ in -= len;
++ bits -= len << 3;
++ hold &= (1U << bits) - 1;
++
++ /* update state and return */
++ strm->next_in = in;
++ strm->next_out = out;
++ strm->avail_in = (unsigned)(in < last ? 5 + (last - in) : 5 - (in - last));
++ strm->avail_out = (unsigned)(out < end ?
++ 257 + (end - out) : 257 - (out - end));
++ state->hold = hold;
++ state->bits = bits;
++ return;
++}
++
++/*
++ inflate_fast() speedups that turned out slower (on a PowerPC G3 750CXe):
++ - Using bit fields for code structure
++ - Different op definition to avoid & for extra bits (do & for table bits)
++ - Three separate decoding do-loops for direct, window, and wnext == 0
++ - Special case for distance > 1 copies to do overlapped load and store copy
++ - Explicit branch predictions (based on measured branch probabilities)
++ - Deferring match copy and interspersed it with decoding subsequent codes
++ - Swapping literal/length else
++ - Swapping window/direct else
++ - Larger unrolled copy loops (three is about right)
++ - Moving len -= 3 statement into middle of loop
++ */
++
++#endif /* !ASMINF */
+diff --git a/contrib/arm/inflate.c b/contrib/arm/inflate.c
+new file mode 100644
+index 00000000..ac333e8c
+--- /dev/null
++++ b/contrib/arm/inflate.c
+@@ -0,0 +1,1561 @@
++/* inflate.c -- zlib decompression
++ * Copyright (C) 1995-2016 Mark Adler
++ * For conditions of distribution and use, see copyright notice in zlib.h
++ */
++
++/*
++ * Change history:
++ *
++ * 1.2.beta0 24 Nov 2002
++ * - First version -- complete rewrite of inflate to simplify code, avoid
++ * creation of window when not needed, minimize use of window when it is
++ * needed, make inffast.c even faster, implement gzip decoding, and to
++ * improve code readability and style over the previous zlib inflate code
++ *
++ * 1.2.beta1 25 Nov 2002
++ * - Use pointers for available input and output checking in inffast.c
++ * - Remove input and output counters in inffast.c
++ * - Change inffast.c entry and loop from avail_in >= 7 to >= 6
++ * - Remove unnecessary second byte pull from length extra in inffast.c
++ * - Unroll direct copy to three copies per loop in inffast.c
++ *
++ * 1.2.beta2 4 Dec 2002
++ * - Change external routine names to reduce potential conflicts
++ * - Correct filename to inffixed.h for fixed tables in inflate.c
++ * - Make hbuf[] unsigned char to match parameter type in inflate.c
++ * - Change strm->next_out[-state->offset] to *(strm->next_out - state->offset)
++ * to avoid negation problem on Alphas (64 bit) in inflate.c
++ *
++ * 1.2.beta3 22 Dec 2002
++ * - Add comments on state->bits assertion in inffast.c
++ * - Add comments on op field in inftrees.h
++ * - Fix bug in reuse of allocated window after inflateReset()
++ * - Remove bit fields--back to byte structure for speed
++ * - Remove distance extra == 0 check in inflate_fast()--only helps for lengths
++ * - Change post-increments to pre-increments in inflate_fast(), PPC biased?
++ * - Add compile time option, POSTINC, to use post-increments instead (Intel?)
++ * - Make MATCH copy in inflate() much faster for when inflate_fast() not used
++ * - Use local copies of stream next and avail values, as well as local bit
++ * buffer and bit count in inflate()--for speed when inflate_fast() not used
++ *
++ * 1.2.beta4 1 Jan 2003
++ * - Split ptr - 257 statements in inflate_table() to avoid compiler warnings
++ * - Move a comment on output buffer sizes from inffast.c to inflate.c
++ * - Add comments in inffast.c to introduce the inflate_fast() routine
++ * - Rearrange window copies in inflate_fast() for speed and simplification
++ * - Unroll last copy for window match in inflate_fast()
++ * - Use local copies of window variables in inflate_fast() for speed
++ * - Pull out common wnext == 0 case for speed in inflate_fast()
++ * - Make op and len in inflate_fast() unsigned for consistency
++ * - Add FAR to lcode and dcode declarations in inflate_fast()
++ * - Simplified bad distance check in inflate_fast()
++ * - Added inflateBackInit(), inflateBack(), and inflateBackEnd() in new
++ * source file infback.c to provide a call-back interface to inflate for
++ * programs like gzip and unzip -- uses window as output buffer to avoid
++ * window copying
++ *
++ * 1.2.beta5 1 Jan 2003
++ * - Improved inflateBack() interface to allow the caller to provide initial
++ * input in strm.
++ * - Fixed stored blocks bug in inflateBack()
++ *
++ * 1.2.beta6 4 Jan 2003
++ * - Added comments in inffast.c on effectiveness of POSTINC
++ * - Typecasting all around to reduce compiler warnings
++ * - Changed loops from while (1) or do {} while (1) to for (;;), again to
++ * make compilers happy
++ * - Changed type of window in inflateBackInit() to unsigned char *
++ *
++ * 1.2.beta7 27 Jan 2003
++ * - Changed many types to unsigned or unsigned short to avoid warnings
++ * - Added inflateCopy() function
++ *
++ * 1.2.0 9 Mar 2003
++ * - Changed inflateBack() interface to provide separate opaque descriptors
++ * for the in() and out() functions
++ * - Changed inflateBack() argument and in_func typedef to swap the length
++ * and buffer address return values for the input function
++ * - Check next_in and next_out for Z_NULL on entry to inflate()
++ *
++ * The history for versions after 1.2.0 are in ChangeLog in zlib distribution.
++ */
++
++#include "zutil.h"
++#include "inftrees.h"
++#include "inflate.h"
++#include "inffast.h"
++
++#ifdef MAKEFIXED
++# ifndef BUILDFIXED
++# define BUILDFIXED
++# endif
++#endif
++
++/* function prototypes */
++local int inflateStateCheck OF((z_streamp strm));
++local void fixedtables OF((struct inflate_state FAR *state));
++local int updatewindow OF((z_streamp strm, const unsigned char FAR *end,
++ unsigned copy));
++#ifdef BUILDFIXED
++ void makefixed OF((void));
++#endif
++local unsigned syncsearch OF((unsigned FAR *have, const unsigned char FAR *buf,
++ unsigned len));
++
++local int inflateStateCheck(strm)
++z_streamp strm;
++{
++ struct inflate_state FAR *state;
++ if (strm == Z_NULL ||
++ strm->zalloc == (alloc_func)0 || strm->zfree == (free_func)0)
++ return 1;
++ state = (struct inflate_state FAR *)strm->state;
++ if (state == Z_NULL || state->strm != strm ||
++ state->mode < HEAD || state->mode > SYNC)
++ return 1;
++ return 0;
++}
++
++int ZEXPORT inflateResetKeep(strm)
++z_streamp strm;
++{
++ struct inflate_state FAR *state;
++
++ if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
++ state = (struct inflate_state FAR *)strm->state;
++ strm->total_in = strm->total_out = state->total = 0;
++ strm->msg = Z_NULL;
++ if (state->wrap) /* to support ill-conceived Java test suite */
++ strm->adler = state->wrap & 1;
++ state->mode = HEAD;
++ state->last = 0;
++ state->havedict = 0;
++ state->dmax = 32768U;
++ state->head = Z_NULL;
++ state->hold = 0;
++ state->bits = 0;
++ state->lencode = state->distcode = state->next = state->codes;
++ state->sane = 1;
++ state->back = -1;
++ Tracev((stderr, "inflate: reset\n"));
++ return Z_OK;
++}
++
++int ZEXPORT inflateReset(strm)
++z_streamp strm;
++{
++ struct inflate_state FAR *state;
++
++ if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
++ state = (struct inflate_state FAR *)strm->state;
++ state->wsize = 0;
++ state->whave = 0;
++ state->wnext = 0;
++ return inflateResetKeep(strm);
++}
++
++int ZEXPORT inflateReset2(strm, windowBits)
++z_streamp strm;
++int windowBits;
++{
++ int wrap;
++ struct inflate_state FAR *state;
++
++ /* get the state */
++ if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
++ state = (struct inflate_state FAR *)strm->state;
++
++ /* extract wrap request from windowBits parameter */
++ if (windowBits < 0) {
++ wrap = 0;
++ windowBits = -windowBits;
++ }
++ else {
++ wrap = (windowBits >> 4) + 5;
++#ifdef GUNZIP
++ if (windowBits < 48)
++ windowBits &= 15;
++#endif
++ }
++
++ /* set number of window bits, free window if different */
++ if (windowBits && (windowBits < 8 || windowBits > 15))
++ return Z_STREAM_ERROR;
++ if (state->window != Z_NULL && state->wbits != (unsigned)windowBits) {
++ ZFREE(strm, state->window);
++ state->window = Z_NULL;
++ }
++
++ /* update state and reset the rest of it */
++ state->wrap = wrap;
++ state->wbits = (unsigned)windowBits;
++ return inflateReset(strm);
++}
++
++int ZEXPORT inflateInit2_(strm, windowBits, version, stream_size)
++z_streamp strm;
++int windowBits;
++const char *version;
++int stream_size;
++{
++ int ret;
++ struct inflate_state FAR *state;
++
++ if (version == Z_NULL || version[0] != ZLIB_VERSION[0] ||
++ stream_size != (int)(sizeof(z_stream)))
++ return Z_VERSION_ERROR;
++ if (strm == Z_NULL) return Z_STREAM_ERROR;
++ strm->msg = Z_NULL; /* in case we return an error */
++ if (strm->zalloc == (alloc_func)0) {
++#ifdef Z_SOLO
++ return Z_STREAM_ERROR;
++#else
++ strm->zalloc = zcalloc;
++ strm->opaque = (voidpf)0;
++#endif
++ }
++ if (strm->zfree == (free_func)0)
++#ifdef Z_SOLO
++ return Z_STREAM_ERROR;
++#else
++ strm->zfree = zcfree;
++#endif
++ state = (struct inflate_state FAR *)
++ ZALLOC(strm, 1, sizeof(struct inflate_state));
++ if (state == Z_NULL) return Z_MEM_ERROR;
++ Tracev((stderr, "inflate: allocated\n"));
++ strm->state = (struct internal_state FAR *)state;
++ state->strm = strm;
++ state->window = Z_NULL;
++ state->mode = HEAD; /* to pass state test in inflateReset2() */
++ ret = inflateReset2(strm, windowBits);
++ if (ret != Z_OK) {
++ ZFREE(strm, state);
++ strm->state = Z_NULL;
++ }
++ return ret;
++}
++
++int ZEXPORT inflateInit_(strm, version, stream_size)
++z_streamp strm;
++const char *version;
++int stream_size;
++{
++ return inflateInit2_(strm, DEF_WBITS, version, stream_size);
++}
++
++int ZEXPORT inflatePrime(strm, bits, value)
++z_streamp strm;
++int bits;
++int value;
++{
++ struct inflate_state FAR *state;
++
++ if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
++ state = (struct inflate_state FAR *)strm->state;
++ if (bits < 0) {
++ state->hold = 0;
++ state->bits = 0;
++ return Z_OK;
++ }
++ if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR;
++ value &= (1L << bits) - 1;
++ state->hold += (unsigned)value << state->bits;
++ state->bits += (uInt)bits;
++ return Z_OK;
++}
++
++/*
++ Return state with length and distance decoding tables and index sizes set to
++ fixed code decoding. Normally this returns fixed tables from inffixed.h.
++ If BUILDFIXED is defined, then instead this routine builds the tables the
++ first time it's called, and returns those tables the first time and
++ thereafter. This reduces the size of the code by about 2K bytes, in
++ exchange for a little execution time. However, BUILDFIXED should not be
++ used for threaded applications, since the rewriting of the tables and virgin
++ may not be thread-safe.
++ */
++local void fixedtables(state)
++struct inflate_state FAR *state;
++{
++#ifdef BUILDFIXED
++ static int virgin = 1;
++ static code *lenfix, *distfix;
++ static code fixed[544];
++
++ /* build fixed huffman tables if first call (may not be thread safe) */
++ if (virgin) {
++ unsigned sym, bits;
++ static code *next;
++
++ /* literal/length table */
++ sym = 0;
++ while (sym < 144) state->lens[sym++] = 8;
++ while (sym < 256) state->lens[sym++] = 9;
++ while (sym < 280) state->lens[sym++] = 7;
++ while (sym < 288) state->lens[sym++] = 8;
++ next = fixed;
++ lenfix = next;
++ bits = 9;
++ inflate_table(LENS, state->lens, 288, &(next), &(bits), state->work);
++
++ /* distance table */
++ sym = 0;
++ while (sym < 32) state->lens[sym++] = 5;
++ distfix = next;
++ bits = 5;
++ inflate_table(DISTS, state->lens, 32, &(next), &(bits), state->work);
++
++ /* do this just once */
++ virgin = 0;
++ }
++#else /* !BUILDFIXED */
++# include "inffixed.h"
++#endif /* BUILDFIXED */
++ state->lencode = lenfix;
++ state->lenbits = 9;
++ state->distcode = distfix;
++ state->distbits = 5;
++}
++
++#ifdef MAKEFIXED
++#include <stdio.h>
++
++/*
++ Write out the inffixed.h that is #include'd above. Defining MAKEFIXED also
++ defines BUILDFIXED, so the tables are built on the fly. makefixed() writes
++ those tables to stdout, which would be piped to inffixed.h. A small program
++ can simply call makefixed to do this:
++
++ void makefixed(void);
++
++ int main(void)
++ {
++ makefixed();
++ return 0;
++ }
++
++ Then that can be linked with zlib built with MAKEFIXED defined and run:
++
++ a.out > inffixed.h
++ */
++void makefixed()
++{
++ unsigned low, size;
++ struct inflate_state state;
++
++ fixedtables(&state);
++ puts(" /* inffixed.h -- table for decoding fixed codes");
++ puts(" * Generated automatically by makefixed().");
++ puts(" */");
++ puts("");
++ puts(" /* WARNING: this file should *not* be used by applications.");
++ puts(" It is part of the implementation of this library and is");
++ puts(" subject to change. Applications should only use zlib.h.");
++ puts(" */");
++ puts("");
++ size = 1U << 9;
++ printf(" static const code lenfix[%u] = {", size);
++ low = 0;
++ for (;;) {
++ if ((low % 7) == 0) printf("\n ");
++ printf("{%u,%u,%d}", (low & 127) == 99 ? 64 : state.lencode[low].op,
++ state.lencode[low].bits, state.lencode[low].val);
++ if (++low == size) break;
++ putchar(',');
++ }
++ puts("\n };");
++ size = 1U << 5;
++ printf("\n static const code distfix[%u] = {", size);
++ low = 0;
++ for (;;) {
++ if ((low % 6) == 0) printf("\n ");
++ printf("{%u,%u,%d}", state.distcode[low].op, state.distcode[low].bits,
++ state.distcode[low].val);
++ if (++low == size) break;
++ putchar(',');
++ }
++ puts("\n };");
++}
++#endif /* MAKEFIXED */
++
++/*
++ Update the window with the last wsize (normally 32K) bytes written before
++ returning. If window does not exist yet, create it. This is only called
++ when a window is already in use, or when output has been written during this
++ inflate call, but the end of the deflate stream has not been reached yet.
++ It is also called to create a window for dictionary data when a dictionary
++ is loaded.
++
++ Providing output buffers larger than 32K to inflate() should provide a speed
++ advantage, since only the last 32K of output is copied to the sliding window
++ upon return from inflate(), and since all distances after the first 32K of
++ output will fall in the output data, making match copies simpler and faster.
++ The advantage may be dependent on the size of the processor's data caches.
++ */
++local int updatewindow(strm, end, copy)
++z_streamp strm;
++const Bytef *end;
++unsigned copy;
++{
++ struct inflate_state FAR *state;
++ unsigned dist;
++
++ state = (struct inflate_state FAR *)strm->state;
++
++ /* if it hasn't been done already, allocate space for the window */
++ if (state->window == Z_NULL) {
++ state->window = (unsigned char FAR *)
++ ZALLOC(strm, 1U << state->wbits,
++ sizeof(unsigned char));
++ if (state->window == Z_NULL) return 1;
++ }
++
++ /* if window not in use yet, initialize */
++ if (state->wsize == 0) {
++ state->wsize = 1U << state->wbits;
++ state->wnext = 0;
++ state->whave = 0;
++ }
++
++ /* copy state->wsize or less output bytes into the circular window */
++ if (copy >= state->wsize) {
++ zmemcpy(state->window, end - state->wsize, state->wsize);
++ state->wnext = 0;
++ state->whave = state->wsize;
++ }
++ else {
++ dist = state->wsize - state->wnext;
++ if (dist > copy) dist = copy;
++ zmemcpy(state->window + state->wnext, end - copy, dist);
++ copy -= dist;
++ if (copy) {
++ zmemcpy(state->window, end - copy, copy);
++ state->wnext = copy;
++ state->whave = state->wsize;
++ }
++ else {
++ state->wnext += dist;
++ if (state->wnext == state->wsize) state->wnext = 0;
++ if (state->whave < state->wsize) state->whave += dist;
++ }
++ }
++ return 0;
++}
++
++/* Macros for inflate(): */
++
++/* check function to use adler32() for zlib or crc32() for gzip */
++#ifdef GUNZIP
++# define UPDATE(check, buf, len) \
++ (state->flags ? crc32(check, buf, len) : adler32(check, buf, len))
++#else
++# define UPDATE(check, buf, len) adler32(check, buf, len)
++#endif
++
++/* check macros for header crc */
++#ifdef GUNZIP
++# define CRC2(check, word) \
++ do { \
++ hbuf[0] = (unsigned char)(word); \
++ hbuf[1] = (unsigned char)((word) >> 8); \
++ check = crc32(check, hbuf, 2); \
++ } while (0)
++
++# define CRC4(check, word) \
++ do { \
++ hbuf[0] = (unsigned char)(word); \
++ hbuf[1] = (unsigned char)((word) >> 8); \
++ hbuf[2] = (unsigned char)((word) >> 16); \
++ hbuf[3] = (unsigned char)((word) >> 24); \
++ check = crc32(check, hbuf, 4); \
++ } while (0)
++#endif
++
++/* Load registers with state in inflate() for speed */
++#define LOAD() \
++ do { \
++ put = strm->next_out; \
++ left = strm->avail_out; \
++ next = strm->next_in; \
++ have = strm->avail_in; \
++ hold = state->hold; \
++ bits = state->bits; \
++ } while (0)
++
++/* Restore state from registers in inflate() */
++#define RESTORE() \
++ do { \
++ strm->next_out = put; \
++ strm->avail_out = left; \
++ strm->next_in = next; \
++ strm->avail_in = have; \
++ state->hold = hold; \
++ state->bits = bits; \
++ } while (0)
++
++/* Clear the input bit accumulator */
++#define INITBITS() \
++ do { \
++ hold = 0; \
++ bits = 0; \
++ } while (0)
++
++/* Get a byte of input into the bit accumulator, or return from inflate()
++ if there is no input available. */
++#define PULLBYTE() \
++ do { \
++ if (have == 0) goto inf_leave; \
++ have--; \
++ hold += (unsigned long)(*next++) << bits; \
++ bits += 8; \
++ } while (0)
++
++/* Assure that there are at least n bits in the bit accumulator. If there is
++ not enough available input to do that, then return from inflate(). */
++#define NEEDBITS(n) \
++ do { \
++ while (bits < (unsigned)(n)) \
++ PULLBYTE(); \
++ } while (0)
++
++/* Return the low n bits of the bit accumulator (n < 16) */
++#define BITS(n) \
++ ((unsigned)hold & ((1U << (n)) - 1))
++
++/* Remove n bits from the bit accumulator */
++#define DROPBITS(n) \
++ do { \
++ hold >>= (n); \
++ bits -= (unsigned)(n); \
++ } while (0)
++
++/* Remove zero to seven bits as needed to go to a byte boundary */
++#define BYTEBITS() \
++ do { \
++ hold >>= bits & 7; \
++ bits -= bits & 7; \
++ } while (0)
++
++/*
++ inflate() uses a state machine to process as much input data and generate as
++ much output data as possible before returning. The state machine is
++ structured roughly as follows:
++
++ for (;;) switch (state) {
++ ...
++ case STATEn:
++ if (not enough input data or output space to make progress)
++ return;
++ ... make progress ...
++ state = STATEm;
++ break;
++ ...
++ }
++
++ so when inflate() is called again, the same case is attempted again, and
++ if the appropriate resources are provided, the machine proceeds to the
++ next state. The NEEDBITS() macro is usually the way the state evaluates
++ whether it can proceed or should return. NEEDBITS() does the return if
++ the requested bits are not available. The typical use of the BITS macros
++ is:
++
++ NEEDBITS(n);
++ ... do something with BITS(n) ...
++ DROPBITS(n);
++
++ where NEEDBITS(n) either returns from inflate() if there isn't enough
++ input left to load n bits into the accumulator, or it continues. BITS(n)
++ gives the low n bits in the accumulator. When done, DROPBITS(n) drops
++ the low n bits off the accumulator. INITBITS() clears the accumulator
++ and sets the number of available bits to zero. BYTEBITS() discards just
++ enough bits to put the accumulator on a byte boundary. After BYTEBITS()
++ and a NEEDBITS(8), then BITS(8) would return the next byte in the stream.
++
++ NEEDBITS(n) uses PULLBYTE() to get an available byte of input, or to return
++ if there is no input available. The decoding of variable length codes uses
++ PULLBYTE() directly in order to pull just enough bytes to decode the next
++ code, and no more.
++
++ Some states loop until they get enough input, making sure that enough
++ state information is maintained to continue the loop where it left off
++ if NEEDBITS() returns in the loop. For example, want, need, and keep
++ would all have to actually be part of the saved state in case NEEDBITS()
++ returns:
++
++ case STATEw:
++ while (want < need) {
++ NEEDBITS(n);
++ keep[want++] = BITS(n);
++ DROPBITS(n);
++ }
++ state = STATEx;
++ case STATEx:
++
++ As shown above, if the next state is also the next case, then the break
++ is omitted.
++
++ A state may also return if there is not enough output space available to
++ complete that state. Those states are copying stored data, writing a
++ literal byte, and copying a matching string.
++
++ When returning, a "goto inf_leave" is used to update the total counters,
++ update the check value, and determine whether any progress has been made
++ during that inflate() call in order to return the proper return code.
++ Progress is defined as a change in either strm->avail_in or strm->avail_out.
++ When there is a window, goto inf_leave will update the window with the last
++ output written. If a goto inf_leave occurs in the middle of decompression
++ and there is no window currently, goto inf_leave will create one and copy
++ output to the window for the next call of inflate().
++
++ In this implementation, the flush parameter of inflate() only affects the
++ return code (per zlib.h). inflate() always writes as much as possible to
++ strm->next_out, given the space available and the provided input--the effect
++ documented in zlib.h of Z_SYNC_FLUSH. Furthermore, inflate() always defers
++ the allocation of and copying into a sliding window until necessary, which
++ provides the effect documented in zlib.h for Z_FINISH when the entire input
++ stream available. So the only thing the flush parameter actually does is:
++ when flush is set to Z_FINISH, inflate() cannot return Z_OK. Instead it
++ will return Z_BUF_ERROR if it has not reached the end of the stream.
++ */
++
++int ZEXPORT inflate(strm, flush)
++z_streamp strm;
++int flush;
++{
++ struct inflate_state FAR *state;
++ z_const unsigned char FAR *next; /* next input */
++ unsigned char FAR *put; /* next output */
++ unsigned have, left; /* available input and output */
++ unsigned long hold; /* bit buffer */
++ unsigned bits; /* bits in bit buffer */
++ unsigned in, out; /* save starting available input and output */
++ unsigned copy; /* number of stored or match bytes to copy */
++ unsigned char FAR *from; /* where to copy match bytes from */
++ code here; /* current decoding table entry */
++ code last; /* parent table entry */
++ unsigned len; /* length to copy for repeats, bits to drop */
++ int ret; /* return code */
++#ifdef GUNZIP
++ unsigned char hbuf[4]; /* buffer for gzip header crc calculation */
++#endif
++ static const unsigned short order[19] = /* permutation of code lengths */
++ {16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
++
++ if (inflateStateCheck(strm) || strm->next_out == Z_NULL ||
++ (strm->next_in == Z_NULL && strm->avail_in != 0))
++ return Z_STREAM_ERROR;
++
++ state = (struct inflate_state FAR *)strm->state;
++ if (state->mode == TYPE) state->mode = TYPEDO; /* skip check */
++ LOAD();
++ in = have;
++ out = left;
++ ret = Z_OK;
++ for (;;)
++ switch (state->mode) {
++ case HEAD:
++ if (state->wrap == 0) {
++ state->mode = TYPEDO;
++ break;
++ }
++ NEEDBITS(16);
++#ifdef GUNZIP
++ if ((state->wrap & 2) && hold == 0x8b1f) { /* gzip header */
++ if (state->wbits == 0)
++ state->wbits = 15;
++ state->check = crc32(0L, Z_NULL, 0);
++ CRC2(state->check, hold);
++ INITBITS();
++ state->mode = FLAGS;
++ break;
++ }
++ state->flags = 0; /* expect zlib header */
++ if (state->head != Z_NULL)
++ state->head->done = -1;
++ if (!(state->wrap & 1) || /* check if zlib header allowed */
++#else
++ if (
++#endif
++ ((BITS(8) << 8) + (hold >> 8)) % 31) {
++ strm->msg = (char *)"incorrect header check";
++ state->mode = BAD;
++ break;
++ }
++ if (BITS(4) != Z_DEFLATED) {
++ strm->msg = (char *)"unknown compression method";
++ state->mode = BAD;
++ break;
++ }
++ DROPBITS(4);
++ len = BITS(4) + 8;
++ if (state->wbits == 0)
++ state->wbits = len;
++ if (len > 15 || len > state->wbits) {
++ strm->msg = (char *)"invalid window size";
++ state->mode = BAD;
++ break;
++ }
++ state->dmax = 1U << len;
++ Tracev((stderr, "inflate: zlib header ok\n"));
++ strm->adler = state->check = adler32(0L, Z_NULL, 0);
++ state->mode = hold & 0x200 ? DICTID : TYPE;
++ INITBITS();
++ break;
++#ifdef GUNZIP
++ case FLAGS:
++ NEEDBITS(16);
++ state->flags = (int)(hold);
++ if ((state->flags & 0xff) != Z_DEFLATED) {
++ strm->msg = (char *)"unknown compression method";
++ state->mode = BAD;
++ break;
++ }
++ if (state->flags & 0xe000) {
++ strm->msg = (char *)"unknown header flags set";
++ state->mode = BAD;
++ break;
++ }
++ if (state->head != Z_NULL)
++ state->head->text = (int)((hold >> 8) & 1);
++ if ((state->flags & 0x0200) && (state->wrap & 4))
++ CRC2(state->check, hold);
++ INITBITS();
++ state->mode = TIME;
++ case TIME:
++ NEEDBITS(32);
++ if (state->head != Z_NULL)
++ state->head->time = hold;
++ if ((state->flags & 0x0200) && (state->wrap & 4))
++ CRC4(state->check, hold);
++ INITBITS();
++ state->mode = OS;
++ case OS:
++ NEEDBITS(16);
++ if (state->head != Z_NULL) {
++ state->head->xflags = (int)(hold & 0xff);
++ state->head->os = (int)(hold >> 8);
++ }
++ if ((state->flags & 0x0200) && (state->wrap & 4))
++ CRC2(state->check, hold);
++ INITBITS();
++ state->mode = EXLEN;
++ case EXLEN:
++ if (state->flags & 0x0400) {
++ NEEDBITS(16);
++ state->length = (unsigned)(hold);
++ if (state->head != Z_NULL)
++ state->head->extra_len = (unsigned)hold;
++ if ((state->flags & 0x0200) && (state->wrap & 4))
++ CRC2(state->check, hold);
++ INITBITS();
++ }
++ else if (state->head != Z_NULL)
++ state->head->extra = Z_NULL;
++ state->mode = EXTRA;
++ case EXTRA:
++ if (state->flags & 0x0400) {
++ copy = state->length;
++ if (copy > have) copy = have;
++ if (copy) {
++ if (state->head != Z_NULL &&
++ state->head->extra != Z_NULL) {
++ len = state->head->extra_len - state->length;
++ zmemcpy(state->head->extra + len, next,
++ len + copy > state->head->extra_max ?
++ state->head->extra_max - len : copy);
++ }
++ if ((state->flags & 0x0200) && (state->wrap & 4))
++ state->check = crc32(state->check, next, copy);
++ have -= copy;
++ next += copy;
++ state->length -= copy;
++ }
++ if (state->length) goto inf_leave;
++ }
++ state->length = 0;
++ state->mode = NAME;
++ case NAME:
++ if (state->flags & 0x0800) {
++ if (have == 0) goto inf_leave;
++ copy = 0;
++ do {
++ len = (unsigned)(next[copy++]);
++ if (state->head != Z_NULL &&
++ state->head->name != Z_NULL &&
++ state->length < state->head->name_max)
++ state->head->name[state->length++] = (Bytef)len;
++ } while (len && copy < have);
++ if ((state->flags & 0x0200) && (state->wrap & 4))
++ state->check = crc32(state->check, next, copy);
++ have -= copy;
++ next += copy;
++ if (len) goto inf_leave;
++ }
++ else if (state->head != Z_NULL)
++ state->head->name = Z_NULL;
++ state->length = 0;
++ state->mode = COMMENT;
++ case COMMENT:
++ if (state->flags & 0x1000) {
++ if (have == 0) goto inf_leave;
++ copy = 0;
++ do {
++ len = (unsigned)(next[copy++]);
++ if (state->head != Z_NULL &&
++ state->head->comment != Z_NULL &&
++ state->length < state->head->comm_max)
++ state->head->comment[state->length++] = (Bytef)len;
++ } while (len && copy < have);
++ if ((state->flags & 0x0200) && (state->wrap & 4))
++ state->check = crc32(state->check, next, copy);
++ have -= copy;
++ next += copy;
++ if (len) goto inf_leave;
++ }
++ else if (state->head != Z_NULL)
++ state->head->comment = Z_NULL;
++ state->mode = HCRC;
++ case HCRC:
++ if (state->flags & 0x0200) {
++ NEEDBITS(16);
++ if ((state->wrap & 4) && hold != (state->check & 0xffff)) {
++ strm->msg = (char *)"header crc mismatch";
++ state->mode = BAD;
++ break;
++ }
++ INITBITS();
++ }
++ if (state->head != Z_NULL) {
++ state->head->hcrc = (int)((state->flags >> 9) & 1);
++ state->head->done = 1;
++ }
++ strm->adler = state->check = crc32(0L, Z_NULL, 0);
++ state->mode = TYPE;
++ break;
++#endif
++ case DICTID:
++ NEEDBITS(32);
++ strm->adler = state->check = ZSWAP32(hold);
++ INITBITS();
++ state->mode = DICT;
++ case DICT:
++ if (state->havedict == 0) {
++ RESTORE();
++ return Z_NEED_DICT;
++ }
++ strm->adler = state->check = adler32(0L, Z_NULL, 0);
++ state->mode = TYPE;
++ case TYPE:
++ if (flush == Z_BLOCK || flush == Z_TREES) goto inf_leave;
++ case TYPEDO:
++ if (state->last) {
++ BYTEBITS();
++ state->mode = CHECK;
++ break;
++ }
++ NEEDBITS(3);
++ state->last = BITS(1);
++ DROPBITS(1);
++ switch (BITS(2)) {
++ case 0: /* stored block */
++ Tracev((stderr, "inflate: stored block%s\n",
++ state->last ? " (last)" : ""));
++ state->mode = STORED;
++ break;
++ case 1: /* fixed block */
++ fixedtables(state);
++ Tracev((stderr, "inflate: fixed codes block%s\n",
++ state->last ? " (last)" : ""));
++ state->mode = LEN_; /* decode codes */
++ if (flush == Z_TREES) {
++ DROPBITS(2);
++ goto inf_leave;
++ }
++ break;
++ case 2: /* dynamic block */
++ Tracev((stderr, "inflate: dynamic codes block%s\n",
++ state->last ? " (last)" : ""));
++ state->mode = TABLE;
++ break;
++ case 3:
++ strm->msg = (char *)"invalid block type";
++ state->mode = BAD;
++ }
++ DROPBITS(2);
++ break;
++ case STORED:
++ BYTEBITS(); /* go to byte boundary */
++ NEEDBITS(32);
++ if ((hold & 0xffff) != ((hold >> 16) ^ 0xffff)) {
++ strm->msg = (char *)"invalid stored block lengths";
++ state->mode = BAD;
++ break;
++ }
++ state->length = (unsigned)hold & 0xffff;
++ Tracev((stderr, "inflate: stored length %u\n",
++ state->length));
++ INITBITS();
++ state->mode = COPY_;
++ if (flush == Z_TREES) goto inf_leave;
++ case COPY_:
++ state->mode = COPY;
++ case COPY:
++ copy = state->length;
++ if (copy) {
++ if (copy > have) copy = have;
++ if (copy > left) copy = left;
++ if (copy == 0) goto inf_leave;
++ zmemcpy(put, next, copy);
++ have -= copy;
++ next += copy;
++ left -= copy;
++ put += copy;
++ state->length -= copy;
++ break;
++ }
++ Tracev((stderr, "inflate: stored end\n"));
++ state->mode = TYPE;
++ break;
++ case TABLE:
++ NEEDBITS(14);
++ state->nlen = BITS(5) + 257;
++ DROPBITS(5);
++ state->ndist = BITS(5) + 1;
++ DROPBITS(5);
++ state->ncode = BITS(4) + 4;
++ DROPBITS(4);
++#ifndef PKZIP_BUG_WORKAROUND
++ if (state->nlen > 286 || state->ndist > 30) {
++ strm->msg = (char *)"too many length or distance symbols";
++ state->mode = BAD;
++ break;
++ }
++#endif
++ Tracev((stderr, "inflate: table sizes ok\n"));
++ state->have = 0;
++ state->mode = LENLENS;
++ case LENLENS:
++ while (state->have < state->ncode) {
++ NEEDBITS(3);
++ state->lens[order[state->have++]] = (unsigned short)BITS(3);
++ DROPBITS(3);
++ }
++ while (state->have < 19)
++ state->lens[order[state->have++]] = 0;
++ state->next = state->codes;
++ state->lencode = (const code FAR *)(state->next);
++ state->lenbits = 7;
++ ret = inflate_table(CODES, state->lens, 19, &(state->next),
++ &(state->lenbits), state->work);
++ if (ret) {
++ strm->msg = (char *)"invalid code lengths set";
++ state->mode = BAD;
++ break;
++ }
++ Tracev((stderr, "inflate: code lengths ok\n"));
++ state->have = 0;
++ state->mode = CODELENS;
++ case CODELENS:
++ while (state->have < state->nlen + state->ndist) {
++ for (;;) {
++ here = state->lencode[BITS(state->lenbits)];
++ if ((unsigned)(here.bits) <= bits) break;
++ PULLBYTE();
++ }
++ if (here.val < 16) {
++ DROPBITS(here.bits);
++ state->lens[state->have++] = here.val;
++ }
++ else {
++ if (here.val == 16) {
++ NEEDBITS(here.bits + 2);
++ DROPBITS(here.bits);
++ if (state->have == 0) {
++ strm->msg = (char *)"invalid bit length repeat";
++ state->mode = BAD;
++ break;
++ }
++ len = state->lens[state->have - 1];
++ copy = 3 + BITS(2);
++ DROPBITS(2);
++ }
++ else if (here.val == 17) {
++ NEEDBITS(here.bits + 3);
++ DROPBITS(here.bits);
++ len = 0;
++ copy = 3 + BITS(3);
++ DROPBITS(3);
++ }
++ else {
++ NEEDBITS(here.bits + 7);
++ DROPBITS(here.bits);
++ len = 0;
++ copy = 11 + BITS(7);
++ DROPBITS(7);
++ }
++ if (state->have + copy > state->nlen + state->ndist) {
++ strm->msg = (char *)"invalid bit length repeat";
++ state->mode = BAD;
++ break;
++ }
++ while (copy--)
++ state->lens[state->have++] = (unsigned short)len;
++ }
++ }
++
++ /* handle error breaks in while */
++ if (state->mode == BAD) break;
++
++ /* check for end-of-block code (better have one) */
++ if (state->lens[256] == 0) {
++ strm->msg = (char *)"invalid code -- missing end-of-block";
++ state->mode = BAD;
++ break;
++ }
++
++ /* build code tables -- note: do not change the lenbits or distbits
++ values here (9 and 6) without reading the comments in inftrees.h
++ concerning the ENOUGH constants, which depend on those values */
++ state->next = state->codes;
++ state->lencode = (const code FAR *)(state->next);
++ state->lenbits = 9;
++ ret = inflate_table(LENS, state->lens, state->nlen, &(state->next),
++ &(state->lenbits), state->work);
++ if (ret) {
++ strm->msg = (char *)"invalid literal/lengths set";
++ state->mode = BAD;
++ break;
++ }
++ state->distcode = (const code FAR *)(state->next);
++ state->distbits = 6;
++ ret = inflate_table(DISTS, state->lens + state->nlen, state->ndist,
++ &(state->next), &(state->distbits), state->work);
++ if (ret) {
++ strm->msg = (char *)"invalid distances set";
++ state->mode = BAD;
++ break;
++ }
++ Tracev((stderr, "inflate: codes ok\n"));
++ state->mode = LEN_;
++ if (flush == Z_TREES) goto inf_leave;
++ case LEN_:
++ state->mode = LEN;
++ case LEN:
++ if (have >= 6 && left >= 258) {
++ RESTORE();
++ inflate_fast(strm, out);
++ LOAD();
++ if (state->mode == TYPE)
++ state->back = -1;
++ break;
++ }
++ state->back = 0;
++ for (;;) {
++ here = state->lencode[BITS(state->lenbits)];
++ if ((unsigned)(here.bits) <= bits) break;
++ PULLBYTE();
++ }
++ if (here.op && (here.op & 0xf0) == 0) {
++ last = here;
++ for (;;) {
++ here = state->lencode[last.val +
++ (BITS(last.bits + last.op) >> last.bits)];
++ if ((unsigned)(last.bits + here.bits) <= bits) break;
++ PULLBYTE();
++ }
++ DROPBITS(last.bits);
++ state->back += last.bits;
++ }
++ DROPBITS(here.bits);
++ state->back += here.bits;
++ state->length = (unsigned)here.val;
++ if ((int)(here.op) == 0) {
++ Tracevv((stderr, here.val >= 0x20 && here.val < 0x7f ?
++ "inflate: literal '%c'\n" :
++ "inflate: literal 0x%02x\n", here.val));
++ state->mode = LIT;
++ break;
++ }
++ if (here.op & 32) {
++ Tracevv((stderr, "inflate: end of block\n"));
++ state->back = -1;
++ state->mode = TYPE;
++ break;
++ }
++ if (here.op & 64) {
++ strm->msg = (char *)"invalid literal/length code";
++ state->mode = BAD;
++ break;
++ }
++ state->extra = (unsigned)(here.op) & 15;
++ state->mode = LENEXT;
++ case LENEXT:
++ if (state->extra) {
++ NEEDBITS(state->extra);
++ state->length += BITS(state->extra);
++ DROPBITS(state->extra);
++ state->back += state->extra;
++ }
++ Tracevv((stderr, "inflate: length %u\n", state->length));
++ state->was = state->length;
++ state->mode = DIST;
++ case DIST:
++ for (;;) {
++ here = state->distcode[BITS(state->distbits)];
++ if ((unsigned)(here.bits) <= bits) break;
++ PULLBYTE();
++ }
++ if ((here.op & 0xf0) == 0) {
++ last = here;
++ for (;;) {
++ here = state->distcode[last.val +
++ (BITS(last.bits + last.op) >> last.bits)];
++ if ((unsigned)(last.bits + here.bits) <= bits) break;
++ PULLBYTE();
++ }
++ DROPBITS(last.bits);
++ state->back += last.bits;
++ }
++ DROPBITS(here.bits);
++ state->back += here.bits;
++ if (here.op & 64) {
++ strm->msg = (char *)"invalid distance code";
++ state->mode = BAD;
++ break;
++ }
++ state->offset = (unsigned)here.val;
++ state->extra = (unsigned)(here.op) & 15;
++ state->mode = DISTEXT;
++ case DISTEXT:
++ if (state->extra) {
++ NEEDBITS(state->extra);
++ state->offset += BITS(state->extra);
++ DROPBITS(state->extra);
++ state->back += state->extra;
++ }
++#ifdef INFLATE_STRICT
++ if (state->offset > state->dmax) {
++ strm->msg = (char *)"invalid distance too far back";
++ state->mode = BAD;
++ break;
++ }
++#endif
++ Tracevv((stderr, "inflate: distance %u\n", state->offset));
++ state->mode = MATCH;
++ case MATCH:
++ if (left == 0) goto inf_leave;
++ copy = out - left;
++ if (state->offset > copy) { /* copy from window */
++ copy = state->offset - copy;
++ if (copy > state->whave) {
++ if (state->sane) {
++ strm->msg = (char *)"invalid distance too far back";
++ state->mode = BAD;
++ break;
++ }
++#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
++ Trace((stderr, "inflate.c too far\n"));
++ copy -= state->whave;
++ if (copy > state->length) copy = state->length;
++ if (copy > left) copy = left;
++ left -= copy;
++ state->length -= copy;
++ do {
++ *put++ = 0;
++ } while (--copy);
++ if (state->length == 0) state->mode = LEN;
++ break;
++#endif
++ }
++ if (copy > state->wnext) {
++ copy -= state->wnext;
++ from = state->window + (state->wsize - copy);
++ }
++ else
++ from = state->window + (state->wnext - copy);
++ if (copy > state->length) copy = state->length;
++ }
++ else { /* copy from output */
++ from = put - state->offset;
++ copy = state->length;
++ }
++ if (copy > left) copy = left;
++ left -= copy;
++ state->length -= copy;
++ do {
++ *put++ = *from++;
++ } while (--copy);
++ if (state->length == 0) state->mode = LEN;
++ break;
++ case LIT:
++ if (left == 0) goto inf_leave;
++ *put++ = (unsigned char)(state->length);
++ left--;
++ state->mode = LEN;
++ break;
++ case CHECK:
++ if (state->wrap) {
++ NEEDBITS(32);
++ out -= left;
++ strm->total_out += out;
++ state->total += out;
++ if ((state->wrap & 4) && out)
++ strm->adler = state->check =
++ UPDATE(state->check, put - out, out);
++ out = left;
++ if ((state->wrap & 4) && (
++#ifdef GUNZIP
++ state->flags ? hold :
++#endif
++ ZSWAP32(hold)) != state->check) {
++ strm->msg = (char *)"incorrect data check";
++ state->mode = BAD;
++ break;
++ }
++ INITBITS();
++ Tracev((stderr, "inflate: check matches trailer\n"));
++ }
++#ifdef GUNZIP
++ state->mode = LENGTH;
++ case LENGTH:
++ if (state->wrap && state->flags) {
++ NEEDBITS(32);
++ if (hold != (state->total & 0xffffffffUL)) {
++ strm->msg = (char *)"incorrect length check";
++ state->mode = BAD;
++ break;
++ }
++ INITBITS();
++ Tracev((stderr, "inflate: length matches trailer\n"));
++ }
++#endif
++ state->mode = DONE;
++ case DONE:
++ ret = Z_STREAM_END;
++ goto inf_leave;
++ case BAD:
++ ret = Z_DATA_ERROR;
++ goto inf_leave;
++ case MEM:
++ return Z_MEM_ERROR;
++ case SYNC:
++ default:
++ return Z_STREAM_ERROR;
++ }
++
++ /*
++ Return from inflate(), updating the total counts and the check value.
++ If there was no progress during the inflate() call, return a buffer
++ error. Call updatewindow() to create and/or update the window state.
++ Note: a memory error from inflate() is non-recoverable.
++ */
++ inf_leave:
++ RESTORE();
++ if (state->wsize || (out != strm->avail_out && state->mode < BAD &&
++ (state->mode < CHECK || flush != Z_FINISH)))
++ if (updatewindow(strm, strm->next_out, out - strm->avail_out)) {
++ state->mode = MEM;
++ return Z_MEM_ERROR;
++ }
++ in -= strm->avail_in;
++ out -= strm->avail_out;
++ strm->total_in += in;
++ strm->total_out += out;
++ state->total += out;
++ if ((state->wrap & 4) && out)
++ strm->adler = state->check =
++ UPDATE(state->check, strm->next_out - out, out);
++ strm->data_type = (int)state->bits + (state->last ? 64 : 0) +
++ (state->mode == TYPE ? 128 : 0) +
++ (state->mode == LEN_ || state->mode == COPY_ ? 256 : 0);
++ if (((in == 0 && out == 0) || flush == Z_FINISH) && ret == Z_OK)
++ ret = Z_BUF_ERROR;
++ return ret;
++}
++
++int ZEXPORT inflateEnd(strm)
++z_streamp strm;
++{
++ struct inflate_state FAR *state;
++ if (inflateStateCheck(strm))
++ return Z_STREAM_ERROR;
++ state = (struct inflate_state FAR *)strm->state;
++ if (state->window != Z_NULL) ZFREE(strm, state->window);
++ ZFREE(strm, strm->state);
++ strm->state = Z_NULL;
++ Tracev((stderr, "inflate: end\n"));
++ return Z_OK;
++}
++
++int ZEXPORT inflateGetDictionary(strm, dictionary, dictLength)
++z_streamp strm;
++Bytef *dictionary;
++uInt *dictLength;
++{
++ struct inflate_state FAR *state;
++
++ /* check state */
++ if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
++ state = (struct inflate_state FAR *)strm->state;
++
++ /* copy dictionary */
++ if (state->whave && dictionary != Z_NULL) {
++ zmemcpy(dictionary, state->window + state->wnext,
++ state->whave - state->wnext);
++ zmemcpy(dictionary + state->whave - state->wnext,
++ state->window, state->wnext);
++ }
++ if (dictLength != Z_NULL)
++ *dictLength = state->whave;
++ return Z_OK;
++}
++
++int ZEXPORT inflateSetDictionary(strm, dictionary, dictLength)
++z_streamp strm;
++const Bytef *dictionary;
++uInt dictLength;
++{
++ struct inflate_state FAR *state;
++ unsigned long dictid;
++ int ret;
++
++ /* check state */
++ if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
++ state = (struct inflate_state FAR *)strm->state;
++ if (state->wrap != 0 && state->mode != DICT)
++ return Z_STREAM_ERROR;
++
++ /* check for correct dictionary identifier */
++ if (state->mode == DICT) {
++ dictid = adler32(0L, Z_NULL, 0);
++ dictid = adler32(dictid, dictionary, dictLength);
++ if (dictid != state->check)
++ return Z_DATA_ERROR;
++ }
++
++ /* copy dictionary to window using updatewindow(), which will amend the
++ existing dictionary if appropriate */
++ ret = updatewindow(strm, dictionary + dictLength, dictLength);
++ if (ret) {
++ state->mode = MEM;
++ return Z_MEM_ERROR;
++ }
++ state->havedict = 1;
++ Tracev((stderr, "inflate: dictionary set\n"));
++ return Z_OK;
++}
++
++int ZEXPORT inflateGetHeader(strm, head)
++z_streamp strm;
++gz_headerp head;
++{
++ struct inflate_state FAR *state;
++
++ /* check state */
++ if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
++ state = (struct inflate_state FAR *)strm->state;
++ if ((state->wrap & 2) == 0) return Z_STREAM_ERROR;
++
++ /* save header structure */
++ state->head = head;
++ head->done = 0;
++ return Z_OK;
++}
++
++/*
++ Search buf[0..len-1] for the pattern: 0, 0, 0xff, 0xff. Return when found
++ or when out of input. When called, *have is the number of pattern bytes
++ found in order so far, in 0..3. On return *have is updated to the new
++ state. If on return *have equals four, then the pattern was found and the
++ return value is how many bytes were read including the last byte of the
++ pattern. If *have is less than four, then the pattern has not been found
++ yet and the return value is len. In the latter case, syncsearch() can be
++ called again with more data and the *have state. *have is initialized to
++ zero for the first call.
++ */
++local unsigned syncsearch(have, buf, len)
++unsigned FAR *have;
++const unsigned char FAR *buf;
++unsigned len;
++{
++ unsigned got;
++ unsigned next;
++
++ got = *have;
++ next = 0;
++ while (next < len && got < 4) {
++ if ((int)(buf[next]) == (got < 2 ? 0 : 0xff))
++ got++;
++ else if (buf[next])
++ got = 0;
++ else
++ got = 4 - got;
++ next++;
++ }
++ *have = got;
++ return next;
++}
++
++int ZEXPORT inflateSync(strm)
++z_streamp strm;
++{
++ unsigned len; /* number of bytes to look at or looked at */
++ unsigned long in, out; /* temporary to save total_in and total_out */
++ unsigned char buf[4]; /* to restore bit buffer to byte string */
++ struct inflate_state FAR *state;
++
++ /* check parameters */
++ if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
++ state = (struct inflate_state FAR *)strm->state;
++ if (strm->avail_in == 0 && state->bits < 8) return Z_BUF_ERROR;
++
++ /* if first time, start search in bit buffer */
++ if (state->mode != SYNC) {
++ state->mode = SYNC;
++ state->hold <<= state->bits & 7;
++ state->bits -= state->bits & 7;
++ len = 0;
++ while (state->bits >= 8) {
++ buf[len++] = (unsigned char)(state->hold);
++ state->hold >>= 8;
++ state->bits -= 8;
++ }
++ state->have = 0;
++ syncsearch(&(state->have), buf, len);
++ }
++
++ /* search available input */
++ len = syncsearch(&(state->have), strm->next_in, strm->avail_in);
++ strm->avail_in -= len;
++ strm->next_in += len;
++ strm->total_in += len;
++
++ /* return no joy or set up to restart inflate() on a new block */
++ if (state->have != 4) return Z_DATA_ERROR;
++ in = strm->total_in; out = strm->total_out;
++ inflateReset(strm);
++ strm->total_in = in; strm->total_out = out;
++ state->mode = TYPE;
++ return Z_OK;
++}
++
++/*
++ Returns true if inflate is currently at the end of a block generated by
++ Z_SYNC_FLUSH or Z_FULL_FLUSH. This function is used by one PPP
++ implementation to provide an additional safety check. PPP uses
++ Z_SYNC_FLUSH but removes the length bytes of the resulting empty stored
++ block. When decompressing, PPP checks that at the end of input packet,
++ inflate is waiting for these length bytes.
++ */
++int ZEXPORT inflateSyncPoint(strm)
++z_streamp strm;
++{
++ struct inflate_state FAR *state;
++
++ if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
++ state = (struct inflate_state FAR *)strm->state;
++ return state->mode == STORED && state->bits == 0;
++}
++
++int ZEXPORT inflateCopy(dest, source)
++z_streamp dest;
++z_streamp source;
++{
++ struct inflate_state FAR *state;
++ struct inflate_state FAR *copy;
++ unsigned char FAR *window;
++ unsigned wsize;
++
++ /* check input */
++ if (inflateStateCheck(source) || dest == Z_NULL)
++ return Z_STREAM_ERROR;
++ state = (struct inflate_state FAR *)source->state;
++
++ /* allocate space */
++ copy = (struct inflate_state FAR *)
++ ZALLOC(source, 1, sizeof(struct inflate_state));
++ if (copy == Z_NULL) return Z_MEM_ERROR;
++ window = Z_NULL;
++ if (state->window != Z_NULL) {
++ window = (unsigned char FAR *)
++ ZALLOC(source, 1U << state->wbits, sizeof(unsigned char));
++ if (window == Z_NULL) {
++ ZFREE(source, copy);
++ return Z_MEM_ERROR;
++ }
++ }
++
++ /* copy state */
++ zmemcpy((voidpf)dest, (voidpf)source, sizeof(z_stream));
++ zmemcpy((voidpf)copy, (voidpf)state, sizeof(struct inflate_state));
++ copy->strm = dest;
++ if (state->lencode >= state->codes &&
++ state->lencode <= state->codes + ENOUGH - 1) {
++ copy->lencode = copy->codes + (state->lencode - state->codes);
++ copy->distcode = copy->codes + (state->distcode - state->codes);
++ }
++ copy->next = copy->codes + (state->next - state->codes);
++ if (window != Z_NULL) {
++ wsize = 1U << state->wbits;
++ zmemcpy(window, state->window, wsize);
++ }
++ copy->window = window;
++ dest->state = (struct internal_state FAR *)copy;
++ return Z_OK;
++}
++
++int ZEXPORT inflateUndermine(strm, subvert)
++z_streamp strm;
++int subvert;
++{
++ struct inflate_state FAR *state;
++
++ if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
++ state = (struct inflate_state FAR *)strm->state;
++#ifdef INFLATE_ALLOW_INVALID_DISTANCE_TOOFAR_ARRR
++ state->sane = !subvert;
++ return Z_OK;
++#else
++ (void)subvert;
++ state->sane = 1;
++ return Z_DATA_ERROR;
++#endif
++}
++
++int ZEXPORT inflateValidate(strm, check)
++z_streamp strm;
++int check;
++{
++ struct inflate_state FAR *state;
++
++ if (inflateStateCheck(strm)) return Z_STREAM_ERROR;
++ state = (struct inflate_state FAR *)strm->state;
++ if (check)
++ state->wrap |= 4;
++ else
++ state->wrap &= ~4;
++ return Z_OK;
++}
++
++long ZEXPORT inflateMark(strm)
++z_streamp strm;
++{
++ struct inflate_state FAR *state;
++
++ if (inflateStateCheck(strm))
++ return -(1L << 16);
++ state = (struct inflate_state FAR *)strm->state;
++ return (long)(((unsigned long)((long)state->back)) << 16) +
++ (state->mode == COPY ? state->length :
++ (state->mode == MATCH ? state->was - state->length : 0));
++}
++
++unsigned long ZEXPORT inflateCodesUsed(strm)
++z_streamp strm;
++{
++ struct inflate_state FAR *state;
++ if (inflateStateCheck(strm)) return (unsigned long)-1;
++ state = (struct inflate_state FAR *)strm->state;
++ return (unsigned long)(state->next - state->codes);
++}
+
+From 247147654fe5cd11cf15d8dff91440405ea57040 Mon Sep 17 00:00:00 2001
+From: Simon Hosie <simon.hosie@arm.com>
+Date: Wed, 12 Apr 2017 15:44:21 -0700
+Subject: [PATCH 2/2] Inflate using wider loads and stores
+
+In inflate_fast() the output pointer always has plenty of room to write. This
+means that so long as the target is capable, wide un-aligned loads and stores
+can be used to transfer several bytes at once. When the reference distance is
+too short simply unroll the data a little to increase the distance.
+
+Change-Id: I59854eb25d2b1e43561c8a2afaf9175bf10cf674
+---
+ contrib/arm/chunkcopy.h | 279 ++++++++++++++++++++++++++++++++++++++++++++++++
+ contrib/arm/inffast.c | 96 +++++++----------
+ contrib/arm/inflate.c | 22 ++--
+ 3 files changed, 335 insertions(+), 62 deletions(-)
+ create mode 100644 contrib/arm/chunkcopy.h
+
+diff --git a/contrib/arm/chunkcopy.h b/contrib/arm/chunkcopy.h
+new file mode 100644
+index 00000000..2d6fd6f9
+--- /dev/null
++++ b/contrib/arm/chunkcopy.h
+@@ -0,0 +1,279 @@
++/* chunkcopy.h -- fast copies and sets
++ * Copyright (C) 2017 ARM, Inc.
++ * For conditions of distribution and use, see copyright notice in zlib.h
++ */
++
++#ifndef CHUNKCOPY_H
++#define CHUNKCOPY_H
++
++#include "zutil.h"
++#include <arm_neon.h>
++
++#if __STDC_VERSION__ >= 199901L
++#define Z_RESTRICT restrict
++#else
++#define Z_RESTRICT
++#endif
++
++typedef uint8x16_t chunkcopy_chunk_t;
++#define CHUNKCOPY_CHUNK_SIZE sizeof(chunkcopy_chunk_t)
++
++/*
++ Ask the compiler to perform a wide, unaligned load with an machine
++ instruction appropriate for the chunkcopy_chunk_t type.
++ */
++static inline chunkcopy_chunk_t loadchunk(const unsigned char FAR *s) {
++ chunkcopy_chunk_t c;
++ __builtin_memcpy(&c, s, sizeof(c));
++ return c;
++}
++
++/*
++ Ask the compiler to perform a wide, unaligned store with an machine
++ instruction appropriate for the chunkcopy_chunk_t type.
++ */
++static inline void storechunk(unsigned char FAR *d, chunkcopy_chunk_t c) {
++ __builtin_memcpy(d, &c, sizeof(c));
++}
++
++/*
++ Perform a memcpy-like operation, but assume that length is non-zero and that
++ it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if
++ the length is shorter than this.
++
++ It also guarantees that it will properly unroll the data if the distance
++ between `out` and `from` is at least CHUNKCOPY_CHUNK_SIZE, which we rely on
++ in chunkcopy_relaxed().
++
++ Aside from better memory bus utilisation, this means that short copies
++ (CHUNKCOPY_CHUNK_SIZE bytes or fewer) will fall straight through the loop
++ without iteration, which will hopefully make the branch prediction more
++ reliable.
++ */
++static inline unsigned char FAR *chunkcopy_core(unsigned char FAR *out,
++ const unsigned char FAR *from,
++ unsigned len) {
++ int bump = (--len % CHUNKCOPY_CHUNK_SIZE) + 1;
++ storechunk(out, loadchunk(from));
++ out += bump;
++ from += bump;
++ len /= CHUNKCOPY_CHUNK_SIZE;
++ while (len-- > 0) {
++ storechunk(out, loadchunk(from));
++ out += CHUNKCOPY_CHUNK_SIZE;
++ from += CHUNKCOPY_CHUNK_SIZE;
++ }
++ return out;
++}
++
++/*
++ Like chunkcopy_core, but avoid writing beyond of legal output.
++
++ Accepts an additional pointer to the end of safe output. A generic safe
++ copy would use (out + len), but it's normally the case that the end of the
++ output buffer is beyond the end of the current copy, and this can still be
++ exploited.
++ */
++static inline unsigned char FAR *chunkcopy_core_safe(unsigned char FAR *out,
++ const unsigned char FAR * from,
++ unsigned len,
++ unsigned char FAR *limit) {
++ Assert(out + len <= limit, "chunk copy exceeds safety limit");
++ if (limit - out < CHUNKCOPY_CHUNK_SIZE) {
++ const unsigned char FAR * Z_RESTRICT rfrom = from;
++ if (len & 8) { __builtin_memcpy(out, rfrom, 8); out += 8; rfrom += 8; }
++ if (len & 4) { __builtin_memcpy(out, rfrom, 4); out += 4; rfrom += 4; }
++ if (len & 2) { __builtin_memcpy(out, rfrom, 2); out += 2; rfrom += 2; }
++ if (len & 1) { *out++ = *rfrom++; }
++ return out;
++ }
++ return chunkcopy_core(out, from, len);
++}
++
++/*
++ Perform short copies until distance can be rewritten as being at least
++ CHUNKCOPY_CHUNK_SIZE.
++
++ This assumes that it's OK to overwrite at least the first
++ 2*CHUNKCOPY_CHUNK_SIZE bytes of output even if the copy is shorter than
++ this. This assumption holds within inflate_fast() which starts every
++ iteration with at least 258 bytes of output space available (258 being the
++ maximum length output from a single token; see inffast.c).
++ */
++static inline unsigned char FAR *chunkunroll_relaxed(unsigned char FAR *out,
++ unsigned FAR *dist,
++ unsigned FAR *len) {
++ const unsigned char FAR *from = out - *dist;
++ while (*dist < *len && *dist < CHUNKCOPY_CHUNK_SIZE) {
++ storechunk(out, loadchunk(from));
++ out += *dist;
++ *len -= *dist;
++ *dist += *dist;
++ }
++ return out;
++}
++
++
++static inline uint8x16_t chunkset_vld1q_dup_u8x8(const unsigned char FAR * Z_RESTRICT from) {
++#if defined(__clang__) || defined(__aarch64__)
++ return vreinterpretq_u8_u64(vld1q_dup_u64((void *)from));
++#else
++ /* 32-bit GCC uses an alignment hint for vld1q_dup_u64, even when given a
++ * void pointer, so here's an alternate implementation.
++ */
++ uint8x8_t h = vld1_u8(from);
++ return vcombine_u8(h, h);
++#endif
++}
++
++/*
++ Perform an overlapping copy which behaves as a memset() operation, but
++ supporting periods other than one, and assume that length is non-zero and
++ that it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE*3 bytes of output
++ even if the length is shorter than this.
++ */
++static inline unsigned char FAR *chunkset_core(unsigned char FAR *out,
++ unsigned period,
++ unsigned len) {
++ uint8x16_t f;
++ int bump = ((len - 1) % sizeof(f)) + 1;
++
++ switch (period) {
++ case 1:
++ f = vld1q_dup_u8(out - 1);
++ vst1q_u8(out, f);
++ out += bump;
++ len -= bump;
++ while (len > 0) {
++ vst1q_u8(out, f);
++ out += sizeof(f);
++ len -= sizeof(f);
++ }
++ return out;
++ case 2:
++ f = vreinterpretq_u8_u16(vld1q_dup_u16((void *)(out - 2)));
++ vst1q_u8(out, f);
++ out += bump;
++ len -= bump;
++ if (len > 0) {
++ f = vreinterpretq_u8_u16(vld1q_dup_u16((void *)(out - 2)));
++ do {
++ vst1q_u8(out, f);
++ out += sizeof(f);
++ len -= sizeof(f);
++ } while (len > 0);
++ }
++ return out;
++ case 4:
++ f = vreinterpretq_u8_u32(vld1q_dup_u32((void *)(out - 4)));
++ vst1q_u8(out, f);
++ out += bump;
++ len -= bump;
++ if (len > 0) {
++ f = vreinterpretq_u8_u32(vld1q_dup_u32((void *)(out - 4)));
++ do {
++ vst1q_u8(out, f);
++ out += sizeof(f);
++ len -= sizeof(f);
++ } while (len > 0);
++ }
++ return out;
++ case 8:
++ f = chunkset_vld1q_dup_u8x8(out - 8);
++ vst1q_u8(out, f);
++ out += bump;
++ len -= bump;
++ if (len > 0) {
++ f = chunkset_vld1q_dup_u8x8(out - 8);
++ do {
++ vst1q_u8(out, f);
++ out += sizeof(f);
++ len -= sizeof(f);
++ } while (len > 0);
++ }
++ return out;
++ }
++ out = chunkunroll_relaxed(out, &period, &len);
++ return chunkcopy_core(out, out - period, len);
++}
++
++/*
++ Perform a memcpy-like operation, but assume that length is non-zero and that
++ it's OK to overwrite at least CHUNKCOPY_CHUNK_SIZE bytes of output even if
++ the length is shorter than this.
++
++ Unlike chunkcopy_core() above, no guarantee is made regarding the behaviour
++ of overlapping buffers, regardless of the distance between the pointers.
++ This is reflected in the `restrict`-qualified pointers, allowing the
++ compiler to reorder loads and stores.
++ */
++static inline unsigned char FAR *chunkcopy_relaxed(unsigned char FAR * Z_RESTRICT out,
++ const unsigned char FAR * Z_RESTRICT from,
++ unsigned len) {
++ return chunkcopy_core(out, from, len);
++}
++
++/*
++ Like chunkcopy_relaxed, but avoid writing beyond of legal output.
++
++ Unlike chunkcopy_core_safe() above, no guarantee is made regarding the
++ behaviour of overlapping buffers, regardless of the distance between the
++ pointers. This is reflected in the `restrict`-qualified pointers, allowing
++ the compiler to reorder loads and stores.
++
++ Accepts an additional pointer to the end of safe output. A generic safe
++ copy would use (out + len), but it's normally the case that the end of the
++ output buffer is beyond the end of the current copy, and this can still be
++ exploited.
++ */
++static inline unsigned char FAR *chunkcopy_safe(unsigned char FAR *out,
++ const unsigned char FAR * Z_RESTRICT from,
++ unsigned len,
++ unsigned char FAR *limit) {
++ Assert(out + len <= limit, "chunk copy exceeds safety limit");
++ return chunkcopy_core_safe(out, from, len, limit);
++}
++
++/*
++ Perform chunky copy within the same buffer, where the source and destination
++ may potentially overlap.
++
++ Assumes that len > 0 on entry, and that it's safe to write at least
++ CHUNKCOPY_CHUNK_SIZE*3 bytes to the output.
++ */
++static inline unsigned char FAR *chunkcopy_lapped_relaxed(unsigned char FAR *out,
++ unsigned dist,
++ unsigned len) {
++ if (dist < len && dist < CHUNKCOPY_CHUNK_SIZE) {
++ return chunkset_core(out, dist, len);
++ }
++ return chunkcopy_core(out, out - dist, len);
++}
++
++/*
++ Behave like chunkcopy_lapped_relaxed, but avoid writing beyond of legal output.
++
++ Accepts an additional pointer to the end of safe output. A generic safe
++ copy would use (out + len), but it's normally the case that the end of the
++ output buffer is beyond the end of the current copy, and this can still be
++ exploited.
++ */
++static inline unsigned char FAR *chunkcopy_lapped_safe(unsigned char FAR *out,
++ unsigned dist,
++ unsigned len,
++ unsigned char FAR *limit) {
++ Assert(out + len <= limit, "chunk copy exceeds safety limit");
++ if (limit - out < CHUNKCOPY_CHUNK_SIZE * 3) {
++ /* TODO: try harder to optimise this */
++ while (len-- > 0) {
++ *out = *(out - dist);
++ out++;
++ }
++ return out;
++ }
++ return chunkcopy_lapped_relaxed(out, dist, len);
++}
++
++#undef Z_RESTRICT
++
++#endif /* CHUNKCOPY_H */
+diff --git a/contrib/arm/inffast.c b/contrib/arm/inffast.c
+index 0dbd1dbc..f7f50071 100644
+--- a/contrib/arm/inffast.c
++++ b/contrib/arm/inffast.c
+@@ -7,6 +7,7 @@
+ #include "inftrees.h"
+ #include "inflate.h"
+ #include "inffast.h"
++#include "chunkcopy.h"
+
+ #ifdef ASMINF
+ # pragma message("Assembler code may have bugs -- use at your own risk")
+@@ -57,6 +58,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
+ unsigned char FAR *out; /* local strm->next_out */
+ unsigned char FAR *beg; /* inflate()'s initial strm->next_out */
+ unsigned char FAR *end; /* while out < end, enough space available */
++ unsigned char FAR *limit; /* safety limit for chunky copies */
+ #ifdef INFLATE_STRICT
+ unsigned dmax; /* maximum distance from zlib header */
+ #endif
+@@ -84,12 +86,13 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
+ out = strm->next_out;
+ beg = out - (start - strm->avail_out);
+ end = out + (strm->avail_out - 257);
++ limit = out + strm->avail_out;
+ #ifdef INFLATE_STRICT
+ dmax = state->dmax;
+ #endif
+ wsize = state->wsize;
+ whave = state->whave;
+- wnext = state->wnext;
++ wnext = (state->wnext == 0 && whave >= wsize) ? wsize : state->wnext;
+ window = state->window;
+ hold = state->hold;
+ bits = state->bits;
+@@ -197,70 +200,51 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
+ #endif
+ }
+ from = window;
+- if (wnext == 0) { /* very common case */
+- from += wsize - op;
+- if (op < len) { /* some from window */
+- len -= op;
+- do {
+- *out++ = *from++;
+- } while (--op);
+- from = out - dist; /* rest from output */
+- }
++ if (wnext >= op) { /* contiguous in window */
++ from += wnext - op;
+ }
+- else if (wnext < op) { /* wrap around window */
+- from += wsize + wnext - op;
++ else { /* wrap around window */
+ op -= wnext;
++ from += wsize - op;
+ if (op < len) { /* some from end of window */
+ len -= op;
+- do {
+- *out++ = *from++;
+- } while (--op);
+- from = window;
+- if (wnext < len) { /* some from start of window */
+- op = wnext;
+- len -= op;
+- do {
+- *out++ = *from++;
+- } while (--op);
+- from = out - dist; /* rest from output */
+- }
++ out = chunkcopy_safe(out, from, op, limit);
++ from = window; /* more from start of window */
++ op = wnext;
++ /* This (rare) case can create a situation where
++ the first chunkcopy below must be checked.
++ */
+ }
+ }
+- else { /* contiguous in window */
+- from += wnext - op;
+- if (op < len) { /* some from window */
+- len -= op;
+- do {
+- *out++ = *from++;
+- } while (--op);
+- from = out - dist; /* rest from output */
+- }
+- }
+- while (len > 2) {
+- *out++ = *from++;
+- *out++ = *from++;
+- *out++ = *from++;
+- len -= 3;
+- }
+- if (len) {
+- *out++ = *from++;
+- if (len > 1)
+- *out++ = *from++;
++ if (op < len) { /* still need some from output */
++ out = chunkcopy_safe(out, from, op, limit);
++ len -= op;
++ /* When dist is small the amount of data that can be
++ copied from the window is also small, and progress
++ towards the dangerous end of the output buffer is
++ also small. This means that for trivial memsets and
++ for chunkunroll_relaxed() a safety check is
++ unnecessary. However, these conditions may not be
++ entered at all, and in that case it's possible that
++ the main copy is near the end.
++ */
++ out = chunkunroll_relaxed(out, &dist, &len);
++ out = chunkcopy_safe(out, out - dist, len, limit);
++ } else {
++ /* from points to window, so there is no risk of
++ overlapping pointers requiring memset-like behaviour
++ */
++ out = chunkcopy_safe(out, from, len, limit);
+ }
+ }
+ else {
+- from = out - dist; /* copy direct from output */
+- do { /* minimum length is three */
+- *out++ = *from++;
+- *out++ = *from++;
+- *out++ = *from++;
+- len -= 3;
+- } while (len > 2);
+- if (len) {
+- *out++ = *from++;
+- if (len > 1)
+- *out++ = *from++;
+- }
++ /* Whole reference is in range of current output. No
++ range checks are necessary because we start with room
++ for at least 258 bytes of output, so unroll and roundoff
++ operations can write beyond `out+len` so long as they
++ stay within 258 bytes of `out`.
++ */
++ out = chunkcopy_lapped_relaxed(out, dist, len);
+ }
+ }
+ else if ((op & 64) == 0) { /* 2nd level distance code */
+diff --git a/contrib/arm/inflate.c b/contrib/arm/inflate.c
+index ac333e8c..e40322c3 100644
+--- a/contrib/arm/inflate.c
++++ b/contrib/arm/inflate.c
+@@ -84,6 +84,7 @@
+ #include "inftrees.h"
+ #include "inflate.h"
+ #include "inffast.h"
++#include "contrib/arm/chunkcopy.h"
+
+ #ifdef MAKEFIXED
+ # ifndef BUILDFIXED
+@@ -405,10 +406,20 @@ unsigned copy;
+
+ /* if it hasn't been done already, allocate space for the window */
+ if (state->window == Z_NULL) {
++ unsigned wsize = 1U << state->wbits;
+ state->window = (unsigned char FAR *)
+- ZALLOC(strm, 1U << state->wbits,
++ ZALLOC(strm, wsize + CHUNKCOPY_CHUNK_SIZE,
+ sizeof(unsigned char));
+ if (state->window == Z_NULL) return 1;
++#ifdef INFLATE_CLEAR_UNUSED_UNDEFINED
++ /* Copies from the overflow portion of this buffer are undefined and
++ may cause analysis tools to raise a warning if we don't initialize
++ it. However, this undefined data overwrites other undefined data
++ and is subsequently either overwritten or left deliberately
++ undefined at the end of decode; so there's really no point.
++ */
++ memset(state->window + wsize, 0, CHUNKCOPY_CHUNK_SIZE);
++#endif
+ }
+
+ /* if window not in use yet, initialize */
+@@ -1175,17 +1186,16 @@ int flush;
+ else
+ from = state->window + (state->wnext - copy);
+ if (copy > state->length) copy = state->length;
++ if (copy > left) copy = left;
++ put = chunkcopy_safe(put, from, copy, put + left);
+ }
+ else { /* copy from output */
+- from = put - state->offset;
+ copy = state->length;
++ if (copy > left) copy = left;
++ put = chunkcopy_lapped_safe(put, state->offset, copy, put + left);
+ }
+- if (copy > left) copy = left;
+ left -= copy;
+ state->length -= copy;
+- do {
+- *put++ = *from++;
+- } while (--copy);
+ if (state->length == 0) state->mode = LEN;
+ break;
+ case LIT:
diff --git a/package/libs/zlib/patches/003-attach-sourcefiles-in-patch-002-to-buildsystem.patch b/package/libs/zlib/patches/003-attach-sourcefiles-in-patch-002-to-buildsystem.patch
new file mode 100644
index 0000000000..68f317b24b
--- /dev/null
+++ b/package/libs/zlib/patches/003-attach-sourcefiles-in-patch-002-to-buildsystem.patch
@@ -0,0 +1,100 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 8e75f66..24d7329 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -95,34 +95,67 @@ set(ZLIB_PUBLIC_HDRS
+ ${CMAKE_CURRENT_BINARY_DIR}/zconf.h
+ zlib.h
+ )
+-set(ZLIB_PRIVATE_HDRS
+- crc32.h
+- deflate.h
+- gzguts.h
+- inffast.h
+- inffixed.h
+- inflate.h
+- inftrees.h
+- trees.h
+- zutil.h
+-)
+-set(ZLIB_SRCS
+- adler32.c
+- compress.c
+- crc32.c
+- deflate.c
+- gzclose.c
+- gzlib.c
+- gzread.c
+- gzwrite.c
+- inflate.c
+- infback.c
+- inftrees.c
+- inffast.c
+- trees.c
+- uncompr.c
+- zutil.c
+-)
++
++if(ARMv8)
++ set(ZLIB_PRIVATE_HDRS
++ crc32.h
++ deflate.h
++ gzguts.h
++ inffast.h
++ inffixed.h
++ inflate.h
++ inftrees.h
++ trees.h
++ zutil.h
++ contrib/arm/chunkcopy.h
++ )
++ set(ZLIB_SRCS
++ adler32.c
++ compress.c
++ crc32.c
++ deflate.c
++ gzclose.c
++ gzlib.c
++ gzread.c
++ gzwrite.c
++ infback.c
++ inftrees.c
++ contrib/arm/inflate.c
++ contrib/arm/inffast.c
++ trees.c
++ uncompr.c
++ zutil.c
++ )
++ else()
++ set(ZLIB_PRIVATE_HDRS
++ crc32.h
++ deflate.h
++ gzguts.h
++ inffast.h
++ inffixed.h
++ inflate.h
++ inftrees.h
++ trees.h
++ zutil.h
++ )
++ set(ZLIB_SRCS
++ adler32.c
++ compress.c
++ crc32.c
++ deflate.c
++ gzclose.c
++ gzlib.c
++ gzread.c
++ gzwrite.c
++ inflate.c
++ infback.c
++ inftrees.c
++ inffast.c
++ trees.c
++ uncompr.c
++ zutil.c
++ )
++endif()
+
+ if(NOT MINGW)
+ set(ZLIB_DLL_SRCS