/* * SubCircuit -- An implementation of the Ullmann Subgraph Isomorphism * algorithm for coarse grain logic networks * * Copyright (C) 2013 Clifford Wolf * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above * copyright notice and this permission notice appear in all copies. * * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. * */ #include "subcircuit.h" #include #include #include #include #ifdef _YOSYS_ # include "kernel/yosys.h" # define my_printf YOSYS_NAMESPACE_PREFIX log #else # define my_printf printf #endif using namespace SubCircuit; #ifndef _YOSYS_ static std::string my_stringf(const char *fmt, ...) 
{ std::string string; char *str = NULL; va_list ap; va_start(ap, fmt); if (vasprintf(&str, fmt, ap) < 0) str = NULL; va_end(ap); if (str != NULL) { string = str; free(str); } return string; } #else # define my_stringf YOSYS_NAMESPACE_PREFIX stringf #endif SubCircuit::Graph::Graph(const Graph &other, const std::vector &otherNodes) { allExtern = other.allExtern; std::map other2this; for (int i = 0; i < int(otherNodes.size()); i++) { assert(other.nodeMap.count(otherNodes[i]) > 0); other2this[other.nodeMap.at(otherNodes[i])] = i; nodeMap[otherNodes[i]] = i; } std::map edges2this; for (auto &i1 : other2this) for (auto &i2 : other.nodes[i1.first].ports) for (auto &i3 : i2.bits) if (edges2this.count(i3.edgeIdx) == 0) { int next_idx = edges2this.size(); edges2this[i3.edgeIdx] = next_idx; } edges.resize(edges2this.size()); for (auto &it : edges2this) { for (auto &bit : other.edges[it.first].portBits) if (other2this.count(bit.nodeIdx) > 0) edges[it.second].portBits.insert(BitRef(other2this[bit.nodeIdx], bit.portIdx, bit.bitIdx)); edges[it.second].constValue = other.edges[it.first].constValue; edges[it.second].isExtern = other.edges[it.first].isExtern; } nodes.resize(other2this.size()); for (auto &it : other2this) { nodes[it.second] = other.nodes[it.first]; for (auto &i2 : nodes[it.second].ports) for (auto &i3 : i2.bits) i3.edgeIdx = edges2this.at(i3.edgeIdx); } } bool SubCircuit::Graph::BitRef::operator < (const BitRef &other) const { if (nodeIdx != other.nodeIdx) return nodeIdx < other.nodeIdx; if (portIdx != other.portIdx) return portIdx < other.portIdx; return bitIdx < other.bitIdx; } void SubCircuit::Graph::createNode(std::string nodeId, std::string typeId, void *userData, bool shared) { assert(nodeMap.count(nodeId) == 0); nodeMap[nodeId] = nodes.size(); nodes.push_back(Node()); Node &newNode = nodes.back(); newNode.nodeId = nodeId; newNode.typeId = typeId; newNode.userData = userData; newNode.shared = shared; } void SubCircuit::Graph::createPort(std::string nodeId, 
std::string portId, int width, int minWidth) { assert(nodeMap.count(nodeId) != 0); int nodeIdx = nodeMap[nodeId]; Node &node = nodes[nodeIdx]; assert(node.portMap.count(portId) == 0); int portIdx = node.ports.size(); node.portMap[portId] = portIdx; node.ports.push_back(Port()); Port &port = node.ports.back(); port.portId = portId; port.minWidth = minWidth < 0 ? width : minWidth; port.bits.insert(port.bits.end(), width, PortBit()); for (int i = 0; i < width; i++) { port.bits[i].edgeIdx = edges.size(); edges.push_back(Edge()); edges.back().portBits.insert(BitRef(nodeIdx, portIdx, i)); } } void SubCircuit::Graph::createConnection(std::string fromNodeId, std::string fromPortId, int fromBit, std::string toNodeId, std::string toPortId, int toBit, int width) { assert(nodeMap.count(fromNodeId) != 0); assert(nodeMap.count(toNodeId) != 0); int fromNodeIdx = nodeMap[fromNodeId]; Node &fromNode = nodes[fromNodeIdx]; int toNodeIdx = nodeMap[toNodeId]; Node &toNode = nodes[toNodeIdx]; assert(fromNode.portMap.count(fromPortId) != 0); assert(toNode.portMap.count(toPortId) != 0); int fromPortIdx = fromNode.portMap[fromPortId]; Port &fromPort = fromNode.ports[fromPortIdx]; int toPortIdx = toNode.portMap[toPortId]; Port &toPort = toNode.ports[toPortIdx]; if (width < 0) { assert(fromBit == 0 && toBit == 0); assert(fromPort.bits.size() == toPort.bits.size()); width = fromPort.bits.size(); } assert(fromBit >= 0 && toBit >= 0); for (int i = 0; i < width; i++) { assert(fromBit + i < int(fromPort.bits.size())); assert(toBit + i < int(toPort.bits.size())); int fromEdgeIdx = fromPort.bits[fromBit + i].edgeIdx; int toEdgeIdx = toPort.bits[toBit + i].edgeIdx; if (fromEdgeIdx == toEdgeIdx) continue; // merge toEdge into fromEdge if (edges[toEdgeIdx].isExtern) edges[fromEdgeIdx].isExtern = true; if (edges[toEdgeIdx].constValue) { assert(edges[fromEdgeIdx].constValue == 0); edges[fromEdgeIdx].constValue = edges[toEdgeIdx].constValue; } for (const auto &ref : edges[toEdgeIdx].portBits) { 
edges[fromEdgeIdx].portBits.insert(ref); nodes[ref.nodeIdx].ports[ref.portIdx].bits[ref.bitIdx].edgeIdx = fromEdgeIdx; } // remove toEdge (move last edge over toEdge if needed) if (toEdgeIdx+1 != int(edges.size())) { edges[toEdgeIdx] = edges.back(); for (const auto &ref : edges[toEdgeIdx].portBits) nodes[ref.nodeIdx].ports[ref.portIdx].bits[ref.bitIdx].edgeIdx = toEdgeIdx; } edges.pop_back(); } } void SubCircuit::Graph::createConnection(std::string fromNodeId, std::string fromPortId, std::string toNodeId, std::string toPortId) { createConnection(fromNodeId, fromPortId, 0, toNodeId, toPortId, 0, -1); } void SubCircuit::Graph::createConstant(std::string toNodeId, std::string toPortId, int toBit, int constValue) { assert(nodeMap.count(toNodeId) != 0); int toNodeIdx = nodeMap[toNodeId]; Node &toNode = nodes[toNodeIdx]; assert(toNode.portMap.count(toPortId) != 0); int toPortIdx = toNode.portMap[toPortId]; Port &toPort = toNode.ports[toPortIdx]; assert(toBit >= 0 && toBit < int(toPort.bits.size())); int toEdgeIdx = toPort.bits[toBit].edgeIdx; assert(edges[toEdgeIdx].constValue == 0); edges[toEdgeIdx].constValue = constValue; } void SubCircuit::Graph::createConstant(std::string toNodeId, std::string toPortId, int constValue) { assert(nodeMap.count(toNodeId) != 0); int toNodeIdx = nodeMap[toNodeId]; Node &toNode = nodes[toNodeIdx]; assert(toNode.portMap.count(toPortId) != 0); int toPortIdx = toNode.portMap[toPortId]; Port &toPort = toNode.ports[toPortIdx]; for (int i = 0; i < int(toPort.bits.size()); i++) { int toEdgeIdx = toPort.bits[i].edgeIdx; assert(edges[toEdgeIdx].constValue == 0); edges[toEdgeIdx].constValue = constValue % 2 ? 
'1' : '0'; constValue = constValue >> 1; } } void SubCircuit::Graph::markExtern(std::string nodeId, std::string portId, int bit) { assert(nodeMap.count(nodeId) != 0); Node &node = nodes[nodeMap[nodeId]]; assert(node.portMap.count(portId) != 0); Port &port = node.ports[node.portMap[portId]]; if (bit < 0) { for (const auto portBit : port.bits) edges[portBit.edgeIdx].isExtern = true; } else { assert(bit < int(port.bits.size())); edges[port.bits[bit].edgeIdx].isExtern = true; } } void SubCircuit::Graph::markAllExtern() { allExtern = true; } void SubCircuit::Graph::print() { for (int i = 0; i < int(nodes.size()); i++) { const Node &node = nodes[i]; my_printf("NODE %d: %s (%s)\n", i, node.nodeId.c_str(), node.typeId.c_str()); for (int j = 0; j < int(node.ports.size()); j++) { const Port &port = node.ports[j]; my_printf(" PORT %d: %s (%d/%d)\n", j, port.portId.c_str(), port.minWidth, int(port.bits.size())); for (int k = 0; k < int(port.bits.size()); k++) { int edgeIdx = port.bits[k].edgeIdx; my_printf(" BIT %d (%d):", k, edgeIdx); for (const auto &ref : edges[edgeIdx].portBits) my_printf(" %d.%d.%d", ref.nodeIdx, ref.portIdx, ref.bitIdx); if (edges[edgeIdx].isExtern) my_printf(" [extern]"); my_printf("\n"); } } } } class SubCircuit::SolverWorker { // basic internal data structures typedef std::vector> adjMatrix_t; struct GraphData { std::string graphId; Graph graph; adjMatrix_t adjMatrix; std::vector usedNodes; }; static void printAdjMatrix(const adjMatrix_t &matrix) { my_printf("%7s", ""); for (int i = 0; i < int(matrix.size()); i++) my_printf("%4d:", i); my_printf("\n"); for (int i = 0; i < int(matrix.size()); i++) { my_printf("%5d:", i); for (int j = 0; j < int(matrix.size()); j++) if (matrix.at(i).count(j) == 0) my_printf("%5s", "-"); else my_printf("%5d", matrix.at(i).at(j)); my_printf("\n"); } } // helper functions for handling permutations static const int maxPermutationsLimit = 1000000; static int numberOfPermutations(const std::vector &list) { int numPermutations 
= 1; for (int i = 0; i < int(list.size()); i++) { assert(numPermutations < maxPermutationsLimit); numPermutations *= i+1; } return numPermutations; } static void permutateVectorToMap(std::map &map, const std::vector &list, int idx) { // convert idx to a list.size() digits factoradic number std::vector factoradicDigits; for (int i = 0; i < int(list.size()); i++) { factoradicDigits.push_back(idx % (i+1)); idx = idx / (i+1); } // construct permutation std::vector pool = list; std::vector permutation; while (!factoradicDigits.empty()) { int i = factoradicDigits.back(); factoradicDigits.pop_back(); permutation.push_back(pool[i]); pool.erase(pool.begin() + i); } // update map for (int i = 0; i < int(list.size()); i++) map[list[i]] = permutation[i]; } static int numberOfPermutationsArray(const std::vector> &list) { int numPermutations = 1; for (const auto &it : list) { int thisPermutations = numberOfPermutations(it); assert(float(numPermutations) * float(thisPermutations) < maxPermutationsLimit); numPermutations *= thisPermutations; } return numPermutations; } static void permutateVectorToMapArray(std::map &map, const std::vector> &list, int idx) { for (const auto &it : list) { int thisPermutations = numberOfPermutations(it); int thisIdx = idx % thisPermutations; permutateVectorToMap(map, it, thisIdx); idx /= thisPermutations; } } static void applyPermutation(std::map &map, const std::map &permutation) { std::vector> changeLog; for (const auto &it : permutation) if (map.count(it.second)) changeLog.push_back(std::pair(it.first, map.at(it.second))); else changeLog.push_back(std::pair(it.first, it.second)); for (const auto &it : changeLog) map[it.first] = it.second; } // classes for internal digraph representation struct DiBit { std::string fromPort, toPort; int fromBit, toBit; DiBit() : fromPort(), toPort(), fromBit(-1), toBit(-1) { } DiBit(std::string fromPort, int fromBit, std::string toPort, int toBit) : fromPort(fromPort), toPort(toPort), fromBit(fromBit), toBit(toBit) 
{ } bool operator < (const DiBit &other) const { if (fromPort != other.fromPort) return fromPort < other.fromPort; if (toPort != other.toPort) return toPort < other.toPort; if (fromBit != other.fromBit) return fromBit < other.fromBit; return toBit < other.toBit; } std::string toString() const { return my_stringf("%s[%d]:%s[%d]", fromPort.c_str(), fromBit, toPort.c_str(), toBit); } }; struct DiNode { std::string typeId; std::map portSizes; DiNode() { } DiNode(const Graph &graph, int nodeIdx) { const Graph::Node &node = graph.nodes.at(nodeIdx); typeId = node.typeId; for (const auto &port : node.ports) portSizes[port.portId] = port.bits.size(); } bool operator < (const DiNode &other) const { if (typeId != other.typeId) return typeId < other.typeId; return portSizes < other.portSizes; } std::string toString() const { std::string str; bool firstPort = true; for (const auto &it : portSizes) { str += my_stringf("%s%s[%d]", firstPort ? "" : ",", it.first.c_str(), it.second); firstPort = false; } return typeId + "(" + str + ")"; } }; struct DiEdge { DiNode fromNode, toNode; std::set bits; std::string userAnnotation; bool operator < (const DiEdge &other) const { if (fromNode < other.fromNode || other.fromNode < fromNode) return fromNode < other.fromNode; if (toNode < other.toNode || other.toNode < toNode) return toNode < other.toNode; if (bits < other.bits || other.bits < bits) return bits < other.bits; return userAnnotation < other.userAnnotation; } bool compare(const DiEdge &other, const std::map &mapFromPorts, const std::map &mapToPorts) const { // Rules for matching edges: // // For all bits in the needle edge: // - ignore if needle ports don't exist in haystack edge // - otherwise: matching bit in haystack edge must exist // // There is no need to check in the other direction, as checking // of the isExtern properties is already performed in node matching. 
// // Note: "this" is needle, "other" is haystack for (auto bit : bits) { if (mapFromPorts.count(bit.fromPort) > 0) bit.fromPort = mapFromPorts.at(bit.fromPort); if (mapToPorts.count(bit.toPort) > 0) bit.toPort = mapToPorts.at(bit.toPort); if (other.fromNode.portSizes.count(bit.fromPort) == 0) continue; if (other.toNode.portSizes.count(bit.toPort) == 0) continue; if (bit.fromBit >= other.fromNode.portSizes.at(bit.fromPort)) continue; if (bit.toBit >= other.toNode.portSizes.at(bit.toPort)) continue; if (other.bits.count(bit) == 0) return false; } return true; } bool compareWithFromAndToPermutations(const DiEdge &other, const std::map &mapFromPorts, const std::map &mapToPorts, const std::map>> &swapPermutations) const { if (swapPermutations.count(fromNode.typeId) > 0) for (const auto &permutation : swapPermutations.at(fromNode.typeId)) { std::map thisMapFromPorts = mapFromPorts; applyPermutation(thisMapFromPorts, permutation); if (compareWithToPermutations(other, thisMapFromPorts, mapToPorts, swapPermutations)) return true; } return compareWithToPermutations(other, mapFromPorts, mapToPorts, swapPermutations); } bool compareWithToPermutations(const DiEdge &other, const std::map &mapFromPorts, const std::map &mapToPorts, const std::map>> &swapPermutations) const { if (swapPermutations.count(toNode.typeId) > 0) for (const auto &permutation : swapPermutations.at(toNode.typeId)) { std::map thisMapToPorts = mapToPorts; applyPermutation(thisMapToPorts, permutation); if (compare(other, mapFromPorts, thisMapToPorts)) return true; } return compare(other, mapFromPorts, mapToPorts); } bool compare(const DiEdge &other, const std::map>> &swapPorts, const std::map>> &swapPermutations) const { // brute force method for port swapping: try all variations std::vector> swapFromPorts; std::vector> swapToPorts; // only use groups that are relevant for this edge if (swapPorts.count(fromNode.typeId) > 0) for (const auto &ports : swapPorts.at(fromNode.typeId)) { for (const auto &bit : bits) 
if (ports.count(bit.fromPort)) goto foundFromPortMatch; if (0) { foundFromPortMatch: std::vector portsVector; for (const auto &port : ports) portsVector.push_back(port); swapFromPorts.push_back(portsVector); } } if (swapPorts.count(toNode.typeId) > 0) for (const auto &ports : swapPorts.at(toNode.typeId)) { for (const auto &bit : bits) if (ports.count(bit.toPort)) goto foundToPortMatch; if (0) { foundToPortMatch: std::vector portsVector; for (const auto &port : ports) portsVector.push_back(port); swapToPorts.push_back(portsVector); } } // try all permutations std::map mapFromPorts, mapToPorts; int fromPortsPermutations = numberOfPermutationsArray(swapFromPorts); int toPortsPermutations = numberOfPermutationsArray(swapToPorts); for (int i = 0; i < fromPortsPermutations; i++) { permutateVectorToMapArray(mapFromPorts, swapFromPorts, i); for (int j = 0; j < toPortsPermutations; j++) { permutateVectorToMapArray(mapToPorts, swapToPorts, j); if (compareWithFromAndToPermutations(other, mapFromPorts, mapToPorts, swapPermutations)
/* crc32.c -- compute the CRC-32 of a data stream
 * Copyright (C) 1995-2006, 2010 Mark Adler
 * For conditions of distribution and use, see copyright notice in zlib.h
 *
 * Thanks to Rodney Brown <rbrown64@csc.com.au> for his contribution of faster
 * CRC methods: exclusive-oring 32 bits of data at a time, and pre-computing
 * tables for updating the shift register in one step with three exclusive-ors
 * instead of four steps with four exclusive-ors.  This results in about a
 * factor of two increase in speed on a Power PC G4 (PPC7455) using gcc -O3.
 */

/* @(#) $Id$ */

/*
  Note on the use of DYNAMIC_CRC_TABLE: there is no mutex or semaphore
  protection on the static variables used to control the first-use generation
  of the crc tables.  Therefore, if you #define DYNAMIC_CRC_TABLE, you should
  first call get_crc_table() to initialize the tables before allowing more than
  one thread to use crc32().
 */

#ifdef MAKECRCH
#  include <stdio.h>
#  ifndef DYNAMIC_CRC_TABLE
#    define DYNAMIC_CRC_TABLE
#  endif /* !DYNAMIC_CRC_TABLE */
#endif /* MAKECRCH */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "misc/util/abc_global.h"

#include "zutil.h"      /* for STDC and FAR definitions */

ABC_NAMESPACE_IMPL_START

#define local static

/* Find a four-byte integer type for crc32_little() and crc32_big().
 *
 * When NOBYFOUR is not defined and STDC is, BYFOUR gets defined and u4 becomes
 * the first of unsigned int / unsigned long / unsigned short whose maximum
 * value is exactly 0xffffffff.  If none of them qualifies, BYFOUR is undefined
 * again, so only the byte-at-a-time code path is compiled.
 */
#ifndef NOBYFOUR
#  ifdef STDC           /* need ANSI C limits.h to determine sizes */
/* NOTE(review): the namespace macros are presumably closed/reopened here so
   that <limits.h> is not included inside the ABC namespace -- confirm against
   misc/util/abc_global.h. */
ABC_NAMESPACE_IMPL_END
#    include <limits.h>
ABC_NAMESPACE_IMPL_START
#    define BYFOUR
#    if (UINT_MAX == 0xffffffffUL)
       typedef unsigned int u4;
#    else
#      if (ULONG_MAX == 0xffffffffUL)
         typedef unsigned long u4;
#      else
#        if (USHRT_MAX == 0xffffffffUL)
           typedef unsigned short u4;
#        else
#          undef BYFOUR     /* can't find a four-byte integer type! */
#        endif
#      endif
#    endif
#  endif /* STDC */
#endif /* !NOBYFOUR */

/* Definitions for doing the crc four data bytes at a time.
 *
 * REV(w) reverses the order of the four low bytes of w (byte 0 <-> byte 3,
 * byte 1 <-> byte 2).
 *
 * TBLS is the number of 256-entry tables in crc_table: 8 when the
 * word-at-a-time routines are compiled (BYFOUR), otherwise just the single
 * byte-wise table.
 */
#ifdef BYFOUR
#  define REV(w) ((((w)>>24)&0xff)+(((w)>>8)&0xff00)+ \
                (((w)&0xff00)<<8)+(((w)&0xff)<<24))
   /* word-at-a-time implementations, selected by crc32() at run time */
   local unsigned long crc32_little OF((unsigned long,
                        const unsigned char FAR *, unsigned));
   local unsigned long crc32_big OF((unsigned long,
                        const unsigned char FAR *, unsigned));
#  define TBLS 8
#else
#  define TBLS 1
#endif /* BYFOUR */

/* Local functions for crc concatenation: forward declarations of the
   file-private GF(2) matrix helpers and of the shared implementation behind
   crc32_combine() and crc32_combine64(). */
local unsigned long gf2_matrix_times OF((unsigned long *mat,
                                         unsigned long vec));
local void gf2_matrix_square OF((unsigned long *square, unsigned long *mat));
local uLong crc32_combine_(uLong crc1, uLong crc2, z_off64_t len2);


#ifdef DYNAMIC_CRC_TABLE

/* 1 until make_crc_table() has finished filling crc_table, then 0 */
local volatile int crc_table_empty = 1;
/* the run-time generated tables (only in DYNAMIC_CRC_TABLE builds) */
local unsigned long FAR crc_table[TBLS][256];
local void make_crc_table OF((void));
#ifdef MAKECRCH
   /* helper used by make_crc_table() to dump one table into crc32.h */
   local void write_table OF((FILE *, const unsigned long FAR *));
#endif /* MAKECRCH */
/*
  Generate tables for a byte-wise 32-bit CRC calculation on the polynomial:
  x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x+1.

  Polynomials over GF(2) are represented in binary, one bit per coefficient,
  with the lowest powers in the most significant bit.  Then adding polynomials
  is just exclusive-or, and multiplying a polynomial by x is a right shift by
  one.  If we call the above polynomial p, and represent a byte as the
  polynomial q, also with the lowest power in the most significant bit (so the
  byte 0xb1 is the polynomial x^7+x^3+x+1), then the CRC is (q*x^32) mod p,
  where a mod b means the remainder after dividing a by b.

  This calculation is done using the shift-register method of multiplying and
  taking the remainder.  The register is initialized to zero, and for each
  incoming bit, x^32 is added mod p to the register if the bit is a one (where
  x^32 mod p is p+x^32 = x^26+...+1), and the register is multiplied mod p by
  x (which is shifting right by one and adding x^32 mod p if the bit shifted
  out is a one).  We start with the highest power (least significant bit) of
  q and repeat for all eight bits of q.

  The first table is simply the CRC of all possible eight bit values.  This is
  all the information needed to generate CRCs on data a byte at a time for all
  combinations of CRC register values and incoming bytes.  The remaining tables
  allow for word-at-a-time CRC calculation for both big-endian and little-
  endian machines, where a word is four bytes.
*/
/*
  Fill crc_table[] at run time and clear crc_table_empty when done.

  Table 0 is the byte-wise CRC table.  With BYFOUR, tables 1..3 hold the CRC
  of each byte value followed by one, two and three zero bytes, and tables
  4..7 are the byte-reversed (REV) forms of tables 0..3.

  When MAKECRCH is defined, the tables are additionally written to the file
  "crc32.h" in the current directory; if that file cannot be opened the
  function returns without writing anything.
*/
local void make_crc_table()
{
    unsigned long c;
    int n, k;
    unsigned long poly;                 /* polynomial exclusive-or pattern */
    /* terms of polynomial defining this crc (except x^32): */
    static volatile int first = 1;      /* flag to limit concurrent making */
    static const unsigned char p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};

    /* See if another task is already doing this (not thread-safe, but better
       than nothing -- significantly reduces the duration of the vulnerability
       in case the advice about DYNAMIC_CRC_TABLE is ignored) */
    if (first) {
        first = 0;

        /* make exclusive-or pattern from polynomial (0xedb88320UL); the
           element count is cast to int so the comparison with the signed
           loop index is not a mixed signed/unsigned one */
        poly = 0UL;
        for (n = 0; n < (int)(sizeof(p)/sizeof(unsigned char)); n++)
            poly |= 1UL << (31 - p[n]);

        /* generate a crc for every 8-bit value */
        for (n = 0; n < 256; n++) {
            c = (unsigned long)n;
            for (k = 0; k < 8; k++)
                c = c & 1 ? poly ^ (c >> 1) : c >> 1;
            crc_table[0][n] = c;
        }

#ifdef BYFOUR
        /* generate crc for each value followed by one, two, and three zeros,
           and then the byte reversal of those as well as the first table */
        for (n = 0; n < 256; n++) {
            c = crc_table[0][n];
            crc_table[4][n] = REV(c);
            for (k = 1; k < 4; k++) {
                c = crc_table[0][c & 0xff] ^ (c >> 8);
                crc_table[k][n] = c;
                crc_table[k + 4][n] = REV(c);
            }
        }
#endif /* BYFOUR */

        /* publish: from here on readers may use the tables */
        crc_table_empty = 0;
    }
    else {      /* not first */
        /* wait for the other guy to finish (not efficient, but rare) */
        while (crc_table_empty)
            ;
    }

#ifdef MAKECRCH
    /* write out CRC tables to crc32.h */
    {
        FILE *out;

        out = fopen("crc32.h", "w");
        if (out == NULL) return;
        fprintf(out, "/* crc32.h -- tables for rapid CRC calculation\n");
        fprintf(out, " * Generated automatically by crc32.c\n */\n\n");
        fprintf(out, "local const unsigned long FAR ");
        fprintf(out, "crc_table[TBLS][256] =\n{\n  {\n");
        write_table(out, crc_table[0]);
#  ifdef BYFOUR
        fprintf(out, "#ifdef BYFOUR\n");
        for (k = 1; k < 8; k++) {
            fprintf(out, "  },\n  {\n");
            write_table(out, crc_table[k]);
        }
        fprintf(out, "#endif\n");
#  endif /* BYFOUR */
        fprintf(out, "  }\n};\n");
        fclose(out);
    }
#endif /* MAKECRCH */
}

#ifdef MAKECRCH
/* Emit the 256 entries of one table to `out` as C initializer text: five
   "0x%08lxUL" values per line, each line indented by four spaces, entries
   separated by commas, and no comma after the final entry. */
local void write_table(FILE *out, const unsigned long FAR *table)
{
    int idx;

    for (idx = 0; idx < 256; idx++) {
        const char *lead;
        const char *trail;

        /* indent only the first entry of each row of five */
        if (idx % 5 == 0)
            lead = "    ";
        else
            lead = "";

        /* last entry ends the table; fifth entry of a row ends the line */
        if (idx == 255)
            trail = "\n";
        else if (idx % 5 == 4)
            trail = ",\n";
        else
            trail = ", ";

        fprintf(out, "%s0x%08lxUL%s", lead, table[idx], trail);
    }
}
#endif /* MAKECRCH */

#else /* !DYNAMIC_CRC_TABLE */
/* ========================================================================
 * Tables of CRC-32s of all single-byte values, made by make_crc_table().
 */
ABC_NAMESPACE_IMPL_END
#include "crc32.h"
ABC_NAMESPACE_IMPL_START
#endif /* DYNAMIC_CRC_TABLE */

/* =========================================================================
 * This function can be used by asm versions of crc32()
 */
/* Return a pointer to the first entry of crc_table.  In DYNAMIC_CRC_TABLE
   builds the tables are generated first if that has not happened yet. */
const unsigned long FAR * ZEXPORT get_crc_table()
{
    const unsigned long FAR *tables;

#ifdef DYNAMIC_CRC_TABLE
    if (crc_table_empty) {
        make_crc_table();
    }
#endif /* DYNAMIC_CRC_TABLE */

    tables = (const unsigned long FAR *)crc_table;
    return tables;
}

/* ========================================================================= */
/* DO1 folds the next input byte into the running value using table 0 and
   advances the byte pointer; DO8 is eight DO1 steps.  Both must be expanded
   in a scope that has variables named `crc` and `buf`. */
#define DO1 crc = crc_table[0][((int)crc ^ (*buf++)) & 0xff] ^ (crc >> 8)
#define DO8 DO1; DO1; DO1; DO1; DO1; DO1; DO1; DO1

/* ========================================================================= */
/* Update the running CRC-32 `crc` with the `len` bytes at `buf` and return
   the new value.  Returns 0 when `buf` is Z_NULL.  In BYFOUR builds where a
   pointer and ptrdiff_t have the same size, the work is handed to the
   word-at-a-time routine matching the byte order detected at run time;
   otherwise the bytes are processed one at a time here. */
unsigned long ZEXPORT crc32(unsigned long crc, const unsigned char FAR *buf, uInt len)
{
    if (buf == Z_NULL)
        return 0UL;

#ifdef DYNAMIC_CRC_TABLE
    if (crc_table_empty) {
        make_crc_table();
    }
#endif /* DYNAMIC_CRC_TABLE */

#ifdef BYFOUR
    if (sizeof(void *) == sizeof(ptrdiff_t)) {
        /* the first byte in memory of the value 1 is non-zero exactly when
           the least significant byte is stored first */
        u4 probe = 1;
        const int low_byte_first = *((unsigned char *)(&probe)) != 0;

        return low_byte_first ? crc32_little(crc, buf, len)
                              : crc32_big(crc, buf, len);
    }
#endif /* BYFOUR */

    /* byte-wise fallback: pre-invert, eight bytes per round, then the rest */
    crc ^= 0xffffffffUL;
    for (; len >= 8; len -= 8) {
        DO8;
    }
    while (len != 0) {
        DO1;
        len--;
    }
    return crc ^ 0xffffffffUL;
}

#ifdef BYFOUR

/* ========================================================================= */
/* DOLIT4 xors the next four-byte word at buf4 into c (advancing buf4) and
   replaces c with the combination of tables 3, 2, 1, 0 indexed by its four
   bytes, lowest byte first; DOLIT32 is eight DOLIT4 steps (32 bytes).  Both
   must be expanded in a scope that has variables named `c` and `buf4`. */
#define DOLIT4 c ^= *buf4++; \
        c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \
            crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24]
#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4

/* ========================================================================= */
/* Word-at-a-time CRC-32 update used by crc32() on little-endian machines.
 *
 * crc: running CRC value; buf: input bytes; len: number of bytes.
 * Returns the updated CRC.
 *
 * Bytes are consumed one at a time until buf is four-byte aligned, then 32
 * and 4 bytes at a time through DOLIT32/DOLIT4, then the remaining (<4)
 * bytes one at a time.
 *
 * The locals are deliberately not declared `register`: that storage class
 * was removed in C++17 and has no effect on modern compilers.
 */
local unsigned long crc32_little(unsigned long crc, const unsigned char FAR *buf, unsigned len)
{
    u4 c;
    const u4 FAR *buf4;

    c = (u4)crc;
    c = ~c;

    /* leading unaligned bytes */
    while (len && ((ptrdiff_t)buf & 3)) {
        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
        len--;
    }

    /* aligned words */
    buf4 = (const u4 FAR *)(const void FAR *)buf;
    while (len >= 32) {
        DOLIT32;
        len -= 32;
    }
    while (len >= 4) {
        DOLIT4;
        len -= 4;
    }
    buf = (const unsigned char FAR *)buf4;

    /* trailing bytes */
    if (len) do {
        c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8);
    } while (--len);
    c = ~c;
    return (unsigned long)c;
}

/* ========================================================================= */
/* DOBIG4 xors the next four-byte word at buf4 into c (advancing buf4) and
   replaces c with the combination of the byte-reversed tables 4..7 indexed by
   its four bytes; DOBIG32 is eight DOBIG4 steps (32 bytes).  Both must be
   expanded in a scope that has variables named `c` and `buf4`.

   The word pointer is post-incremented, so it never has to be moved to one
   element before the start of the buffer (which would be undefined
   behaviour). */
#define DOBIG4 c ^= *buf4++; \
        c = crc_table[4][c & 0xff] ^ crc_table[5][(c >> 8) & 0xff] ^ \
            crc_table[6][(c >> 16) & 0xff] ^ crc_table[7][c >> 24]
#define DOBIG32 DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4; DOBIG4

/* ========================================================================= */
/* Word-at-a-time CRC-32 update used by crc32() on big-endian machines.
 *
 * crc: running CRC value; buf: input bytes; len: number of bytes.
 * Returns the updated CRC.
 *
 * The register c is kept byte-reversed (REV) while working.  Bytes are
 * consumed one at a time until buf is four-byte aligned, then 32 and 4 bytes
 * at a time through DOBIG32/DOBIG4, then the remaining (<4) bytes one at a
 * time.
 *
 * The locals are deliberately not declared `register`: that storage class
 * was removed in C++17 and has no effect on modern compilers.
 */
local unsigned long crc32_big(unsigned long crc, const unsigned char FAR *buf, unsigned len)
{
    u4 c;
    const u4 FAR *buf4;

    c = REV((u4)crc);
    c = ~c;

    /* leading unaligned bytes */
    while (len && ((ptrdiff_t)buf & 3)) {
        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
        len--;
    }

    /* aligned words */
    buf4 = (const u4 FAR *)(const void FAR *)buf;
    while (len >= 32) {
        DOBIG32;
        len -= 32;
    }
    while (len >= 4) {
        DOBIG4;
        len -= 4;
    }
    buf = (const unsigned char FAR *)buf4;

    /* trailing bytes */
    if (len) do {
        c = crc_table[4][(c >> 24) ^ *buf++] ^ (c << 8);
    } while (--len);
    c = ~c;
    return (unsigned long)(REV(c));
}

#endif /* BYFOUR */

/* Number of rows in the GF(2) operator matrices used by crc32_combine_();
   each row is stored as one unsigned long. */
#define GF2_DIM 32      /* dimension of GF(2) vectors (length of CRC) */

/* ========================================================================= */
/* Multiply the GF(2) matrix `mat` by the bit vector `vec`: the result is the
   exclusive-or of every row mat[i] for which bit i of `vec` is set.  Rows
   are visited only up to the highest set bit of `vec`. */
local unsigned long gf2_matrix_times(unsigned long *mat, unsigned long vec)
{
    unsigned long acc = 0;
    const unsigned long *row;

    for (row = mat; vec != 0; vec >>= 1, row++) {
        if ((vec & 1) != 0)
            acc ^= *row;
    }
    return acc;
}

/* ========================================================================= */
/* Store in `square` the product of the GF(2) matrix `mat` with itself:
   row i of the result is `mat` times row i of `mat`, for all GF2_DIM rows. */
local void gf2_matrix_square(unsigned long *square, unsigned long *mat)
{
    int row = 0;

    while (row < GF2_DIM) {
        square[row] = gf2_matrix_times(mat, mat[row]);
        row++;
    }
}

/* ========================================================================= */
/* Shared implementation of crc32_combine() and crc32_combine64().
 *
 * crc1, crc2: CRC values to combine; len2: byte length associated with crc2.
 * Returns crc1 unchanged when len2 <= 0; otherwise applies the "append len2
 * zero bytes" operator to crc1 and exclusive-ors the result with crc2.
 */
local uLong crc32_combine_(uLong crc1, uLong crc2, z_off64_t len2)
{
    int n;
    unsigned long zeros_a[GF2_DIM];  /* operator for an even power-of-two number of zero bits */
    unsigned long zeros_b[GF2_DIM];  /* operator for an odd power-of-two number of zero bits */

    /* nothing to append (negative lengths are treated the same way) */
    if (len2 <= 0)
        return crc1;

    /* operator for a single zero bit: the CRC-32 polynomial in row 0, then
       one bit per row, shifted one place further left on each row */
    zeros_b[0] = 0xedb88320UL;
    for (n = 1; n < GF2_DIM; n++)
        zeros_b[n] = 1UL << (n - 1);

    /* square twice: two zero bits, then four zero bits */
    gf2_matrix_square(zeros_a, zeros_b);
    gf2_matrix_square(zeros_b, zeros_a);

    /* walk the bits of len2 from least significant upwards; the first
       squaring below yields the operator for one zero byte (eight bits) */
    for (;;) {
        gf2_matrix_square(zeros_a, zeros_b);
        if (len2 & 1)
            crc1 = gf2_matrix_times(zeros_a, crc1);
        len2 >>= 1;
        if (len2 == 0)
            break;

        /* same step with the two operator buffers swapped */
        gf2_matrix_square(zeros_b, zeros_a);
        if (len2 & 1)
            crc1 = gf2_matrix_times(zeros_b, crc1);
        len2 >>= 1;
        if (len2 == 0)
            break;
    }

    /* fold in the second crc */
    return crc1 ^ crc2;
}

/* ========================================================================= */
/* Public entry point taking a z_off_t length; forwards to crc32_combine_()
   and returns its result. */
uLong ZEXPORT crc32_combine(uLong crc1, uLong crc2, z_off_t len2)
{
    const z_off64_t length = len2;

    return crc32_combine_(crc1, crc2, length);
}

/* Public entry point taking a z_off64_t length; forwards to crc32_combine_()
   and returns its result. */
uLong ZEXPORT crc32_combine64(uLong crc1, uLong crc2, z_off64_t len2)
{
    const uLong combined = crc32_combine_(crc1, crc2, len2);

    return combined;
}


ABC_NAMESPACE_IMPL_END