1 files changed, 735 insertions, 0 deletions
diff --git a/tools/mcufontencoder/src/encode_rlefont.cc b/tools/mcufontencoder/src/encode_rlefont.cc
new file mode 100644
index 00000000..602f6033
--- /dev/null
+++ b/tools/mcufontencoder/src/encode_rlefont.cc
@@ -0,0 +1,735 @@
+#include "encode_rlefont.hh"
+#include <algorithm>
+#include <stdexcept>
+#include "ccfixes.hh"
+
+// Number of reserved codes before the dictionary entries.
+#define DICT_START 24
+
+// Special reference to mean "fill with zeros to the end of the glyph"
+#define REF_FILLZEROS 16
+
+// RLE codes
+#define RLE_CODEMASK    0xC0
+#define RLE_VALMASK     0x3F
+#define RLE_ZEROS       0x00 // 0 to 63 zeros
+#define RLE_64ZEROS     0x40 // (1 to 64) * 64 zeros
+#define RLE_ONES        0x80 // 1 to 64 full alphas
+#define RLE_SHADE       0xC0 // 1 to 4 partial alphas
+
+// Dictionary "fill entries" for encoding bits directly.
+#define DICT_START7BIT  4
+#define DICT_START6BIT  132
+#define DICT_START5BIT  196
+#define DICT_START4BIT  228
+#define DICT_START3BIT  244
+#define DICT_START2BIT  252
+
+namespace mcufont {
+namespace rlefont {
+
+// Get bit count for the "fill entries"
+static size_t fillentry_bitcount(size_t index)
+{
+    if (index >= DICT_START2BIT)
+        return 2;
+    else if (index >= DICT_START3BIT)
+        return 3;
+    else if (index >= DICT_START4BIT)
+        return 4;
+    else if (index >= DICT_START5BIT)
+        return 5;
+    else if (index >= DICT_START6BIT)
+        return 6;
+    else
+        return 7;
+}
+
+// Count the number of equal pixels at the beginning of the pixelstring.
+static size_t prefix_length(const DataFile::pixels_t &pixels, size_t pos)
+{
+    uint8_t pixel = pixels.at(pos);
+    size_t count = 1;
+    while (pos + count < pixels.size() &&
+            pixels.at(pos + count) == pixel)
+    {
+        count++;
+    }
+    return count;
+}
+
+// Perform the RLE encoding for a dictionary entry.
+static encoded_font_t::rlestring_t encode_rle(const DataFile::pixels_t &pixels)
+{
+    encoded_font_t::rlestring_t result;
+    
+    size_t pos = 0;
+    while (pos < pixels.size())
+    {
+        uint8_t pixel = pixels.at(pos);
+        size_t count = prefix_length(pixels, pos);
+        pos += count;
+        
+        if (pixel == 0)
+        {
+            // Up to 63 zeros can be encoded with RLE_ZEROS. If there are more,
+            // encode using RLE_64ZEROS, and then whatever remains with RLE_ZEROS.
+            while (count >= 64)
+            {
+                size_t c = (count > 4096) ? 64 : (count / 64);
+                result.push_back(RLE_64ZEROS | (c - 1));
+                count -= c * 64;
+            }
+            
+            if (count)
+            {
+                result.push_back(RLE_ZEROS | count);
+            }
+        }
+        else if (pixel == 15)
+        {
+            // Encode ones.
+            while (count)
+            {
+                size_t c = (count > 64) ? 64 : count;
+                result.push_back(RLE_ONES | (c - 1));
+                count -= c;
+            }
+        }
+        else
+        {
+            // Encode shades.
+            while (count)
+            {
+                size_t c = (count > 4) ? 4 : count;
+                result.push_back(RLE_SHADE | ((c - 1) << 4) | pixel);
+                count -= c;
+            }
+        }
+    }
+    
+    return result;
+}
+
+// We use a tree structure to represent the dictionary entries.
+// Using this tree, we can perform a combined Aho-Corasick string matching
+// and breadth-first search to find the optimal encoding of glyph data.
+class DictTreeNode
+{
+public:
+    constexpr DictTreeNode():
+        m_index(-1),
+        m_ref(false),
+        m_length(0),
+        m_child0(nullptr),
+        m_child15(nullptr),
+        m_suffix(nullptr)
+        {}
+    
+    void SetChild(uint8_t p, DictTreeNode *child)
+    {
+        if (p == 0)
+            m_child0 = child;
+        else if (p == 15)
+            m_child15 = child;
+        else if (p > 15)
+            throw std::logic_error("invalid pixel alpha: " + std::to_string(p));
+        else
+        {
+            if (!m_children)
+            {
+                m_children.reset(new DictTreeNode*[14]());
+            }
+            m_children[p - 1] = child;
+        }
+    }
+    
+    DictTreeNode* GetChild(uint8_t p) const
+    { 
+        if (p == 0)
+            return m_child0;
+        else if (p == 15)
+            return m_child15;
+        else if (p > 15)
+            throw std::logic_error("invalid pixel alpha: " + std::to_string(p));
+        else if (!m_children)
+            return nullptr;
+        else
+            return m_children[p - 1];
+    }
+    
+    bool HasIntermediateChildren() const { return m_children != nullptr; }
+    
+    int GetIndex() const { return m_index; }
+    void SetIndex(int index) { m_index = index; }
+    bool GetRef() const { return m_ref; }
+    void SetRef(bool ref) { m_ref = ref; }
+    size_t GetLength() const { return m_length; }
+    void SetLength(size_t length) { m_length = length; }
+    DictTreeNode *GetSuffix() const { return m_suffix; }
+    void SetSuffix(DictTreeNode *suffix) { m_suffix = suffix; }
+    
+private:
+    // Index of dictionary entry or -1 if just a intermediate node.
+    int m_index;
+    
+    // True for ref-encoded dictionary entries. Used to avoid recursion when
+    // encoding them.
+    bool m_ref;
+    
+    // Length of the corresponding dictionary entry replacement.
+    // Equals the distance from the tree root.
+    size_t m_length;
+    
+    // Most tree nodes will only ever contains children for 0 or 15.
+    // Therefore the array for other nodes is allocated only on demand.
+    DictTreeNode *m_child0;
+    DictTreeNode *m_child15;
+    std::unique_ptr<DictTreeNode*[]> m_children;
+    
+    // Pointer to the longest suffix of this entry that exists in the
+    // dictionary.
+    DictTreeNode *m_suffix;
+};
+
+// Preallocated array for tree nodes
+class TreeAllocator
+{
+public:
+    TreeAllocator(size_t count)
+    {
+        m_storage.reset(new DictTreeNode[count]);
+        m_next = m_storage.get();
+        m_left = count;
+    }
+    
+    DictTreeNode *allocate()
+    {
+        if (m_left == 0)
+            throw std::logic_error("Ran out of preallocated entries");
+        
+        m_left--;
+        return m_next++;
+    }
+    
+private:
+    std::unique_ptr<DictTreeNode[]> m_storage;
+    DictTreeNode *m_next;
+    size_t m_left;
+};
+
+// Add a new dictionary entry to the tree. Adds the intermediate nodes, but
+// does not yet fill the suffix pointers.
+static DictTreeNode* add_tree_entry(const DataFile::pixels_t &entry, int index,
+                                    bool ref_encoded, DictTreeNode *root,
+                                    TreeAllocator &storage)
+{
+    DictTreeNode* node = root;
+    for (uint8_t p : entry)
+    {
+        DictTreeNode* branch = node->GetChild(p);
+        if (!branch)
+        {
+            branch = storage.allocate();
+            node->SetChild(p, branch);
+        }
+        
+        node = branch;
+    }
+    
+    // Replace the entry if it either does not yet have an encoding, or if
+    // the new entry is non-ref (i.e. can be used in more situations).
+    if (node->GetIndex() < 0 || (node->GetRef() && !ref_encoded))
+    {
+        node->SetIndex(index);
+        node->SetRef(ref_encoded);
+        node->SetLength(entry.size());
+    }
+    
+    return node;
+}
+
+// Walk the tree and find if the entry exists in the tree. If it does,
+// returns a pointer to it, otherwise nullptr.
+static DictTreeNode *find_tree_node(DataFile::pixels_t::const_iterator begin,
+                                    DataFile::pixels_t::const_iterator end,
+                                    DictTreeNode *root)
+{
+    DictTreeNode* node = root;
+    while (begin != end)
+    {
+        uint8_t pixel = *begin++;
+        node = node->GetChild(pixel);
+        
+        if (!node)
+            return nullptr;
+    }
+    
+    return node;
+}
+
+// Fill in the suffix pointers recursively for the given subtree.
+static void fill_tree_suffixes(DictTreeNode *root, DictTreeNode *subtree,
+    const DataFile::pixels_t &entry)
+{
+    for (size_t i = 1; i < entry.size(); i++)
+    {
+        DictTreeNode *node = find_tree_node(entry.begin() + i, entry.end(), root);
+        if (node)
+        {
+            subtree->SetSuffix(node);
+            break;
+        }
+    }
+    
+    if (!subtree->GetSuffix())
+        subtree->SetSuffix(root);
+    
+    DataFile::pixels_t newentry(entry);
+    newentry.resize(entry.size() + 1);
+    for (uint8_t i = 0; i < 16; i++)
+    {
+        // Speed-up for the common case of 0 and 15 alphas.
+        if (i == 1 && !subtree->HasIntermediateChildren())
+            i += 14;
+        
+        DictTreeNode *child = subtree->GetChild(i);
+        if (child)
+        {
+            newentry.at(entry.size()) = i;
+            fill_tree_suffixes(root, child, newentry);
+        }
+    }
+}
+
+// Construct a lookup tree from the dictionary entries.
+static DictTreeNode* construct_tree(const std::vector<DataFile::dictentry_t> &dictionary,
+                                    TreeAllocator &storage, bool fast)
+{
+    DictTreeNode* root = storage.allocate();
+    
+    // Populate the hardcoded entries for 0 to 15 alpha.
+    for (int j = 0; j < 16; j++)
+    {
+        DictTreeNode *node = storage.allocate();
+        node->SetIndex(j);
+        node->SetRef(false);
+        node->SetLength(1);
+        root->SetChild(j, node);
+    }
+    
+    // Populate the actual dictionary entries
+    size_t i = DICT_START;
+    for (DataFile::dictentry_t d : dictionary)
+    {
+        if (!d.replacement.size())
+            break;
+        
+        add_tree_entry(d.replacement, i, d.ref_encode, root, storage);
+        i++;
+    }
+    
+    if (!fast)
+    {
+        // Populate the fill entries for rest of dictionary
+        for (; i < 256; i++)
+        {
+            DataFile::pixels_t pixels;
+            size_t bitcount = fillentry_bitcount(i);
+            uint8_t byte = i - DICT_START7BIT;
+            for (size_t j = 0; j < bitcount; j++)
+            {
+                uint8_t p = (byte & (1 << j)) ? 15 : 0;
+                pixels.push_back(p);
+            }
+            
+            add_tree_entry(pixels, i, false, root, storage);
+        }
+        
+        // Fill in the suffix pointers for optimal encoding
+        DataFile::pixels_t nullentry;
+        fill_tree_suffixes(root, root, nullentry);
+    }
+    
+    return root;
+}
+
+// Structure for keeping track of the shortest encoding to reach particular
+// point of the pixel string.
+struct encoding_link_t
+{
+    // Index of the position prior to the last dictionary entry.
+    size_t previous;
+    
+    // Index of the dictionary entry that brings us to this point.
+    int index;
+    
+    // Number of links to get here from the start of the string.
+    size_t length;
+    
+    constexpr encoding_link_t(): previous(0), index(-1), length(9999999) {}
+};
+
+// Perform the reference encoding for a glyph entry (optimal version).
+// Uses a modified Aho-Corasick algorithm combined with breadth first search
+// to find the shortest representation.
+static encoded_font_t::refstring_t encode_ref_slow(const DataFile::pixels_t &pixels,
+                                                   const DictTreeNode *root,
+                                                   bool is_glyph)
+{
+    // Chain of encodings. Each entry in this array corresponds to a position
+    // in the pixel string.
+    std::unique_ptr<encoding_link_t[]> chain(new encoding_link_t[pixels.size() + 1]);
+    
+    chain[0].previous = 0;
+    chain[0].index = 0;
+    chain[0].length = 0;
+    
+    // Read the pixels one-by-one and update the encoding links accordingly.
+    const DictTreeNode *node = root;
+    for (size_t pos = 0; pos < pixels.size(); pos++)
+    {
+        uint8_t pixel = pixels.at(pos);
+        const DictTreeNode *branch = node->GetChild(pixel);
+        
+        while (!branch)
+        {
+            // Cannot expand this sequence, defer to suffix.
+            node = node->GetSuffix();
+            branch = node->GetChild(pixel);
+        }
+        
+        node = branch;
+        
+        // We have arrived at a new node, add it and any proper suffixes to
+        // the link chain.
+        const DictTreeNode *suffix = node;
+        while (suffix != root)
+        {
+            if (suffix->GetIndex() >= 0 && (is_glyph || !suffix->GetRef()))
+            {
+                encoding_link_t link;
+                link.previous = pos + 1 - suffix->GetLength();
+                link.index = suffix->GetIndex();
+                link.length = chain[link.previous].length + 1;
+                
+                if (link.length < chain[pos + 1].length)
+                    chain[pos + 1] = link;
+            }
+            suffix = suffix->GetSuffix();
+        }
+    }
+    
+    // Check if we can shorten the final encoding using REF_FILLZEROS.
+    if (is_glyph)
+    {
+        for (size_t pos = pixels.size() - 1; pos > 0; pos--)
+        {
+            if (pixels.at(pos) != 0)
+                break;
+            
+            encoding_link_t link;
+            link.previous = pos;
+            link.index = REF_FILLZEROS;
+            link.length = chain[pos].length + 1;
+            
+            if (link.length <= chain[pixels.size()].length)
+                chain[pixels.size()] = link;
+        }
+    }
+    
+    // Backtrack from the final link back to the start and construct the
+    // encoded string.
+    encoded_font_t::refstring_t result;
+    size_t len = chain[pixels.size()].length;
+    result.resize(len);
+    
+    size_t pos = pixels.size();
+    for (size_t i = len; i > 0; i--)
+    {
+        result.at(i - 1) = chain[pos].index;
+        pos = chain[pos].previous;
+    }
+    
+    return result;
+}
+
+// Walk the tree as far as possible following the given pixel string iterator.
+// Returns number of pixels encoded, and index is set to the dictionary reference.
+static size_t walk_tree(const DictTreeNode *tree,
+                        DataFile::pixels_t::const_iterator pixels,
+                        DataFile::pixels_t::const_iterator pixelsend,
+                        int &index, bool is_glyph)
+{
+    size_t best_length = 0;
+    size_t length = 0;
+    index = -1;
+    
+    const DictTreeNode* node = tree;
+    while (pixels != pixelsend)
+    {
+        uint8_t pixel = *pixels++;
+        node = node->GetChild(pixel);
+        
+        if (!node)
+            break;
+        
+        length++;
+        
+        if (is_glyph || !node->GetRef())
+        {
+            if (node->GetIndex() >= 0)
+            {
+                index = node->GetIndex();
+                best_length = length;
+            }
+        }
+    }
+    
+    if (index < 0)
+        throw std::logic_error("walk_tree failed to find a valid encoding");
+    
+    return best_length;
+}
+
+// Perform the reference encoding for a glyph entry (fast version).
+// Uses a simple greedy search to find select the encodings.
+static encoded_font_t::refstring_t encode_ref_fast(const DataFile::pixels_t &pixels,
+                                                   const DictTreeNode *tree,
+                                                   bool is_glyph)
+{
+    encoded_font_t::refstring_t result;
+    
+    // Strip any zeroes from end
+    size_t end = pixels.size();
+    
+    if (is_glyph)
+    {
+        while (end > 0 && pixels.at(end - 1) == 0) end--;
+    }
+    
+    size_t i = 0;
+    while (i < end)
+    {
+        int index;
+        i += walk_tree(tree, pixels.begin() + i, pixels.end(), index, is_glyph);
+        result.push_back(index);
+    }
+    
+    if (i < pixels.size())
+        result.push_back(REF_FILLZEROS);
+    
+    return result;
+}
+
+static encoded_font_t::refstring_t encode_ref(const DataFile::pixels_t &pixels,
+                                              const DictTreeNode *tree,
+                                              bool is_glyph, bool fast)
+{
+    if (fast)
+        return encode_ref_fast(pixels, tree, is_glyph);
+    else
+        return encode_ref_slow(pixels, tree, is_glyph);
+}
+
+// Compare dictionary entries by their coding type.
+// Sorts RLE-encoded entries first and any empty entries last.
+static bool cmp_dict_coding(const DataFile::dictentry_t &a,
+                            const DataFile::dictentry_t &b)
+{
+    if (a.replacement.size() == 0 && b.replacement.size() != 0)
+        return false;
+    else if (a.replacement.size() != 0 && b.replacement.size() == 0)
+        return true;
+    else if (a.ref_encode == false && b.ref_encode == true)
+        return true;
+    else
+        return false;
+}
+
+size_t estimate_tree_node_count(const std::vector<DataFile::dictentry_t> &dict)
+{
+    size_t count = DICT_START; // Preallocated entries
+    for (const DataFile::dictentry_t &d: dict)
+    {
+        count += d.replacement.size();
+    }
+    count += 128 * 7; // Fill entries
+    return count;
+}
+
+std::unique_ptr<encoded_font_t> encode_font(const DataFile &datafile,
+                                            bool fast)
+{
+    std::unique_ptr<encoded_font_t> result(new encoded_font_t);
+    
+    // Sort the dictionary so that RLE-coded entries come first.
+    // This way the two are easy to distinguish based on index.
+    std::vector<DataFile::dictentry_t> sorted_dict = datafile.GetDictionary();
+    std::stable_sort(sorted_dict.begin(), sorted_dict.end(), cmp_dict_coding);
+    
+    // Build the binary tree for looking up references.
+    size_t count = estimate_tree_node_count(sorted_dict);
+    TreeAllocator allocator(count);
+    DictTreeNode* tree = construct_tree(sorted_dict, allocator, fast);
+    
+    // Encode the dictionary entries, using either RLE or reference method.
+    for (const DataFile::dictentry_t &d : sorted_dict)
+    {
+        if (d.replacement.size() == 0)
+        {
+            continue;
+        }
+        else if (d.ref_encode)
+        {
+            result->ref_dictionary.push_back(encode_ref(d.replacement, tree, false, fast));
+        }
+        else
+        {
+            result->rle_dictionary.push_back(encode_rle(d.replacement));
+        }
+    }
+    
+    // Then reference-encode the glyphs
+    for (const DataFile::glyphentry_t &g : datafile.GetGlyphTable())
+    {
+        result->glyphs.push_back(encode_ref(g.data, tree, true, fast));
+    }
+    
+    // Optionally verify that the encoding was correct.
+    if (!fast)
+    {
+        for (size_t i = 0; i < datafile.GetGlyphCount(); i++)
+        {
+            std::unique_ptr<DataFile::pixels_t> decoded = 
+                decode_glyph(*result, i, datafile.GetFontInfo());
+            if (*decoded != datafile.GetGlyphEntry(i).data)
+            {
+                auto iter = std::mismatch(decoded->begin(), decoded->end(),
+                                          datafile.GetGlyphEntry(i).data.begin());
+                size_t pos = iter.first - decoded->begin();
+                throw std::logic_error("verification of glyph " + std::to_string(i) +
+                    " failed at position " + std::to_string(pos));
+            }
+        }
+    }
+    
+    return result;
+}
+
+size_t get_encoded_size(const encoded_font_t &encoded)
+{
+    size_t total = 0;
+    for (const encoded_font_t::rlestring_t &r : encoded.rle_dictionary)
+    {
+        total += r.size();
+        
+        if (r.size() != 0)
+            total += 2; // Offset table entry
+    }
+    for (const encoded_font_t::refstring_t &r : encoded.ref_dictionary)
+    {
+        total += r.size();
+        
+        if (r.size() != 0)
+            total += 2; // Offset table entry
+    }
+    for (const encoded_font_t::refstring_t &r : encoded.glyphs)
+    {
+        total += r.size();
+        total += 2; // Offset table entry
+        total += 1; // Width table entry
+    }
+    return total;
+}
+
+std::unique_ptr<DataFile::pixels_t> decode_glyph(
+    const encoded_font_t &encoded,
+    const encoded_font_t::refstring_t &refstring,
+    const DataFile::fontinfo_t &fontinfo)
+{
+    std::unique_ptr<DataFile::pixels_t> result(new DataFile::pixels_t);
+    
+    for (uint8_t ref : refstring)
+    {
+        if (ref <= 15)
+        {
+            result->push_back(ref);
+        }
+        else if (ref == REF_FILLZEROS)
+        {
+            result->resize(fontinfo.max_width * fontinfo.max_height, 0);
+        }
+        else if (ref < DICT_START)
+        {
+            throw std::logic_error("unknown code: " + std::to_string(ref));
+        }
+        else if (ref - DICT_START < (int)encoded.rle_dictionary.size())
+        {
+            for (uint8_t rle : encoded.rle_dictionary.at(ref - DICT_START))
+            {
+                if ((rle & RLE_CODEMASK) == RLE_ZEROS)
+                {
+                    for (int i = 0; i < (rle & RLE_VALMASK); i++)
+                    {
+                        result->push_back(0);
+                    }
+                }
+                else if ((rle & RLE_CODEMASK) == RLE_64ZEROS)
+                {
+                    for (int i = 0; i < ((rle & RLE_VALMASK) + 1) * 64; i++)
+                    {
+                        result->push_back(0);
+                    }
+                }
+                else if ((rle & RLE_CODEMASK) == RLE_ONES)
+                {
+                    for (int i = 0; i < (rle & RLE_VALMASK) + 1; i++)
+                    {
+                        result->push_back(15);
+                    }
+                }
+                else if ((rle & RLE_CODEMASK) == RLE_SHADE)
+                {
+                    uint8_t count, alpha;
+                    count = ((rle & RLE_VALMASK) >> 4) + 1;
+                    alpha = ((rle & RLE_VALMASK) & 0xF);
+                    for (int i = 0; i < count; i++)
+                    {
+                        result->push_back(alpha);
+                    }
+                }
+            }
+        }
+        else if (ref - DICT_START - encoded.rle_dictionary.size() < encoded.ref_dictionary.size())
+        {
+            size_t index = ref - DICT_START - encoded.rle_dictionary.size();
+            std::unique_ptr<DataFile::pixels_t> part =
+                decode_glyph(encoded, encoded.ref_dictionary.at(index),
+                             fontinfo);
+            result->insert(result->end(), part->begin(), part->end());
+        }
+        else
+        {
+            size_t bitcount = fillentry_bitcount(ref);
+            
+            uint8_t byte = ref - DICT_START7BIT;
+            for (size_t i = 0; i < bitcount; i++)
+            {
+                uint8_t p = (byte & (1 << i)) ? 15 : 0;
+                result->push_back(p);
+            }
+        }
+    }
+    
+    return result;
+}
+
+std::unique_ptr<DataFile::pixels_t> decode_glyph(
+    const encoded_font_t &encoded, size_t index,
+    const DataFile::fontinfo_t &fontinfo)
+{
+    return decode_glyph(encoded, encoded.glyphs.at(index), fontinfo);
+}
+
+}}