diff options
Diffstat (limited to 'tools/mcufontencoder/src/encode_rlefont.cc')
-rw-r--r-- | tools/mcufontencoder/src/encode_rlefont.cc | 735 |
1 files changed, 735 insertions, 0 deletions
diff --git a/tools/mcufontencoder/src/encode_rlefont.cc b/tools/mcufontencoder/src/encode_rlefont.cc new file mode 100644 index 00000000..602f6033 --- /dev/null +++ b/tools/mcufontencoder/src/encode_rlefont.cc @@ -0,0 +1,735 @@ +#include "encode_rlefont.hh" +#include <algorithm> +#include <stdexcept> +#include "ccfixes.hh" + +// Number of reserved codes before the dictionary entries. +#define DICT_START 24 + +// Special reference to mean "fill with zeros to the end of the glyph" +#define REF_FILLZEROS 16 + +// RLE codes +#define RLE_CODEMASK 0xC0 +#define RLE_VALMASK 0x3F +#define RLE_ZEROS 0x00 // 0 to 63 zeros +#define RLE_64ZEROS 0x40 // (1 to 64) * 64 zeros +#define RLE_ONES 0x80 // 1 to 64 full alphas +#define RLE_SHADE 0xC0 // 1 to 4 partial alphas + +// Dictionary "fill entries" for encoding bits directly. +#define DICT_START7BIT 4 +#define DICT_START6BIT 132 +#define DICT_START5BIT 196 +#define DICT_START4BIT 228 +#define DICT_START3BIT 244 +#define DICT_START2BIT 252 + +namespace mcufont { +namespace rlefont { + +// Get bit count for the "fill entries" +static size_t fillentry_bitcount(size_t index) +{ + if (index >= DICT_START2BIT) + return 2; + else if (index >= DICT_START3BIT) + return 3; + else if (index >= DICT_START4BIT) + return 4; + else if (index >= DICT_START5BIT) + return 5; + else if (index >= DICT_START6BIT) + return 6; + else + return 7; +} + +// Count the number of equal pixels at the beginning of the pixelstring. +static size_t prefix_length(const DataFile::pixels_t &pixels, size_t pos) +{ + uint8_t pixel = pixels.at(pos); + size_t count = 1; + while (pos + count < pixels.size() && + pixels.at(pos + count) == pixel) + { + count++; + } + return count; +} + +// Perform the RLE encoding for a dictionary entry. +static encoded_font_t::rlestring_t encode_rle(const DataFile::pixels_t &pixels) +{ + encoded_font_t::rlestring_t result; + + size_t pos = 0; + while (pos < pixels.size()) + { + uint8_t pixel = pixels.at(pos); + size_t count = prefix_length(pixels, pos); + pos += count; + + if (pixel == 0) + { + // Up to 63 zeros can be encoded with RLE_ZEROS. If there are more, + // encode using RLE_64ZEROS, and then whatever remains with RLE_ZEROS. + while (count >= 64) + { + size_t c = (count > 4096) ? 64 : (count / 64); + result.push_back(RLE_64ZEROS | (c - 1)); + count -= c * 64; + } + + if (count) + { + result.push_back(RLE_ZEROS | count); + } + } + else if (pixel == 15) + { + // Encode ones. + while (count) + { + size_t c = (count > 64) ? 64 : count; + result.push_back(RLE_ONES | (c - 1)); + count -= c; + } + } + else + { + // Encode shades. + while (count) + { + size_t c = (count > 4) ? 4 : count; + result.push_back(RLE_SHADE | ((c - 1) << 4) | pixel); + count -= c; + } + } + } + + return result; +} + +// We use a tree structure to represent the dictionary entries. +// Using this tree, we can perform a combined Aho-Corasick string matching +// and breadth-first search to find the optimal encoding of glyph data. +class DictTreeNode +{ +public: + constexpr DictTreeNode(): + m_index(-1), + m_ref(false), + m_length(0), + m_child0(nullptr), + m_child15(nullptr), + m_suffix(nullptr) + {} + + void SetChild(uint8_t p, DictTreeNode *child) + { + if (p == 0) + m_child0 = child; + else if (p == 15) + m_child15 = child; + else if (p > 15) + throw std::logic_error("invalid pixel alpha: " + std::to_string(p)); + else + { + if (!m_children) + { + m_children.reset(new DictTreeNode*[14]()); + } + m_children[p - 1] = child; + } + } + + DictTreeNode* GetChild(uint8_t p) const + { + if (p == 0) + return m_child0; + else if (p == 15) + return m_child15; + else if (p > 15) + throw std::logic_error("invalid pixel alpha: " + std::to_string(p)); + else if (!m_children) + return nullptr; + else + return m_children[p - 1]; + } + + bool HasIntermediateChildren() const { return m_children != nullptr; } + + int GetIndex() const { return m_index; } + void SetIndex(int index) { m_index = index; } + bool GetRef() const { return m_ref; } + void SetRef(bool ref) { m_ref = ref; } + size_t GetLength() const { return m_length; } + void SetLength(size_t length) { m_length = length; } + DictTreeNode *GetSuffix() const { return m_suffix; } + void SetSuffix(DictTreeNode *suffix) { m_suffix = suffix; } + +private: + // Index of dictionary entry or -1 if just a intermediate node. + int m_index; + + // True for ref-encoded dictionary entries. Used to avoid recursion when + // encoding them. + bool m_ref; + + // Length of the corresponding dictionary entry replacement. + // Equals the distance from the tree root. + size_t m_length; + + // Most tree nodes will only ever contains children for 0 or 15. + // Therefore the array for other nodes is allocated only on demand. + DictTreeNode *m_child0; + DictTreeNode *m_child15; + std::unique_ptr<DictTreeNode*[]> m_children; + + // Pointer to the longest suffix of this entry that exists in the + // dictionary. + DictTreeNode *m_suffix; +}; + +// Preallocated array for tree nodes +class TreeAllocator +{ +public: + TreeAllocator(size_t count) + { + m_storage.reset(new DictTreeNode[count]); + m_next = m_storage.get(); + m_left = count; + } + + DictTreeNode *allocate() + { + if (m_left == 0) + throw std::logic_error("Ran out of preallocated entries"); + + m_left--; + return m_next++; + } + +private: + std::unique_ptr<DictTreeNode[]> m_storage; + DictTreeNode *m_next; + size_t m_left; +}; + +// Add a new dictionary entry to the tree. Adds the intermediate nodes, but +// does not yet fill the suffix pointers. +static DictTreeNode* add_tree_entry(const DataFile::pixels_t &entry, int index, + bool ref_encoded, DictTreeNode *root, + TreeAllocator &storage) +{ + DictTreeNode* node = root; + for (uint8_t p : entry) + { + DictTreeNode* branch = node->GetChild(p); + if (!branch) + { + branch = storage.allocate(); + node->SetChild(p, branch); + } + + node = branch; + } + + // Replace the entry if it either does not yet have an encoding, or if + // the new entry is non-ref (i.e. can be used in more situations). + if (node->GetIndex() < 0 || (node->GetRef() && !ref_encoded)) + { + node->SetIndex(index); + node->SetRef(ref_encoded); + node->SetLength(entry.size()); + } + + return node; +} + +// Walk the tree and find if the entry exists in the tree. If it does, +// returns a pointer to it, otherwise nullptr. +static DictTreeNode *find_tree_node(DataFile::pixels_t::const_iterator begin, + DataFile::pixels_t::const_iterator end, + DictTreeNode *root) +{ + DictTreeNode* node = root; + while (begin != end) + { + uint8_t pixel = *begin++; + node = node->GetChild(pixel); + + if (!node) + return nullptr; + } + + return node; +} + +// Fill in the suffix pointers recursively for the given subtree. +static void fill_tree_suffixes(DictTreeNode *root, DictTreeNode *subtree, + const DataFile::pixels_t &entry) +{ + for (size_t i = 1; i < entry.size(); i++) + { + DictTreeNode *node = find_tree_node(entry.begin() + i, entry.end(), root); + if (node) + { + subtree->SetSuffix(node); + break; + } + } + + if (!subtree->GetSuffix()) + subtree->SetSuffix(root); + + DataFile::pixels_t newentry(entry); + newentry.resize(entry.size() + 1); + for (uint8_t i = 0; i < 16; i++) + { + // Speed-up for the common case of 0 and 15 alphas. + if (i == 1 && !subtree->HasIntermediateChildren()) + i += 14; + + DictTreeNode *child = subtree->GetChild(i); + if (child) + { + newentry.at(entry.size()) = i; + fill_tree_suffixes(root, child, newentry); + } + } +} + +// Construct a lookup tree from the dictionary entries. +static DictTreeNode* construct_tree(const std::vector<DataFile::dictentry_t> &dictionary, + TreeAllocator &storage, bool fast) +{ + DictTreeNode* root = storage.allocate(); + + // Populate the hardcoded entries for 0 to 15 alpha. + for (int j = 0; j < 16; j++) + { + DictTreeNode *node = storage.allocate(); + node->SetIndex(j); + node->SetRef(false); + node->SetLength(1); + root->SetChild(j, node); + } + + // Populate the actual dictionary entries + size_t i = DICT_START; + for (DataFile::dictentry_t d : dictionary) + { + if (!d.replacement.size()) + break; + + add_tree_entry(d.replacement, i, d.ref_encode, root, storage); + i++; + } + + if (!fast) + { + // Populate the fill entries for rest of dictionary + for (; i < 256; i++) + { + DataFile::pixels_t pixels; + size_t bitcount = fillentry_bitcount(i); + uint8_t byte = i - DICT_START7BIT; + for (size_t j = 0; j < bitcount; j++) + { + uint8_t p = (byte & (1 << j)) ? 15 : 0; + pixels.push_back(p); + } + + add_tree_entry(pixels, i, false, root, storage); + } + + // Fill in the suffix pointers for optimal encoding + DataFile::pixels_t nullentry; + fill_tree_suffixes(root, root, nullentry); + } + + return root; +} + +// Structure for keeping track of the shortest encoding to reach particular +// point of the pixel string. +struct encoding_link_t +{ + // Index of the position prior to the last dictionary entry. + size_t previous; + + // Index of the dictionary entry that brings us to this point. + int index; + + // Number of links to get here from the start of the string. + size_t length; + + constexpr encoding_link_t(): previous(0), index(-1), length(9999999) {} +}; + +// Perform the reference encoding for a glyph entry (optimal version). +// Uses a modified Aho-Corasick algorithm combined with breadth first search +// to find the shortest representation. +static encoded_font_t::refstring_t encode_ref_slow(const DataFile::pixels_t &pixels, + const DictTreeNode *root, + bool is_glyph) +{ + // Chain of encodings. Each entry in this array corresponds to a position + // in the pixel string. + std::unique_ptr<encoding_link_t[]> chain(new encoding_link_t[pixels.size() + 1]); + + chain[0].previous = 0; + chain[0].index = 0; + chain[0].length = 0; + + // Read the pixels one-by-one and update the encoding links accordingly. + const DictTreeNode *node = root; + for (size_t pos = 0; pos < pixels.size(); pos++) + { + uint8_t pixel = pixels.at(pos); + const DictTreeNode *branch = node->GetChild(pixel); + + while (!branch) + { + // Cannot expand this sequence, defer to suffix. + node = node->GetSuffix(); + branch = node->GetChild(pixel); + } + + node = branch; + + // We have arrived at a new node, add it and any proper suffixes to + // the link chain. + const DictTreeNode *suffix = node; + while (suffix != root) + { + if (suffix->GetIndex() >= 0 && (is_glyph || !suffix->GetRef())) + { + encoding_link_t link; + link.previous = pos + 1 - suffix->GetLength(); + link.index = suffix->GetIndex(); + link.length = chain[link.previous].length + 1; + + if (link.length < chain[pos + 1].length) + chain[pos + 1] = link; + } + suffix = suffix->GetSuffix(); + } + } + + // Check if we can shorten the final encoding using REF_FILLZEROS. + if (is_glyph) + { + for (size_t pos = pixels.size() - 1; pos > 0; pos--) + { + if (pixels.at(pos) != 0) + break; + + encoding_link_t link; + link.previous = pos; + link.index = REF_FILLZEROS; + link.length = chain[pos].length + 1; + + if (link.length <= chain[pixels.size()].length) + chain[pixels.size()] = link; + } + } + + // Backtrack from the final link back to the start and construct the + // encoded string. + encoded_font_t::refstring_t result; + size_t len = chain[pixels.size()].length; + result.resize(len); + + size_t pos = pixels.size(); + for (size_t i = len; i > 0; i--) + { + result.at(i - 1) = chain[pos].index; + pos = chain[pos].previous; + } + + return result; +} + +// Walk the tree as far as possible following the given pixel string iterator. +// Returns number of pixels encoded, and index is set to the dictionary reference. +static size_t walk_tree(const DictTreeNode *tree, + DataFile::pixels_t::const_iterator pixels, + DataFile::pixels_t::const_iterator pixelsend, + int &index, bool is_glyph) +{ + size_t best_length = 0; + size_t length = 0; + index = -1; + + const DictTreeNode* node = tree; + while (pixels != pixelsend) + { + uint8_t pixel = *pixels++; + node = node->GetChild(pixel); + + if (!node) + break; + + length++; + + if (is_glyph || !node->GetRef()) + { + if (node->GetIndex() >= 0) + { + index = node->GetIndex(); + best_length = length; + } + } + } + + if (index < 0) + throw std::logic_error("walk_tree failed to find a valid encoding"); + + return best_length; +} + +// Perform the reference encoding for a glyph entry (fast version). +// Uses a simple greedy search to find select the encodings. +static encoded_font_t::refstring_t encode_ref_fast(const DataFile::pixels_t &pixels, + const DictTreeNode *tree, + bool is_glyph) +{ + encoded_font_t::refstring_t result; + + // Strip any zeroes from end + size_t end = pixels.size(); + + if (is_glyph) + { + while (end > 0 && pixels.at(end - 1) == 0) end--; + } + + size_t i = 0; + while (i < end) + { + int index; + i += walk_tree(tree, pixels.begin() + i, pixels.end(), index, is_glyph); + result.push_back(index); + } + + if (i < pixels.size()) + result.push_back(REF_FILLZEROS); + + return result; +} + +static encoded_font_t::refstring_t encode_ref(const DataFile::pixels_t &pixels, + const DictTreeNode *tree, + bool is_glyph, bool fast) +{ + if (fast) + return encode_ref_fast(pixels, tree, is_glyph); + else + return encode_ref_slow(pixels, tree, is_glyph); +} + +// Compare dictionary entries by their coding type. +// Sorts RLE-encoded entries first and any empty entries last. +static bool cmp_dict_coding(const DataFile::dictentry_t &a, + const DataFile::dictentry_t &b) +{ + if (a.replacement.size() == 0 && b.replacement.size() != 0) + return false; + else if (a.replacement.size() != 0 && b.replacement.size() == 0) + return true; + else if (a.ref_encode == false && b.ref_encode == true) + return true; + else + return false; +} + +size_t estimate_tree_node_count(const std::vector<DataFile::dictentry_t> &dict) +{ + size_t count = DICT_START; // Preallocated entries + for (const DataFile::dictentry_t &d: dict) + { + count += d.replacement.size(); + } + count += 128 * 7; // Fill entries + return count; +} + +std::unique_ptr<encoded_font_t> encode_font(const DataFile &datafile, + bool fast) +{ + std::unique_ptr<encoded_font_t> result(new encoded_font_t); + + // Sort the dictionary so that RLE-coded entries come first. + // This way the two are easy to distinguish based on index. + std::vector<DataFile::dictentry_t> sorted_dict = datafile.GetDictionary(); + std::stable_sort(sorted_dict.begin(), sorted_dict.end(), cmp_dict_coding); + + // Build the binary tree for looking up references. + size_t count = estimate_tree_node_count(sorted_dict); + TreeAllocator allocator(count); + DictTreeNode* tree = construct_tree(sorted_dict, allocator, fast); + + // Encode the dictionary entries, using either RLE or reference method. + for (const DataFile::dictentry_t &d : sorted_dict) + { + if (d.replacement.size() == 0) + { + continue; + } + else if (d.ref_encode) + { + result->ref_dictionary.push_back(encode_ref(d.replacement, tree, false, fast)); + } + else + { + result->rle_dictionary.push_back(encode_rle(d.replacement)); + } + } + + // Then reference-encode the glyphs + for (const DataFile::glyphentry_t &g : datafile.GetGlyphTable()) + { + result->glyphs.push_back(encode_ref(g.data, tree, true, fast)); + } + + // Optionally verify that the encoding was correct. + if (!fast) + { + for (size_t i = 0; i < datafile.GetGlyphCount(); i++) + { + std::unique_ptr<DataFile::pixels_t> decoded = + decode_glyph(*result, i, datafile.GetFontInfo()); + if (*decoded != datafile.GetGlyphEntry(i).data) + { + auto iter = std::mismatch(decoded->begin(), decoded->end(), + datafile.GetGlyphEntry(i).data.begin()); + size_t pos = iter.first - decoded->begin(); + throw std::logic_error("verification of glyph " + std::to_string(i) + + " failed at position " + std::to_string(pos)); + } + } + } + + return result; +} + +size_t get_encoded_size(const encoded_font_t &encoded) +{ + size_t total = 0; + for (const encoded_font_t::rlestring_t &r : encoded.rle_dictionary) + { + total += r.size(); + + if (r.size() != 0) + total += 2; // Offset table entry + } + for (const encoded_font_t::refstring_t &r : encoded.ref_dictionary) + { + total += r.size(); + + if (r.size() != 0) + total += 2; // Offset table entry + } + for (const encoded_font_t::refstring_t &r : encoded.glyphs) + { + total += r.size(); + total += 2; // Offset table entry + total += 1; // Width table entry + } + return total; +} + +std::unique_ptr<DataFile::pixels_t> decode_glyph( + const encoded_font_t &encoded, + const encoded_font_t::refstring_t &refstring, + const DataFile::fontinfo_t &fontinfo) +{ + std::unique_ptr<DataFile::pixels_t> result(new DataFile::pixels_t); + + for (uint8_t ref : refstring) + { + if (ref <= 15) + { + result->push_back(ref); + } + else if (ref == REF_FILLZEROS) + { + result->resize(fontinfo.max_width * fontinfo.max_height, 0); + } + else if (ref < DICT_START) + { + throw std::logic_error("unknown code: " + std::to_string(ref)); + } + else if (ref - DICT_START < (int)encoded.rle_dictionary.size()) + { + for (uint8_t rle : encoded.rle_dictionary.at(ref - DICT_START)) + { + if ((rle & RLE_CODEMASK) == RLE_ZEROS) + { + for (int i = 0; i < (rle & RLE_VALMASK); i++) + { + result->push_back(0); + } + } + else if ((rle & RLE_CODEMASK) == RLE_64ZEROS) + { + for (int i = 0; i < ((rle & RLE_VALMASK) + 1) * 64; i++) + { + result->push_back(0); + } + } + else if ((rle & RLE_CODEMASK) == RLE_ONES) + { + for (int i = 0; i < (rle & RLE_VALMASK) + 1; i++) + { + result->push_back(15); + } + } + else if ((rle & RLE_CODEMASK) == RLE_SHADE) + { + uint8_t count, alpha; + count = ((rle & RLE_VALMASK) >> 4) + 1; + alpha = ((rle & RLE_VALMASK) & 0xF); + for (int i = 0; i < count; i++) + { + result->push_back(alpha); + } + } + } + } + else if (ref - DICT_START - encoded.rle_dictionary.size() < encoded.ref_dictionary.size()) + { + size_t index = ref - DICT_START - encoded.rle_dictionary.size(); + std::unique_ptr<DataFile::pixels_t> part = + decode_glyph(encoded, encoded.ref_dictionary.at(index), + fontinfo); + result->insert(result->end(), part->begin(), part->end()); + } + else + { + size_t bitcount = fillentry_bitcount(ref); + + uint8_t byte = ref - DICT_START7BIT; + for (size_t i = 0; i < bitcount; i++) + { + uint8_t p = (byte & (1 << i)) ? 15 : 0; + result->push_back(p); + } + } + } + + return result; +} + +std::unique_ptr<DataFile::pixels_t> decode_glyph( + const encoded_font_t &encoded, size_t index, + const DataFile::fontinfo_t &fontinfo) +{ + return decode_glyph(encoded, encoded.glyphs.at(index), fontinfo); +} + +}} |