diff options
| -rw-r--r-- | googletest/include/gtest/internal/gtest-internal.h | 14 | ||||
| -rw-r--r-- | googletest/src/gtest-all.cc | 3 | ||||
| -rw-r--r-- | googletest/src/gtest-edit-distance.cc | 507 | ||||
| -rw-r--r-- | googletest/src/gtest.cc | 244 | ||||
| -rw-r--r-- | googletest/test/gtest_unittest.cc | 77 | 
5 files changed, 265 insertions, 580 deletions
| diff --git a/googletest/include/gtest/internal/gtest-internal.h b/googletest/include/gtest/internal/gtest-internal.h index 1c2824dd..9593a45a 100644 --- a/googletest/include/gtest/internal/gtest-internal.h +++ b/googletest/include/gtest/internal/gtest-internal.h @@ -159,15 +159,15 @@ GTEST_DISABLE_MSC_WARNINGS_POP_()  //  4275  #endif  // GTEST_HAS_EXCEPTIONS +namespace edit_distance {  // Returns the optimal edits to go from 'left' to 'right'.  // All edits cost the same, with replace having lower priority than -// add/remove. Returns an approximation of the maximum memory used in -// 'memory_usage' if non-null. -// Uses a Dijkstra search, with a couple of simple bells and whistles added on. -enum EditType { kEditMatch, kEditAdd, kEditRemove, kEditReplace }; +// add/remove. +// Simple implementation of the Wagner-Fischer algorithm. +// See http://en.wikipedia.org/wiki/Wagner-Fischer_algorithm +enum EditType { kMatch, kAdd, kRemove, kReplace };  GTEST_API_ std::vector<EditType> CalculateOptimalEdits( -    const std::vector<size_t>& left, const std::vector<size_t>& right, -    size_t* memory_usage = NULL); +    const std::vector<size_t>& left, const std::vector<size_t>& right);  // Same as above, but the input is represented as strings.  GTEST_API_ std::vector<EditType> CalculateOptimalEdits( @@ -179,6 +179,8 @@ GTEST_API_ std::string CreateUnifiedDiff(const std::vector<std::string>& left,                                           const std::vector<std::string>& right,                                           size_t context = 2); +}  // namespace edit_distance +  // Calculate the diff between 'left' and 'right' and return it in unified diff  // format.  // If not null, stores in 'total_line_count' the total number of lines found diff --git a/googletest/src/gtest-all.cc b/googletest/src/gtest-all.cc index 3363d398..b217a180 100644 --- a/googletest/src/gtest-all.cc +++ b/googletest/src/gtest-all.cc @@ -38,11 +38,10 @@  #include "gtest/gtest.h"  // The following lines pull in the real gtest *.cc files. +#include "src/gtest.cc"  #include "src/gtest-death-test.cc" -#include "src/gtest-edit-distance.cc"  #include "src/gtest-filepath.cc"  #include "src/gtest-port.cc"  #include "src/gtest-printers.cc"  #include "src/gtest-test-part.cc"  #include "src/gtest-typed-test.cc" -#include "src/gtest.cc" diff --git a/googletest/src/gtest-edit-distance.cc b/googletest/src/gtest-edit-distance.cc deleted file mode 100644 index 331a3b9b..00000000 --- a/googletest/src/gtest-edit-distance.cc +++ /dev/null @@ -1,507 +0,0 @@ -// Copyright 2018, Google Inc. -// All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -//     * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -//     * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following disclaimer -// in the documentation and/or other materials provided with the -// distribution. -//     * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// -// Internal helper functions for finding optimal edit transformations -// between strings. - -#include "gtest/gtest.h" - -#include <functional> -#include <list> -#include <ostream>  // NOLINT -#include <queue> -#include <vector> - -namespace testing { -namespace internal { -namespace { - -// The following implement data structures and code for a Dijkstra-search -// based implementation of optimal edit distance. - -// Posible states a node can be in. Either a node is unsettled (it hasn't been -// drawn from the priority queue yet), or it is settled and a back-link to its -// parent node is fixed. -enum EditSearchState { -  kUnsettled, -  kMatchParent, -  kAddParent, -  kRemoveParent, -  kReplaceParent -}; - -// Custom container for search states. This is smaller and faster than a hash -// map, because the used states are dense along diagonals. -// Specifically, each state requires only 1 byte, whereas a hash_map would -// require storing the key, which would come to at least 8 bytes. std::map has -// an extra 32 bytes per node (3 pointers + 1 byte, padded), so even though -// there are circumstances where this class can have kBlockSize overhead per -// state, on average it does better than 40 bytes of overhead per state. -// In addition, in unopt builds (the usual way tests are run) the fewer -// allocations + better locality has this method running 10-50x faster than -// std::map for inputs that are large enough to measure. -class EditSearchMap { - public: -  EditSearchMap(size_t left_size, size_t right_size) -      : left_size_(left_size), right_size_(right_size) { -    GTEST_CHECK_(left_size_ == left_size && right_size_ == right_size) -        << "Overflow in size: Arguments too large"; -  } - -  // Gets a mutable reference to a state - this is actually of type -  // EditSearchState - inserting if it does not exist. -  unsigned char& insert(UInt32 left, UInt32 right) { -    std::vector<UInt32>* vec; -    size_t index1; -    size_t index2; -    if (left > right) { -      vec = &left_nodes_; -      index1 = left - right - 1; -      index2 = right; -    } else { -      vec = &right_nodes_; -      index1 = right - left; -      index2 = left; -    } -    if (vec->size() <= index1) { -      GTEST_CHECK_(vec->size() == index1) -          << "Array diagonals should only grow by one " << vec->size() << " vs " -          << index1; -      vec->push_back(block_indices_.size()); -      // Round up -      block_indices_.resize( -          block_indices_.size() + -              (DiagonalLength(left, right) + kBlockSize - 1) / kBlockSize, -          kUnallocatedBlock); -    } -    const size_t bucket = index2 / kBlockSize; -    const size_t pos_in_bucket = index2 % kBlockSize; -    UInt32& level2 = block_indices_[(*vec)[index1] + bucket]; -    if (level2 == kUnallocatedBlock) { -      level2 = nodes_.size(); -      size_t diagonal_length = DiagonalLength(left, right); -      GTEST_CHECK_(diagonal_length > index2) -          << diagonal_length << " " << index2; -      size_t block_size = kBlockSize; -      if (diagonal_length / kBlockSize == bucket) { -        // We can never get here if diagonal_length is a multiple of -        // kBlockSize, which is what we want, since this would evaluate to 0. -        block_size = diagonal_length % kBlockSize; -      } -      nodes_.resize(nodes_.size() + block_size); -    } -    return nodes_[level2 + pos_in_bucket]; -  } - -  size_t MemoryUsage() const { -    return nodes_.capacity() + -           sizeof(UInt32) * (left_nodes_.capacity() + right_nodes_.capacity() + -                             block_indices_.capacity()); -  } - - private: -  enum { kBlockSize = 1024, kUnallocatedBlock = 0xFFFFFFFFul }; - -  size_t DiagonalLength(UInt32 left, UInt32 right) const { -    return std::min(left_size_ - left, right_size_ - right) + -           (left < right ? left : right); -  } - -  // The state space is conceptually a left_size_ by right_size_ sparse matrix -  // of EditSearchStates. However, due to the access pattern of the search, it -  // is much better to store the nodes per diagonal rather than per row. -  UInt32 left_size_; -  UInt32 right_size_; -  // The nodes are stored by diagonals, split in two: Those to the left of the -  // main diagonal are in left_nodes_, and everything else is in right_nodes_. -  // The values are indices into block_indices_. -  std::vector<UInt32> left_nodes_; -  std::vector<UInt32> right_nodes_; -  // Every entry here is an offset into the beginning of a kBlockSize-sized -  // block in nodes_. An entire diagonal is allocated together here; for a -  // diagonal of length <= kBlockSize, that's just a single entry, but for -  // longer diagonals multiple contiguous index entries will be reserved at -  // once. Unused entries will be assigned kUnallocatedBlock; this -  // double-indirect scheme is used to save memory in the cases when an entire -  // diagonal isn't needed. -  std::vector<UInt32> block_indices_; -  // This stores the actual EditSearchState data, pointed to by block_indices_. -  std::vector<unsigned char> nodes_; -}; - -struct EditHeapEntry { -  EditHeapEntry(UInt32 l, UInt32 r, UInt64 c, EditSearchState s) -      : left(l), right(r), cost(c), state(s) {} - -  UInt32 left; -  UInt32 right; -  UInt64 cost : 61; -  // The state that the node will get when this entry is settled. Therefore, -  // this can never be kUnsettled. -  UInt64 state : 3; - -  bool operator>(const EditHeapEntry& other) const { return cost > other.cost; } -}; - -// Need a min-queue, so invert the comparator. -typedef std::priority_queue<EditHeapEntry, std::vector<EditHeapEntry>, -                            std::greater<EditHeapEntry>> -    EditHeap; - -}  // namespace - -std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left, -                                            const std::vector<size_t>& right, -                                            size_t* memory_usage) { -  const UInt64 kBaseCost = 100000; -  // We make replace a little more expensive than add/remove to lower -  // their priority. -  const UInt64 kReplaceCost = 100001; -  // In the common case where the vectors are the same (or almost the same) -  // size, we know that an add will have to be followed by some later remove -  // (or vice versa) in order to get the lengths to balance. We "borrow" some -  // of the cost of the later operation and bring it forward into the earlier -  // operation, to increase the cost of exploring (usually fruitlessly) around -  // the beginning of the graph. -  // However, there is a trade-off: This cheapens the cost of exploring around -  // the beginning of the graph (in one direction) when the vectors are -  // unequal in length. So we don't steal *all* the cost. -  // You can view this as a form of A*, using an admissable heuristic that has -  // been re-cast as a cost function that can be used in Dijkstra. -  const UInt64 kTowardsGoalCost = 50003; -  const UInt64 kAwayFromGoalCost = 2 * kBaseCost - kTowardsGoalCost; - -  EditSearchMap node_map(left.size() + 1, right.size() + 1); -  EditHeap heap; -  heap.push(EditHeapEntry(0, 0, 0, kReplaceParent)); - -  while (!heap.empty()) { -    const EditHeapEntry current_entry = heap.top(); -    heap.pop(); - -    UInt32 left_pos = current_entry.left; -    UInt32 right_pos = current_entry.right; -    unsigned char& current_state = node_map.insert(left_pos, right_pos); -    if (current_state != kUnsettled) { -      // Node was already settled by a previous entry in the priority queue, -      // this is a suboptimal path that should be ignored. -      continue; -    } -    current_state = current_entry.state; - -    if (left_pos == left.size() && right_pos == right.size()) { -      // This is the normal exit point; if we terminate due to the heap being -      // empty, we'll fail a check later. -      break; -    } - -    // Special case: Since the cost of a match is zero, we can immediately -    // settle the new node without putting it in the queue, since nothing can -    // have a smaller cost than it. Furthermore, we don't need to relax the -    // other two edges, since we know we don't need them: Any path from this -    // node that would use them has an path via the match that is at least as -    // cheap. Together, this means we can loop here until we stop matching. -    while (left_pos < left.size() && right_pos < right.size() && -           left[left_pos] == right[right_pos]) { -      left_pos++; -      right_pos++; -      unsigned char& fast_forward_state = node_map.insert(left_pos, right_pos); -      if (fast_forward_state != kUnsettled) { -        // The search reached around and settled this node before settling the -        // base node. This means we're completely done with this iteration; -        // abort to the outer loop. -        goto outer_loop_bottom; -        // Otherwise, when can settle this node, even if it was created from -        // another state - we know the cost of settling it now is optimal. -      } -      fast_forward_state = kMatchParent; -    } - -    // Relax adjacent nodes. We have no way to find or lower the cost of -    // existing entries in the heap, so we just push new entries and throw -    // them out at the top if the node is already settled. We *could* check to -    // see if they're already settled before pushing, but it turns out to be -    // ~not any faster, and more complicated to do so. -    // -    // If we're at an edge, there's only one node to relax. -    if (left_pos >= left.size()) { -      if (right_pos >= right.size()) { -        break;  // Can happen due to the fast-path loop above. -      } -      heap.push(EditHeapEntry(left_pos, right_pos + 1, -                              current_entry.cost + kTowardsGoalCost, -                              kAddParent)); -      continue; -    } -    if (right_pos >= right.size()) { -      heap.push(EditHeapEntry(left_pos + 1, right_pos, -                              current_entry.cost + kTowardsGoalCost, -                              kRemoveParent)); -      continue; -    } -    // General case: Relax 3 edges. -    heap.push(EditHeapEntry( -        left_pos, right_pos + 1, -        current_entry.cost + (right.size() + left_pos > right_pos + left.size() -                                  ? kTowardsGoalCost -                                  : kAwayFromGoalCost), -        kAddParent)); -    heap.push(EditHeapEntry( -        left_pos + 1, right_pos, -        current_entry.cost + (right.size() + left_pos < right_pos + left.size() -                                  ? kTowardsGoalCost -                                  : kAwayFromGoalCost), -        kRemoveParent)); -    heap.push(EditHeapEntry(left_pos + 1, right_pos + 1, -                            current_entry.cost + kReplaceCost, kReplaceParent)); -  outer_loop_bottom : {}  // Need the curlies to form a statement. -  } - -  // Reconstruct the best path. We do it in reverse order. -  std::vector<EditType> best_path; -  UInt32 left_pos = left.size(); -  UInt32 right_pos = right.size(); -  while (left_pos != 0 || right_pos != 0) { -    GTEST_CHECK_(left_pos <= left.size() && right_pos <= right.size()); -    // The node must already exist, but if it somehow doesn't, it will be -    // added as kUnsettled, which will crash below. -    const unsigned char state = node_map.insert(left_pos, right_pos); -    switch (state) { -      case kAddParent: -        right_pos--; -        break; -      case kRemoveParent: -        left_pos--; -        break; -      case kMatchParent: -      case kReplaceParent: -        left_pos--; -        right_pos--; -        break; -      default: -        GTEST_LOG_(FATAL) << "Unsettled node at " << left_pos << "," -                          << right_pos; -    } -    best_path.push_back(static_cast<EditType>(state - 1)); -  } -  std::reverse(best_path.begin(), best_path.end()); -  if (memory_usage != NULL) { -    *memory_usage = node_map.MemoryUsage(); -  } -  return best_path; -} - -namespace { - -// Helper class to convert string into ids with deduplication. -class InternalStrings { - public: -  size_t GetId(const std::string* str) { -    IdMap::iterator it = ids_.find(str); -    if (it != ids_.end()) return it->second; -    size_t id = ids_.size(); -    return ids_[str] = id; -  } - - private: -  struct IdMapCmp { -    bool operator()(const std::string* first, const std::string* second) const { -      return *first < *second; -    } -  }; -  typedef std::map<const std::string*, size_t, IdMapCmp> IdMap; -  IdMap ids_; -}; - -}  // namespace - -std::vector<EditType> CalculateOptimalEdits( -    const std::vector<std::string>& left, -    const std::vector<std::string>& right) { -  std::vector<size_t> left_ids, right_ids; -  { -    InternalStrings intern_table; -    for (size_t i = 0; i < left.size(); ++i) { -      left_ids.push_back(intern_table.GetId(&left[i])); -    } -    for (size_t i = 0; i < right.size(); ++i) { -      right_ids.push_back(intern_table.GetId(&right[i])); -    } -  } -  return CalculateOptimalEdits(left_ids, right_ids); -} - -namespace { - -// Helper class that holds the state for one hunk and prints it out to the -// stream. -// It reorders adds/removes when possible to group all removes before all -// adds. It also adds the hunk header before printing into the stream. -class Hunk { - public: -  Hunk(size_t left_start, size_t right_start) -      : left_start_(left_start), -        right_start_(right_start), -        adds_(), -        removes_(), -        common_() {} - -  void PushLine(char edit, const char* line) { -    switch (edit) { -      case ' ': -        ++common_; -        FlushEdits(); -        hunk_.push_back(std::make_pair(' ', line)); -        break; -      case '-': -        ++removes_; -        hunk_removes_.push_back(std::make_pair('-', line)); -        break; -      case '+': -        ++adds_; -        hunk_adds_.push_back(std::make_pair('+', line)); -        break; -    } -  } - -  void PrintTo(std::ostream* os) { -    PrintHeader(os); -    FlushEdits(); -    for (std::list<std::pair<char, const char*> >::const_iterator it = -             hunk_.begin(); -         it != hunk_.end(); ++it) { -      *os << it->first << it->second << "\n"; -    } -  } - -  bool has_edits() const { return adds_ || removes_; } - - private: -  void FlushEdits() { -    hunk_.splice(hunk_.end(), hunk_removes_); -    hunk_.splice(hunk_.end(), hunk_adds_); -  } - -  // Print a unified diff header for one hunk. -  // The format is -  //   "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@" -  // where the left/right parts are omitted if unnecessary. -  void PrintHeader(std::ostream* ss) const { -    *ss << "@@ "; -    if (removes_) { -      *ss << "-" << left_start_ << "," << (removes_ + common_); -    } -    if (removes_ && adds_) { -      *ss << " "; -    } -    if (adds_) { -      *ss << "+" << right_start_ << "," << (adds_ + common_); -    } -    *ss << " @@\n"; -  } - -  size_t left_start_, right_start_; -  size_t adds_, removes_, common_; -  std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_; -}; - -}  // namespace - -// Create a list of diff hunks in Unified diff format. -// Each hunk has a header generated by PrintHeader above plus a body with -// lines prefixed with ' ' for no change, '-' for deletion and '+' for -// addition. -// 'context' represents the desired unchanged prefix/suffix around the diff. -// If two hunks are close enough that their contexts overlap, then they are -// joined into one hunk. -std::string CreateUnifiedDiff(const std::vector<std::string>& left, -                              const std::vector<std::string>& right, -                              size_t context) { -  const std::vector<EditType> edits = CalculateOptimalEdits(left, right); - -  size_t l_i = 0, r_i = 0, edit_i = 0; -  std::stringstream ss; -  while (edit_i < edits.size()) { -    // Find first edit. -    while (edit_i < edits.size() && edits[edit_i] == kEditMatch) { -      ++l_i; -      ++r_i; -      ++edit_i; -    } - -    // Find the first line to include in the hunk. -    const size_t prefix_context = std::min(l_i, context); -    Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1); -    for (size_t i = prefix_context; i > 0; --i) { -      hunk.PushLine(' ', left[l_i - i].c_str()); -    } - -    // Iterate the edits until we found enough suffix for the hunk or the input -    // is over. -    size_t n_suffix = 0; -    for (; edit_i < edits.size(); ++edit_i) { -      if (n_suffix >= context) { -        // Continue only if the next hunk is very close. -        std::vector<EditType>::const_iterator it = edits.begin() + edit_i; -        while (it != edits.end() && *it == kEditMatch) ++it; -        if (it == edits.end() || (it - edits.begin()) - edit_i >= context) { -          // There is no next edit or it is too far away. -          break; -        } -      } - -      EditType edit = edits[edit_i]; -      // Reset count when a non match is found. -      n_suffix = edit == kEditMatch ? n_suffix + 1 : 0; - -      if (edit == kEditMatch || edit == kEditRemove || edit == kEditReplace) { -        hunk.PushLine(edit == kEditMatch ? ' ' : '-', left[l_i].c_str()); -      } -      if (edit == kEditAdd || edit == kEditReplace) { -        hunk.PushLine('+', right[r_i].c_str()); -      } - -      // Advance indices, depending on edit type. -      l_i += edit != kEditAdd; -      r_i += edit != kEditRemove; -    } - -    if (!hunk.has_edits()) { -      // We are done. We don't want this hunk. -      break; -    } - -    hunk.PrintTo(&ss); -  } -  return ss.str(); -} - -}  // namespace internal -}  // namespace testing diff --git a/googletest/src/gtest.cc b/googletest/src/gtest.cc index ba706c1e..97966377 100644 --- a/googletest/src/gtest.cc +++ b/googletest/src/gtest.cc @@ -46,6 +46,7 @@  #include <algorithm>  #include <iomanip>  #include <limits> +#include <list>  #include <map>  #include <ostream>  // NOLINT  #include <sstream> @@ -1067,6 +1068,246 @@ AssertionResult AssertionFailure(const Message& message) {  namespace internal { +namespace edit_distance { +std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left, +                                            const std::vector<size_t>& right) { +  std::vector<std::vector<double> > costs( +      left.size() + 1, std::vector<double>(right.size() + 1)); +  std::vector<std::vector<EditType> > best_move( +      left.size() + 1, std::vector<EditType>(right.size() + 1)); + +  // Populate for empty right. +  for (size_t l_i = 0; l_i < costs.size(); ++l_i) { +    costs[l_i][0] = static_cast<double>(l_i); +    best_move[l_i][0] = kRemove; +  } +  // Populate for empty left. +  for (size_t r_i = 1; r_i < costs[0].size(); ++r_i) { +    costs[0][r_i] = static_cast<double>(r_i); +    best_move[0][r_i] = kAdd; +  } + +  for (size_t l_i = 0; l_i < left.size(); ++l_i) { +    for (size_t r_i = 0; r_i < right.size(); ++r_i) { +      if (left[l_i] == right[r_i]) { +        // Found a match. Consume it. +        costs[l_i + 1][r_i + 1] = costs[l_i][r_i]; +        best_move[l_i + 1][r_i + 1] = kMatch; +        continue; +      } + +      const double add = costs[l_i + 1][r_i]; +      const double remove = costs[l_i][r_i + 1]; +      const double replace = costs[l_i][r_i]; +      if (add < remove && add < replace) { +        costs[l_i + 1][r_i + 1] = add + 1; +        best_move[l_i + 1][r_i + 1] = kAdd; +      } else if (remove < add && remove < replace) { +        costs[l_i + 1][r_i + 1] = remove + 1; +        best_move[l_i + 1][r_i + 1] = kRemove; +      } else { +        // We make replace a little more expensive than add/remove to lower +        // their priority. +        costs[l_i + 1][r_i + 1] = replace + 1.00001; +        best_move[l_i + 1][r_i + 1] = kReplace; +      } +    } +  } + +  // Reconstruct the best path. We do it in reverse order. +  std::vector<EditType> best_path; +  for (size_t l_i = left.size(), r_i = right.size(); l_i > 0 || r_i > 0;) { +    EditType move = best_move[l_i][r_i]; +    best_path.push_back(move); +    l_i -= move != kAdd; +    r_i -= move != kRemove; +  } +  std::reverse(best_path.begin(), best_path.end()); +  return best_path; +} + +namespace { + +// Helper class to convert string into ids with deduplication. +class InternalStrings { + public: +  size_t GetId(const std::string& str) { +    IdMap::iterator it = ids_.find(str); +    if (it != ids_.end()) return it->second; +    size_t id = ids_.size(); +    return ids_[str] = id; +  } + + private: +  typedef std::map<std::string, size_t> IdMap; +  IdMap ids_; +}; + +}  // namespace + +std::vector<EditType> CalculateOptimalEdits( +    const std::vector<std::string>& left, +    const std::vector<std::string>& right) { +  std::vector<size_t> left_ids, right_ids; +  { +    InternalStrings intern_table; +    for (size_t i = 0; i < left.size(); ++i) { +      left_ids.push_back(intern_table.GetId(left[i])); +    } +    for (size_t i = 0; i < right.size(); ++i) { +      right_ids.push_back(intern_table.GetId(right[i])); +    } +  } +  return CalculateOptimalEdits(left_ids, right_ids); +} + +namespace { + +// Helper class that holds the state for one hunk and prints it out to the +// stream. +// It reorders adds/removes when possible to group all removes before all +// adds. It also adds the hunk header before printint into the stream. +class Hunk { + public: +  Hunk(size_t left_start, size_t right_start) +      : left_start_(left_start), +        right_start_(right_start), +        adds_(), +        removes_(), +        common_() {} + +  void PushLine(char edit, const char* line) { +    switch (edit) { +      case ' ': +        ++common_; +        FlushEdits(); +        hunk_.push_back(std::make_pair(' ', line)); +        break; +      case '-': +        ++removes_; +        hunk_removes_.push_back(std::make_pair('-', line)); +        break; +      case '+': +        ++adds_; +        hunk_adds_.push_back(std::make_pair('+', line)); +        break; +    } +  } + +  void PrintTo(std::ostream* os) { +    PrintHeader(os); +    FlushEdits(); +    for (std::list<std::pair<char, const char*> >::const_iterator it = +             hunk_.begin(); +         it != hunk_.end(); ++it) { +      *os << it->first << it->second << "\n"; +    } +  } + +  bool has_edits() const { return adds_ || removes_; } + + private: +  void FlushEdits() { +    hunk_.splice(hunk_.end(), hunk_removes_); +    hunk_.splice(hunk_.end(), hunk_adds_); +  } + +  // Print a unified diff header for one hunk. +  // The format is +  //   "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@" +  // where the left/right parts are omitted if unnecessary. +  void PrintHeader(std::ostream* ss) const { +    *ss << "@@ "; +    if (removes_) { +      *ss << "-" << left_start_ << "," << (removes_ + common_); +    } +    if (removes_ && adds_) { +      *ss << " "; +    } +    if (adds_) { +      *ss << "+" << right_start_ << "," << (adds_ + common_); +    } +    *ss << " @@\n"; +  } + +  size_t left_start_, right_start_; +  size_t adds_, removes_, common_; +  std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_; +}; + +}  // namespace + +// Create a list of diff hunks in Unified diff format. +// Each hunk has a header generated by PrintHeader above plus a body with +// lines prefixed with ' ' for no change, '-' for deletion and '+' for +// addition. +// 'context' represents the desired unchanged prefix/suffix around the diff. +// If two hunks are close enough that their contexts overlap, then they are +// joined into one hunk. +std::string CreateUnifiedDiff(const std::vector<std::string>& left, +                              const std::vector<std::string>& right, +                              size_t context) { +  const std::vector<EditType> edits = CalculateOptimalEdits(left, right); + +  size_t l_i = 0, r_i = 0, edit_i = 0; +  std::stringstream ss; +  while (edit_i < edits.size()) { +    // Find first edit. +    while (edit_i < edits.size() && edits[edit_i] == kMatch) { +      ++l_i; +      ++r_i; +      ++edit_i; +    } + +    // Find the first line to include in the hunk. +    const size_t prefix_context = std::min(l_i, context); +    Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1); +    for (size_t i = prefix_context; i > 0; --i) { +      hunk.PushLine(' ', left[l_i - i].c_str()); +    } + +    // Iterate the edits until we found enough suffix for the hunk or the input +    // is over. +    size_t n_suffix = 0; +    for (; edit_i < edits.size(); ++edit_i) { +      if (n_suffix >= context) { +        // Continue only if the next hunk is very close. +        std::vector<EditType>::const_iterator it = edits.begin() + edit_i; +        while (it != edits.end() && *it == kMatch) ++it; +        if (it == edits.end() || (it - edits.begin()) - edit_i >= context) { +          // There is no next edit or it is too far away. +          break; +        } +      } + +      EditType edit = edits[edit_i]; +      // Reset count when a non match is found. +      n_suffix = edit == kMatch ? n_suffix + 1 : 0; + +      if (edit == kMatch || edit == kRemove || edit == kReplace) { +        hunk.PushLine(edit == kMatch ? ' ' : '-', left[l_i].c_str()); +      } +      if (edit == kAdd || edit == kReplace) { +        hunk.PushLine('+', right[r_i].c_str()); +      } + +      // Advance indices, depending on edit type. +      l_i += edit != kAdd; +      r_i += edit != kRemove; +    } + +    if (!hunk.has_edits()) { +      // We are done. We don't want this hunk. +      break; +    } + +    hunk.PrintTo(&ss); +  } +  return ss.str(); +} + +}  // namespace edit_distance +  namespace {  // The string representation of the values received in EqFailure() are already @@ -1138,7 +1379,8 @@ AssertionResult EqFailure(const char* lhs_expression,      const std::vector<std::string> rhs_lines =          SplitEscapedString(rhs_value);      if (lhs_lines.size() > 1 || rhs_lines.size() > 1) { -      msg << "\nWith diff:\n" << CreateUnifiedDiff(lhs_lines, rhs_lines); +      msg << "\nWith diff:\n" +          << edit_distance::CreateUnifiedDiff(lhs_lines, rhs_lines);      }    } diff --git a/googletest/test/gtest_unittest.cc b/googletest/test/gtest_unittest.cc index 7f6a2ac4..f7213fbf 100644 --- a/googletest/test/gtest_unittest.cc +++ b/googletest/test/gtest_unittest.cc @@ -215,7 +215,6 @@ using testing::GTEST_FLAG(stream_result_to);  using testing::GTEST_FLAG(throw_on_failure);  using testing::IsNotSubstring;  using testing::IsSubstring; -using testing::kMaxStackTraceDepth;  using testing::Message;  using testing::ScopedFakeTestPartResultReporter;  using testing::StaticAssertTypeEq; @@ -235,18 +234,16 @@ using testing::internal::AlwaysTrue;  using testing::internal::AppendUserMessage;  using testing::internal::ArrayAwareFind;  using testing::internal::ArrayEq; -using testing::internal::CalculateOptimalEdits;  using testing::internal::CodePointToUtf8;  using testing::internal::CompileAssertTypesEqual;  using testing::internal::CopyArray;  using testing::internal::CountIf; -using testing::internal::CreateUnifiedDiff; -using testing::internal::EditType;  using testing::internal::EqFailure;  using testing::internal::FloatingPoint;  using testing::internal::ForEach;  using testing::internal::FormatEpochTimeInMillisAsIso8601;  using testing::internal::FormatTimeInMillisAsSeconds; +using testing::internal::GTestFlagSaver;  using testing::internal::GetCurrentOsStackTraceExceptTop;  using testing::internal::GetElementOr;  using testing::internal::GetNextRandomSeed; @@ -255,7 +252,6 @@ using testing::internal::GetTestTypeId;  using testing::internal::GetTimeInMillis;  using testing::internal::GetTypeId;  using testing::internal::GetUnitTestImpl; -using testing::internal::GTestFlagSaver;  using testing::internal::ImplicitlyConvertible;  using testing::internal::Int32;  using testing::internal::Int32FromEnvOrDie; @@ -263,8 +259,6 @@ using testing::internal::IsAProtocolMessage;  using testing::internal::IsContainer;  using testing::internal::IsContainerTest;  using testing::internal::IsNotContainer; -using testing::internal::kMaxRandomSeed; -using testing::internal::kTestTypeIdInGoogleTest;  using testing::internal::NativeArray;  using testing::internal::OsStackTraceGetter;  using testing::internal::OsStackTraceGetterInterface; @@ -286,6 +280,12 @@ using testing::internal::TestResultAccessor;  using testing::internal::UInt32;  using testing::internal::UnitTestImpl;  using testing::internal::WideStringToUtf8; +using testing::internal::edit_distance::CalculateOptimalEdits; +using testing::internal::edit_distance::CreateUnifiedDiff; +using testing::internal::edit_distance::EditType; +using testing::internal::kMaxRandomSeed; +using testing::internal::kTestTypeIdInGoogleTest; +using testing::kMaxStackTraceDepth;  #if GTEST_HAS_STREAM_REDIRECTION  using testing::internal::CaptureStdout; @@ -3517,14 +3517,14 @@ TEST(EditDistance, TestCases) {        {__LINE__, "ABCD", "abcd", "////",         "@@ -1,4 +1,4 @@\n-A\n-B\n-C\n-D\n+a\n+b\n+c\n+d\n"},        // Path finding. -      {__LINE__, "ABCDEFGH", "ABXEGH1", "  /- -  +", +      {__LINE__, "ABCDEFGH", "ABXEGH1", "  -/ -  +",         "@@ -1,8 +1,7 @@\n A\n B\n-C\n-D\n+X\n E\n-F\n G\n H\n+1\n"}, -      {__LINE__, "AAAABCCCC", "ABABCDCDC", " -/   + / ", -       "@@ -1,9 +1,9 @@\n A\n-A\n-A\n+B\n A\n B\n C\n+D\n C\n-C\n+D\n C\n"}, -      {__LINE__, "ABCDE", "BCDCD", "-   /+", +      {__LINE__, "AAAABCCCC", "ABABCDCDC", "- /   + / ", +       "@@ -1,9 +1,9 @@\n-A\n A\n-A\n+B\n A\n B\n C\n+D\n C\n-C\n+D\n C\n"}, +      {__LINE__, "ABCDE", "BCDCD", "-   +/",         "@@ -1,5 +1,5 @@\n-A\n B\n C\n D\n-E\n+C\n+D\n"}, -      {__LINE__, "ABCDEFGHIJKL", "BGDCDEFGJKLJK", "- ++     --   ++", -       "@@ -1,4 +1,5 @@\n-A\n B\n+G\n+D\n C\n D\n" +      {__LINE__, "ABCDEFGHIJKL", "BCDCDEFGJKLJK", "- ++     --   ++", +       "@@ -1,4 +1,5 @@\n-A\n B\n+C\n+D\n C\n D\n"         "@@ -6,7 +7,7 @@\n F\n G\n-H\n-I\n J\n K\n L\n+J\n+K\n"},        {}};    for (const Case* c = kCases; c->left; ++c) { @@ -3542,57 +3542,6 @@ TEST(EditDistance, TestCases) {    }  } -// Tests that we can run CalculateOptimalEdits for a large vector, i.e. we can -// compute diffs for large strings. -TEST(EditDistance, LargeVectorWithDiffs) { -  const int kSize = 300000; -  std::vector<size_t> left; -  std::vector<size_t> right; -  std::vector<EditType> expected(kSize, testing::internal::kEditMatch); - -  left.reserve(kSize); -  right.reserve(kSize); -  for (int i = 0; i < kSize; ++i) { -    // Make the contents of the vectors unique. This greatly speeds up -    // the algorithm, since it doesn't spend time finding matches for -    // different alignments. -    left.push_back(i); -    right.push_back(i); -  } - -  for (int i = 0; i < 10; ++i) { -    right[i] = kSize + i; -    expected[i] = testing::internal::kEditReplace; -    right[kSize - i - 1] = kSize * 2 + i; -    expected[kSize - i - 1] = testing::internal::kEditReplace; -  } -  size_t memory_usage; -  EXPECT_EQ(CalculateOptimalEdits(left, right, &memory_usage), expected); -  EXPECT_GT(memory_usage, kSize); -  EXPECT_LT(memory_usage, kSize * 2); -} - -// Tests that we can run CalculateOptimalEdits for two vectors N and M, where -// M = N plus additional junk at the end. The current algorithm only does O(M) -// "real" work in this case, but allocates some extra memory. We test that this -// is still fast enough for common cases, and we aren't allocating an -// excessive amount of extra memory. -TEST(EditDistance, LargeVectorWithTrailingJunk) { -  const int kSize = 200000; -  const int kAdditionalSize = 2000; -  std::vector<size_t> left(kSize, 0); -  std::vector<size_t> right(kSize + kAdditionalSize, 0); -  std::vector<EditType> expected(kSize + kAdditionalSize, -                                 testing::internal::kEditMatch); -  for (int i = 0; i < kAdditionalSize; ++i) { -    expected[i + kSize] = testing::internal::kEditAdd; -  } -  size_t memory_usage; -  EXPECT_EQ(CalculateOptimalEdits(left, right, &memory_usage), expected); -  EXPECT_GT(memory_usage, kSize); -  EXPECT_LT(memory_usage, 6000000); -} -  // Tests EqFailure(), used for implementing *EQ* assertions.  TEST(AssertionTest, EqFailure) {    const std::string foo_val("5"), bar_val("6"); | 
