19760138266

Committed 28 Nov 2025 09:48AM UTC coverage: 64.395% (-0.007%) from 64.402%

Build # 19760138266

Build Type

Pull #1095

github

Committed by

antonvw

Commit Message

improved handling last of diffs

Pull Request Pull Request #1095: added diff type to unified_diff

Run Details

18554 of 31581 branches covered (58.75%)

Branch coverage included in aggregate %.

14720 of 20091 relevant lines covered (73.27%)

1523.92 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

89.03

/src/factory/unified-diff.cpp

////////////////////////////////////////////////////////////////////////////////
// Name:      unified-diff.cpp
// Purpose:   Implementation of class wex::factory::unified_diff
//            https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html
// Author:    Anton van Wezenbeek
// Copyright: (c) 2024-2025 Anton van Wezenbeek
////////////////////////////////////////////////////////////////////////////////

#include <boost/tokenizer.hpp>
#include <wex/core/log.h>
#include <wex/core/regex.h>
#include <wex/factory/frame.h>
#include <wex/factory/unified-diff.h>

#include <iostream>
#include <utility>

// Advances `tok_iter` to the next token and makes the ENCLOSING function
// return false when that was the last token.
// Requires `tok_iter` and `tokens` to be in scope at the expansion site.
// The expansion is a bare if-statement (no do/while wrapper), so it must be
// used as a statement of its own.
#define NEXT_TOKEN                                                             \
  if (++tok_iter == tokens.end())                                              \
  {                                                                            \
    return false;                                                              \
  }

// Reads m_range[RANGE] tokens and appends each of them, without its first
// character, to m_text[TEXT].
// The iterator is advanced (NEXT_TOKEN) before every read, so the enclosing
// function returns false when the input ends before all lines were read.
// The dropped first character is not inspected; presumably it is the '-' or
// '+' marker of a diff line -- TODO confirm.
#define CHANGES_LINES(RANGE, TEXT)                                             \
  for (int i = 0; i < m_range[RANGE]; i++)                                     \
  {                                                                            \
    NEXT_TOKEN                                                                 \
    m_text[TEXT].push_back((*tok_iter).substr(1));                             \
  }

// Parses the current token with parse_header(REGEX, ..., INTO) and then
// advances to the next token.
// The enclosing function returns false when the token does not match REGEX,
// or when there is no token after the header line.
#define HEADER_LINES(REGEX, INTO)                                              \
  if (!parse_header(REGEX, *tok_iter, INTO))                                   \
  {                                                                            \
    return false;                                                              \
  }                                                                            \
  NEXT_TOKEN

// Skips the current token and every directly following token that starts
// with "diff " or "index ".
// Note that the iterator is advanced BEFORE the first test, so the token
// `tok_iter` refers to on entry is always skipped without being looked at.
// The enclosing function returns false when the input ends while skipping.
#define SKIP_LINES                                                             \
  while (tok_iter != tokens.end())                                             \
  {                                                                            \
    NEXT_TOKEN                                                                 \
    if (!tok_iter->starts_with("diff ") && !tok_iter->starts_with("index "))   \
    {                                                                          \
      break;                                                                   \
    }                                                                          \
  }

namespace wex
{
// Converts a numeric field to a number using std::stoi.
// An empty field yields 1; parse() relies on this for hunk headers that
// leave out the line count.
size_t stoi(const std::string& i)
{
  if (i.empty())
  {
    return 1;
  }

  return std::stoi(i);
}
} // namespace wex

// Stores the text that parse() will process (moved into m_input) and sets
// every element of m_range to zero.
wex::factory::unified_diff::unified_diff(std::string input)
  : m_input(std::move(input))
{
  m_range.fill(0);
}

// Parses m_input line by line as unified diff output.
//
// For each file section the "--- a/<path>" and "+++ b/<path>" header lines
// are stored in m_path[0] and m_path[1]. For each "@@ ... @@" hunk the four
// range numbers are stored in m_range and the hunk lines in m_text, after
// which report_diff() is called.
//
// Returns false (without calling report_diff_finish()) as soon as
// - a header or hunk line does not match its pattern,
// - the input ends where the macros still expect another line, or
// - report_diff() returns false.
// Otherwise report_diff_finish() is called and true is returned; this
// includes input that yields no tokens at all.
//
// NOTE: NEXT_TOKEN, CHANGES_LINES, HEADER_LINES and SKIP_LINES contain hidden
// `return false` statements and advance `tok_iter`.
bool wex::factory::unified_diff::parse()
{
  using tokenizer = boost::tokenizer<boost::char_separator<char>>;

  // Split the input on '\r' and '\n'. With the default char_separator policy
  // empty tokens are dropped, so blank lines never show up as a token.
  tokenizer tokens(m_input, boost::char_separator<char>("\r\n"));

  tokenizer::iterator tok_iter = tokens.begin();

  m_diffs = 0;
  m_type  = diff_t::UNKNOWN;

  // One iteration per file section.
  while (tok_iter != tokens.end())
  {
    // Skip the current line (unconditionally) and any following lines that
    // start with "diff " or "index ".
    SKIP_LINES;

    // The unified output format starts with a two-line header
    HEADER_LINES("--- a/(.*)", m_path[0]);
    HEADER_LINES("\\+\\+\\+ b/(.*)", m_path[1]);

    m_is_first = true;
    m_is_last  = false;
    // The first file section is tagged FIRST, every later one OTHER.
    m_type     = (m_type == diff_t::UNKNOWN ? diff_t::FIRST : diff_t::OTHER);

    // Next come one or more hunks of differences
    while (tok_iter != tokens.end())
    {
      // Hunk header: start line and optional line count, for both sides.
      regex r_hunk("@@ -([0-9]+),?([0-9]*) \\+([0-9]+),?([0-9]*) @@.*");

      // Anything other than a result of 4 from match() is treated as an
      // invalid hunk header (presumably 4 is the number of captured groups
      // -- TODO confirm against wex::regex).
      if (r_hunk.match(*tok_iter) != 4)
      {
        log("unified_diff") << *tok_iter << r_hunk.size();
        return false;
      }

      // An empty (omitted) field becomes 1, see wex::stoi.
      m_range[0] = wex::stoi(r_hunk[0]);
      m_range[1] = wex::stoi(r_hunk[1]);
      m_range[2] = wex::stoi(r_hunk[2]);
      m_range[3] = wex::stoi(r_hunk[3]);

      m_text.fill({});

      // Now get all - lines and all + lines, collect them, and invoke callback.
      // Lines are assigned purely by count: the next m_range[1] lines go to
      // m_text[0], the m_range[3] lines after that go to m_text[1].
      CHANGES_LINES(1, 0);
      CHANGES_LINES(3, 1);

      if (!report_diff())
      {
        return false;
      }

      trace("hunk");

      m_diffs++;

      m_is_first = false;

      // Move past the last hunk line. If another line follows and it is not
      // a hunk header, this file section is finished.
      if (++tok_iter != tokens.end() && !(*tok_iter).starts_with("@@"))
      {
        m_is_last = true;
        m_type    = diff_t::OTHER;

        // report_diff() is called a second time with the same m_range and
        // m_text, now with m_is_last set; this call is counted in m_diffs
        // as well.
        if (!report_diff())
        {
          return false;
        }
        trace("last");
        m_diffs++;

        break; // this was last hunk, continue with header lines
      }
    }
  }

  m_is_last = true;

  // Only mark as LAST when at least one file section was parsed.
  if (m_type != diff_t::UNKNOWN)
  {
    m_type = diff_t::LAST;
  }

  report_diff_finish();

  trace("finished");

  return true;
}

bool wex::factory::unified_diff::parse_header(
  const std::string& r,
  const std::string& line,
  path&              p)
{
  regex re(r);

  if (!re.match(line))
  {
    log("unified_diff") << line << re.match_data().text();
    return false;
  }

  p = path(re[0]);

  return true;
}

void wex::factory::unified_diff::trace(const std::string& text) const
{
  using boost::describe::operators::operator<<;

  std::stringstream str;
  str << "type: " << boost::describe::enum_to_string(m_type, "none") << " "
      << *this << " ranges: ";

  std::ranges::for_each(
    m_range,
    [this, &str](const auto& it)
    {
      str << std::to_string(it) << ",";
    });

  str << " text sizes: ";

  std::ranges::for_each(
    m_text,
    [this, &str](const auto& it)
    {
      str << it.size() << ",";
    });

  str << " paths: ";

  std::ranges::for_each(
    m_path,
    [this, &str](const auto& it)
    {
      str << it.string() << ",";
    });

  log::trace("unified_diff::" + text) << str.str();
}

1	////////////////////////////////////////////////////////////////////////////////
2	// Name: unified-diff.cpp
3	// Purpose: Implementation of class wex::factory::unified_diff
4	// https://www.gnu.org/software/diffutils/manual/html_node/Detailed-Unified.html
5	// Author: Anton van Wezenbeek
6	// Copyright: (c) 2024-2025 Anton van Wezenbeek
7	////////////////////////////////////////////////////////////////////////////////
8
9	#include <boost/tokenizer.hpp>
10	#include <wex/core/log.h>
11	#include <wex/core/regex.h>
12	#include <wex/factory/frame.h>
13	#include <wex/factory/unified-diff.h>
14
15	#include <iostream>
16	#include <utility>
17
18	#define NEXT_TOKEN \
19	if (++tok_iter == tokens.end()) \
20	{ \
21	return false; \
22	}
23
24	#define CHANGES_LINES(RANGE, TEXT) \
25	for (int i = 0; i < m_range[RANGE]; i++) \
26	{ \
27	NEXT_TOKEN \
28	m_text[TEXT].push_back((*tok_iter).substr(1)); \
29	}
30
31	#define HEADER_LINES(REGEX, INTO) \
32	if (!parse_header(REGEX, *tok_iter, INTO)) \
33	{ \
34	return false; \
35	} \
36	NEXT_TOKEN
37
38	#define SKIP_LINES \
39	while (tok_iter != tokens.end()) \
40	{ \
41	NEXT_TOKEN \
42	if (!tok_iter->starts_with("diff ") && !tok_iter->starts_with("index ")) \
43	{ \
44	break; \
45	} \
46	}
47
48	namespace wex
49	{
50	size_t stoi(const std::string& i)	68✔
51	{
52	return i.empty() ? 1 : std::stoi(i);	68✔
53	}
54	} // namespace wex
55
56	wex::factory::unified_diff::unified_diff(std::string input)	16✔
57	: m_input(std::move(input))	16✔
58	{
59	m_range.fill({0});	16✔
60	}	16✔
61
62	bool wex::factory::unified_diff::parse()	11✔
63	{
64	using tokenizer = boost::tokenizer<boost::char_separator<char>>;
65
66	tokenizer tokens(m_input, boost::char_separator<char>("\r\n"));	11✔
67
68	tokenizer::iterator tok_iter = tokens.begin();	11✔
69
70	m_diffs = 0;	11✔
71	m_type = diff_t::UNKNOWN;	11✔
72
73	while (tok_iter != tokens.end())	34✔
74	{
75	// skip first lines
76	SKIP_LINES;	25!
77
78	// The unified output format starts with a two-line header
79	HEADER_LINES("--- a/(.*)", m_path[0]);	42!
80	HEADER_LINES("\\+\\+\\+ b/(.*)", m_path[1]);	42!
81
82	m_is_first = true;	14✔
83	m_is_last = false;	14✔
84	m_type = (m_type == diff_t::UNKNOWN ? diff_t::FIRST : diff_t::OTHER);	14✔
85
86	// Next come one or more hunks of differences
87	while (tok_iter != tokens.end())	25✔
88	{
89	regex r_hunk("@@ -([0-9]+),?([0-9]*) \\+([0-9]+),?([0-9]*) @@.*");	19✔
90
91	if (r_hunk.match(*tok_iter) != 4)	19✔
92	{
93	log("unified_diff") << *tok_iter << r_hunk.size();	2✔
94	return false;	2✔
95	}
96
97	m_range[0] = wex::stoi(r_hunk[0]);	17✔
98	m_range[1] = wex::stoi(r_hunk[1]);	17✔
99	m_range[2] = wex::stoi(r_hunk[2]);	17✔
100	m_range[3] = wex::stoi(r_hunk[3]);	17✔
101
102	m_text.fill({});	17✔
103
104	// Now get all - lines and all + lines, collect them, and invoke callback.
105	CHANGES_LINES(1, 0);	33!
106	CHANGES_LINES(3, 1);	29!
107
108	if (!report_diff())	17!
109	{
110	return false;	×
111	}
112
113	trace("hunk");	17✔
114
115	m_diffs++;	17✔
116
117	m_is_first = false;	17✔
118
119	if (++tok_iter != tokens.end() && !(*tok_iter).starts_with("@@"))	17!
120	{
121	m_is_last = true;	6✔
122	m_type = diff_t::OTHER;	6✔
123
124	if (!report_diff())	6!
125	{
126	return false;	×
127	}
128	trace("last");	6✔
129	m_diffs++;	6✔
130
131	break; // this was last hunk, continue with header lines	6✔
132	}
133	}	19✔
134	}
135
136	m_is_last = true;	8✔
137
138	if (m_type != diff_t::UNKNOWN)	8✔
139	{
140	m_type = diff_t::LAST;	6✔
141	}
142
143	report_diff_finish();	8✔
144
145	trace("finished");	8✔
146
147	return true;	8✔
148	}	11✔
149
150	bool wex::factory::unified_diff::parse_header(	28✔
151	const std::string& r,
152	const std::string& line,
153	path& p)
154	{
155	regex re(r);	28✔
156
157	if (!re.match(line))	28!
158	{
159	log("unified_diff") << line << re.match_data().text();	×
160	return false;	×
161	}
162
163	p = path(re[0]);	28✔
164
165	return true;	28✔
166	}	28✔
167
168	void wex::factory::unified_diff::trace(const std::string& text) const	37✔
169	{
170	using boost::describe::operators::operator<<;
171
172	std::stringstream str;	37✔
173	str << "type: " << boost::describe::enum_to_string(m_type, "none") << " "	37✔
174	<< *this << " ranges: ";	37✔
175
176	std::ranges::for_each(	×
177	m_range,	37✔
178	[this, &str](const auto& it)	148✔
179	{
180	str << std::to_string(it) << ",";	148✔
181	});	148✔
182
183	str << " text sizes: ";	37✔
184
185	std::ranges::for_each(	×
186	m_text,	37✔
187	[this, &str](const auto& it)	74✔
188	{
189	str << it.size() << ",";	74✔
190	});	74✔
191
192	str << " paths: ";	37✔
193
194	std::ranges::for_each(	×
195	m_path,	37✔
196	[this, &str](const auto& it)	74✔
197	{
198	str << it.string() << ",";	74✔
199	});	74✔
200
201	log::trace("unified_diff::" + text) << str.str();	37✔
202	}	37✔

antonvw / wex / 19760138266

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous