15883576848

Committed 25 Jun 2025 05:55PM UTC coverage: 59.136% (+0.008%) from 59.128%

Build # 15883576848

Build Type

push

github

Committed by

web-flow

Commit Message

Download more external libs from official GitHub repos (#787)

* download Curl, Kaitai, PicoJson, Utf8, ZLib from github
updated versions

* typo

* updated curl cache

Run Details

116 of 119 new or added lines in 2 files covered. (97.48%)

20 existing lines in 6 files now uncovered.

42684 of 72179 relevant lines covered (59.14%)

446612.3 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.25

/pol-core/clib/strutil.cpp

/** @file
 *
 * @par History
 * - 2009/09/12 MuadDib:   Disabled 4244 in this file due to it being on a string iter. Makes no
 * sense.
 * - 2014/06/10 Nando:  Removed pragma that disabled 4244. (tolower()/toupper() used ints because -1
 * is a valid output).
 */


#include "strutil.h"

#include <boost/algorithm/string.hpp>
#include <boost/algorithm/string/case_conv.hpp>
#include <boost/algorithm/string/trim.hpp>
#include <utf8cpp/utf8.h>

#include "logfacility.h"
#include "stlutil.h"

namespace Pol
{
namespace Clib
{
/**
 * Splits a "name value" / "name=value" line into its two parts.
 *
 * Leading whitespace (space, tab, CR, LF) is skipped.  The name runs up to the
 * first whitespace character or '=' found after its first character.  The value
 * starts at the first non-whitespace character after that delimiter and ends at
 * the last non-whitespace character of the line.
 *
 * @param istr       line to split
 * @param propname   receives the name, or "" when the line is blank
 * @param propvalue  receives the value, or "" when there is none
 */
void splitnamevalue( const std::string& istr, std::string& propname, std::string& propvalue )
{
  const char* const blanks = " \t\r\n";

  const std::string::size_type name_begin = istr.find_first_not_of( blanks );
  if ( name_begin == std::string::npos )
  {
    // Blank or empty line: nothing to extract.
    propname = "";
    propvalue = "";
    return;
  }

  // The search starts one past the first character, so a name is never empty.
  const std::string::size_type name_end = istr.find_first_of( " \t\r\n=", name_begin + 1 );
  if ( name_end == std::string::npos )
  {
    // No delimiter: the rest of the line is the name.
    propname = istr.substr( name_begin, std::string::npos );
    propvalue = "";
    return;
  }

  const std::string::size_type value_begin = istr.find_first_not_of( blanks, name_end + 1 );
  const std::string::size_type value_last = istr.find_last_not_of( blanks );
  propname = istr.substr( name_begin, name_end - name_begin );

  const bool has_value = value_begin != std::string::npos && value_last != std::string::npos;
  if ( has_value )
    propvalue = istr.substr( value_begin, value_last - value_begin + 1 );
  else
    propvalue = "";
}

/**
 * Decodes a quoted string in place.
 *
 * The first character is assumed to be the opening quote and is skipped without
 * being checked.  Decoding then continues until an unescaped '"', a NUL byte or
 * the end of the input.  A backslash escapes the next character: "\n" becomes a
 * newline, any other escaped character (backslash, quote, ...) is copied as is.
 * A trailing lone backslash is dropped.
 *
 * @param str  in: the quoted text; out: the decoded content.  An empty input
 *             stays empty.
 */
void decodequotedstring( std::string& str )
{
  std::string tmp;
  tmp.swap( str );

  // Without this guard the unconditional skip of the opening quote below would
  // step past the terminating NUL of an empty string and read out of bounds.
  if ( tmp.empty() )
    return;

  str.reserve( tmp.size() );
  const char* s = tmp.c_str() + 1;  // skip the (unchecked) opening quote
  while ( *s )
  {
    char ch = *s++;

    if ( ch == '\"' )
      return;  // closing quote: anything after it is ignored

    if ( ch == '\\' )
    {
      ch = *s++;
      if ( ch == '\0' )
        return;  // dangling backslash at the end of the input
      if ( ch == 'n' )
        ch = '\n';
      // any other escaped character (slash, quote, etc) is taken literally
    }

    str += ch;
  }
}
/**
 * Wraps @p str in double quotes in place, escaping its content.
 *
 * Backslash becomes "\\", a double quote becomes "\"" and a newline becomes
 * "\n" (backslash followed by 'n').  All other characters are copied
 * unchanged.  Encoding stops at the first NUL byte of the input.
 *
 * @param str  in: raw text; out: the quoted and escaped text
 */
void encodequotedstring( std::string& str )
{
  std::string raw;
  raw.swap( str );
  str.reserve( raw.size() + 2 );

  str += "\"";
  for ( const char ch : raw )
  {
    if ( ch == '\0' )
      break;  // input is treated as a C string

    if ( ch == '\\' )
      str += "\\\\";
    else if ( ch == '\"' )
      str += "\\\"";
    else if ( ch == '\n' )
      str += "\\n";
    else
      str += ch;
  }
  str += "\"";
}

/**
 * Returns a quoted, escaped copy of @p in; the argument itself is not modified.
 * The encoding is done by encodequotedstring().
 */
std::string getencodedquotedstring( const std::string& in )
{
  std::string quoted( in );
  encodequotedstring( quoted );
  return quoted;
}

// If we have boost, I think we should use it...
/**
 * Lower-cases @p str in place by delegating to boost::to_lower.
 *
 * NOTE(review): no locale is passed, so Boost's default locale argument is
 * used; the ASCII-only behavior suggested by the name presumably relies on
 * that default - confirm.
 */
void mklowerASCII( std::string& str )
{
  boost::to_lower( str );
}

/**
 * Upper-cases @p str in place by delegating to boost::to_upper.
 *
 * NOTE(review): no locale is passed, so Boost's default locale argument is
 * used; the ASCII-only behavior suggested by the name presumably relies on
 * that default - confirm.
 */
void mkupperASCII( std::string& str )
{
  boost::to_upper( str );
}

/**
 * Returns a lower-cased copy of @p str (boost::to_lower_copy); the argument is
 * left untouched.
 *
 * NOTE(review): no locale is passed, so Boost's default locale argument is
 * used - confirm this matches the ASCII-only intent of the name.
 */
std::string strlowerASCII( const std::string& str )
{
  return boost::to_lower_copy( str );
}

/**
 * Returns an upper-cased copy of @p str (boost::to_upper_copy); the argument is
 * left untouched.
 *
 * NOTE(review): no locale is passed, so Boost's default locale argument is
 * used - confirm this matches the ASCII-only intent of the name.
 */
std::string strupperASCII( const std::string& str )
{
  return boost::to_upper_copy( str );
}

/**
 * Returns a copy of @p str trimmed at both ends by boost::trim_copy.
 *
 * NOTE(review): which characters count as trimmable is decided by Boost's
 * default (presumably whitespace as classified by its default locale) -
 * confirm.
 */
std::string strtrim( const std::string& str )
{
  return boost::trim_copy( str );
}

/**
 * Checks whether @p str is valid UTF-8.
 *
 * @return true when utf8::find_invalid reports no offending position in the
 *         string (i.e. it returns the end iterator), false otherwise
 */
bool isValidUnicode( const std::string& str )
{
  return utf8::find_invalid( str.begin(), str.end() ) == str.end();
}

/**
 * Makes @p str valid UTF-8 by reinterpreting it as a single-byte encoding.
 *
 * A string that already passes isValidUnicode() is left untouched.  Otherwise
 * every byte is assumed to be ISO 8859 text: bytes below 0x80 are copied, and
 * every byte with the high bit set is expanded into a two-byte UTF-8 sequence.
 *
 * @param str  string to convert in place
 */
void sanitizeUnicodeWithIso( std::string* str )
{
  if ( isValidUnicode( *str ) )
    return;

  // assume iso8859
  std::string converted( "" );
  converted.reserve( 2 * str->size() + 1 );

  for ( const char raw : *str )
  {
    const unsigned char octet = static_cast<unsigned char>( raw );
    if ( ( octet & 0x80 ) == 0 )
    {
      converted.push_back( raw );
      continue;
    }
    // Lead byte: 0xC2 for 0x80-0xBF, 0xC3 for 0xC0-0xFF (octet >> 6 is 2 or 3).
    converted.push_back( 0xc2 | ( octet >> 6 ) );
    // Continuation byte: the high bit is always set here, so masking with 0xBF
    // yields 10xxxxxx.
    converted.push_back( 0xbf & octet );
  }
  *str = converted;
}

void sanitizeUnicode( std::string* str )
{
  if ( !isValidUnicode( *str ) )
  {
    try
    {
      std::string new_s;
      utf8::replace_invalid( str->begin(), str->end(), std::back_inserter( new_s ) );
      *str = new_s;
    }
    catch ( utf8::exception& )
    {
      *str = "Invalid unicode";
    }
  }
  auto begin = str->begin();
  auto end = str->end();
  auto invalid_chr = []( u32 c )
  {
    return ( c >= 0x1u && c < 0x9u ) /*0x9 \t 0x10 \n*/ ||
           ( c >= 0x11u && c < 0x13u ) /*0x13 \r*/ || ( c >= 0x14u && c < 0x20u ) || c == 0x7Fu ||
           ( c >= 0x80u && c <= 0x9Fu );
  };
  while ( begin != end )
  {
    auto c = utf8::unchecked::next( begin );
    if ( invalid_chr( c ) )
    {
      // control character found build new string skipping them
      std::string new_s;
      begin = str->begin();
      while ( begin != end )
      {
        c = utf8::unchecked::next( begin );
        if ( invalid_chr( c ) )
          continue;
        utf8::unchecked::append( c, std::back_inserter( new_s ) );
      }
      *str = new_s;
      break;
    }
  }
}

/**
 * Erases the first three bytes of @p strbuf when utf8::starts_with_bom reports
 * that it begins with a byte order mark.  Strings shorter than three bytes are
 * never touched.
 */
void remove_bom( std::string* strbuf )
{
  const bool long_enough = strbuf->size() >= 3;
  if ( long_enough && utf8::starts_with_bom( strbuf->cbegin(), strbuf->cend() ) )
    strbuf->erase( 0, 3 );
}

/**
 * Maps a Unicode code point to its Windows-1252 byte.
 *
 * - U+0000-U+007F and U+00A0-U+00FF map to the byte of the same value.
 * - U+0080-U+009F map to '?'.
 * - The code points listed in the table below map to their 0x80-0x9F byte.
 * - Everything else maps to '?'.
 *
 * @param codepoint  Unicode code point
 * @return the Windows-1252 byte, or '?' when there is no mapping
 */
uint8_t unicodeToCp1252( uint32_t codepoint )
{
  struct Mapping
  {
    uint32_t unicode;
    uint8_t cp1252;
  };
  static constexpr Mapping high_block[] = {
      { 0x20AC, 128 },  // €
      { 0x201A, 130 },  // ‚
      { 0x0192, 131 },  // ƒ
      { 0x201E, 132 },  // „
      { 0x2026, 133 },  // …
      { 0x2020, 134 },  // †
      { 0x2021, 135 },  // ‡
      { 0x02C6, 136 },  // ˆ
      { 0x2030, 137 },  // ‰
      { 0x0160, 138 },  // Š
      { 0x2039, 139 },  // ‹
      { 0x0152, 140 },  // Œ
      { 0x017D, 142 },  // Ž
      { 0x2018, 145 },  // ‘
      { 0x2019, 146 },  // ’
      { 0x201C, 147 },  // “
      { 0x201D, 148 },  // ”
      { 0x2022, 149 },  // •
      { 0x2013, 150 },  // –
      { 0x2014, 151 },  // —
      { 0x02DC, 152 },  // ˜
      { 0x2122, 153 },  // ™
      { 0x0161, 154 },  // š
      { 0x203A, 155 },  // ›
      { 0x0153, 156 },  // œ
      { 0x017E, 158 },  // ž
      { 0x0178, 159 },  // Ÿ
  };

  if ( codepoint <= 0xff )
  {
    // The range 0x80-0x9F is where Windows-1252 differs from Unicode.
    const bool in_remapped_range = codepoint >= 0x80 && codepoint <= 0x9f;
    if ( in_remapped_range )
      return '?';
    return static_cast<uint8_t>( codepoint );
  }

  for ( const Mapping& entry : high_block )
  {
    if ( entry.unicode == codepoint )
      return entry.cp1252;
  }
  return '?';
}

/**
 * Maps a Windows-1252 byte to its Unicode code point.
 *
 * Bytes outside 0x80-0x9F map to the code point of the same value.  Inside that
 * range the table below is used; the five bytes without an entry of their own
 * (0x81, 0x8D, 0x8F, 0x90, 0x9D) also map to themselves.
 *
 * @param codepoint  Windows-1252 byte
 * @return the corresponding Unicode code point
 */
uint32_t cp1252ToUnicode( uint8_t codepoint )
{
  // Indexed by (byte - 0x80).
  static constexpr uint32_t high_block[32] = {
      0x20AC,  // 128 €
      0x0081,  // 129 (passed through)
      0x201A,  // 130 ‚
      0x0192,  // 131 ƒ
      0x201E,  // 132 „
      0x2026,  // 133 …
      0x2020,  // 134 †
      0x2021,  // 135 ‡
      0x02C6,  // 136 ˆ
      0x2030,  // 137 ‰
      0x0160,  // 138 Š
      0x2039,  // 139 ‹
      0x0152,  // 140 Œ
      0x008D,  // 141 (passed through)
      0x017D,  // 142 Ž
      0x008F,  // 143 (passed through)
      0x0090,  // 144 (passed through)
      0x2018,  // 145 ‘
      0x2019,  // 146 ’
      0x201C,  // 147 “
      0x201D,  // 148 ”
      0x2022,  // 149 •
      0x2013,  // 150 –
      0x2014,  // 151 —
      0x02DC,  // 152 ˜
      0x2122,  // 153 ™
      0x0161,  // 154 š
      0x203A,  // 155 ›
      0x0153,  // 156 œ
      0x009D,  // 157 (passed through)
      0x017E,  // 158 ž
      0x0178,  // 159 Ÿ
  };

  if ( codepoint < 0x80 || codepoint > 0x9F )
    return codepoint;
  return high_block[codepoint - 0x80];
}

/**
 * Converts a UTF-8 string to Windows-1252, one output byte per code point.
 *
 * Each code point is translated with unicodeToCp1252(), so code points without
 * a Windows-1252 equivalent come out as '?'.
 *
 * NOTE(review): decoding uses utf8::unchecked::next, so the input presumably
 * has to be valid UTF-8 already - confirm at the call sites.
 *
 * @param utf8string  UTF-8 encoded text
 * @return the Windows-1252 encoded text
 */
std::string strUtf8ToCp1252( const std::string& utf8string )
{
  std::string outstring;
  const auto last = utf8string.end();
  for ( auto cursor = utf8string.begin(); cursor != last; )
  {
    // next() advances the cursor past the decoded sequence.
    const auto code_point = utf8::unchecked::next( cursor );
    outstring.push_back( unicodeToCp1252( code_point ) );
  }
  return outstring;
}

/**
 * Converts a Windows-1252 string to UTF-8.
 *
 * Every input byte is mapped to a code point with cp1252ToUnicode() and then
 * appended to the result in UTF-8 form.
 *
 * @param cp1252string  Windows-1252 encoded text
 * @return the UTF-8 encoded text
 */
std::string strCp1252ToUtf8( const std::string& cp1252string )
{
  std::string outstring;
  for ( const char byte : cp1252string )
  {
    utf8::unchecked::append( cp1252ToUnicode( byte ), std::back_inserter( outstring ) );
  }
  return outstring;
}

/**
 * Compares two strings ignoring case by delegating to boost::iequals.
 *
 * NOTE(review): no locale is passed, so case folding follows Boost's default
 * locale argument - confirm that this is the intended behavior for non-ASCII
 * text.
 *
 * @return true when boost::iequals considers @p input and @p test equal
 */
bool caseInsensitiveEqual( const std::string& input, const std::string& test )
{
  return boost::iequals( input, test );
}


}  // namespace Clib
}  // namespace Pol

1	/** @file
2	*
3	* @par History
4	* - 2009/09/12 MuadDib: Disabled 4244 in this file due to it being on a string iter. Makes no
5	* sense.
6	* - 2014/06/10 Nando: Removed pragma that disabled 4244. (tolower()/toupper() used ints because -1
7	* is a valid output).
8	*/
9
10
11	#include "strutil.h"
12
13	#include <boost/algorithm/string.hpp>
14	#include <boost/algorithm/string/case_conv.hpp>
15	#include <boost/algorithm/string/trim.hpp>
16	#include <utf8cpp/utf8.h>
17
18	#include "logfacility.h"
19	#include "stlutil.h"
20
21	namespace Pol
22	{
23	namespace Clib
24	{
25	void splitnamevalue( const std::string& istr, std::string& propname, std::string& propvalue )	107,174✔
26	{
27	std::string::size_type start = istr.find_first_not_of( " \t\r\n" );	107,174✔
28	if ( start != std::string::npos )	107,174✔
29	{
30	std::string::size_type delimpos = istr.find_first_of( " \t\r\n=", start + 1 );	100,830✔
31	if ( delimpos != std::string::npos )	100,830✔
32	{
33	std::string::size_type valuestart = istr.find_first_not_of( " \t\r\n", delimpos + 1 );	89,114✔
34	std::string::size_type valueend = istr.find_last_not_of( " \t\r\n" );	89,114✔
35	propname = istr.substr( start, delimpos - start );	89,114✔
36	if ( valuestart != std::string::npos && valueend != std::string::npos )	89,114✔
37	{
38	propvalue = istr.substr( valuestart, valueend - valuestart + 1 );	89,013✔
39	}
40	else
41	{
42	propvalue = "";	101✔
43	}
44	}
45	else
46	{
47	propname = istr.substr( start, std::string::npos );	11,716✔
48	propvalue = "";	11,716✔
49	}
50	}
51	else
52	{
53	propname = "";	6,344✔
54	propvalue = "";	6,344✔
55	}
56	}	107,174✔
57
58	void decodequotedstring( std::string& str )	8✔
59	{
60	std::string tmp;	8✔
61	tmp.swap( str );	8✔
62	const char* s = tmp.c_str();	8✔
63	str.reserve( tmp.size() );	8✔
64	++s;	8✔
65	while ( *s )	50✔
66	{
67	char ch = *s++;	50✔
68
69	switch ( ch )	50✔
70	{
71	case '\\':	1✔
72	ch = *s++;	1✔
73	switch ( ch )	1✔
74	{
75	case '\0':	×
76	return;	×
77	case 'n': // newline	×
78	str += "\n";	×
79	break;	×
80	default: // slash, quote, etc	1✔
81	str += ch;	1✔
82	break;	1✔
83	}
84	break;	1✔
85
86	case '\"':	8✔
87	return;	8✔
88
89	default:	41✔
90	str += ch;	41✔
91	break;	41✔
92	}
93	}
94	}	8✔
95	void encodequotedstring( std::string& str )	11,044✔
96	{
97	std::string tmp;	11,044✔
98	tmp.swap( str );	11,044✔
99	const char* s = tmp.c_str();	11,044✔
100	str.reserve( tmp.size() + 2 );	11,044✔
101	str += "\"";	11,044✔
102
103	while ( *s )	51,385✔
104	{
105	char ch = *s++;	40,341✔
106	switch ( ch )	40,341✔
107	{
108	case '\\':	6✔
109	str += "\\\\";	6✔
110	break;	6✔
111	case '\"':	27✔
112	str += "\\\"";	27✔
113	break;	27✔
114	case '\n':	16✔
115	str += "\\n";	16✔
116	break;	16✔
117	default:	40,292✔
118	str += ch;	40,292✔
119	break;	40,292✔
120	}
121	}
122
123	str += "\"";	11,044✔
124	}	11,044✔
125
126	std::string getencodedquotedstring( const std::string& in )	11,040✔
127	{
128	std::string tmp = in;	11,040✔
129	encodequotedstring( tmp );	11,040✔
130	return tmp;	11,040✔
131	}	×
132
133	// If we have boost, I think we should use it...
134	void mklowerASCII( std::string& str )	5,885✔
135	{
136	boost::to_lower( str );	5,885✔
137	}	5,885✔
138
139	void mkupperASCII( std::string& str )	20✔
140	{
141	boost::to_upper( str );	20✔
142	}	20✔
143
144	std::string strlowerASCII( const std::string& str )	2,853✔
145	{
146	return boost::to_lower_copy( str );	2,853✔
147	}
148
149	std::string strupperASCII( const std::string& str )	×
150	{
151	return boost::to_upper_copy( str );	×
152	}
153
154	std::string strtrim( const std::string& str )	13✔
155	{
156	return boost::trim_copy( str );	13✔
157	}
158
159	bool isValidUnicode( const std::string& str )	156,239✔
160	{
161	return utf8::find_invalid( str.begin(), str.end() ) == str.end();	156,239✔
162	}
163
164	void sanitizeUnicodeWithIso( std::string* str )	123,807✔
165	{
166	if ( isValidUnicode( *str ) )	123,807✔
167	return;	123,800✔
168	// assume iso8859
169	std::string utf8( "" );	7✔
170	utf8.reserve( 2 * str->size() + 1 );	7✔
171
172	for ( const auto& s : *str )	256✔
173	{
174	if ( !( s & 0x80 ) )	249✔
175	{
176	utf8.push_back( s );	238✔
177	}
178	else
179	{
180	utf8.push_back( 0xc2 \| ( (unsigned char)( s ) >> 6 ) );	11✔
181	utf8.push_back( 0xbf & s );	11✔
182	}
183	}
184	*str = utf8;	7✔
185	}	7✔
186
187	void sanitizeUnicode( std::string* str )	213✔
188	{
189	if ( !isValidUnicode( *str ) )	213✔
190	{
191	try
192	{
193	std::string new_s;	×
194	utf8::replace_invalid( str->begin(), str->end(), std::back_inserter( new_s ) );	×
195	*str = new_s;	×
196	}	×
197	catch ( utf8::exception& )	×
198	{
199	*str = "Invalid unicode";	×
200	}	×
201	}
202	auto begin = str->begin();	213✔
203	auto end = str->end();	213✔
204	auto invalid_chr = []( u32 c )	252✔
205	{
206	return ( c >= 0x1u && c < 0x9u ) /0x9 \t 0x10 \n/ \|\|	252✔
207	( c >= 0x11u && c < 0x13u ) /0x13 \r/ \|\| ( c >= 0x14u && c < 0x20u ) \|\| c == 0x7Fu \|\|	508✔
208	( c >= 0x80u && c <= 0x9Fu );	256✔
209	};
210	while ( begin != end )	461✔
211	{
212	auto c = utf8::unchecked::next( begin );	250✔
213	if ( invalid_chr( c ) )	250✔
214	{
215	// control character found build new string skipping them
216	std::string new_s;	2✔
217	begin = str->begin();	2✔
218	while ( begin != end )	4✔
219	{
220	c = utf8::unchecked::next( begin );	2✔
221	if ( invalid_chr( c ) )	2✔
222	continue;	2✔
223	utf8::unchecked::append( c, std::back_inserter( new_s ) );	×
224	}
225	*str = new_s;	2✔
226	break;	2✔
227	}	2✔
228	}
229	}	213✔
230
231	void remove_bom( std::string* strbuf )	16,225✔
232	{
233	if ( strbuf->size() >= 3 )	16,225✔
234	{
235	if ( utf8::starts_with_bom( strbuf->cbegin(), strbuf->cend() ) )	16,107✔
236	strbuf->erase( 0, 3 );	9✔
237	}
238	}	16,225✔
239
240	uint8_t unicodeToCp1252( uint32_t codepoint )	1,671✔
241	{
242	if ( codepoint >= 0x80 && codepoint <= 0x9f )	1,671✔
243	return '?';	×
244	else if ( codepoint <= 0xff )	1,671✔
245	return (char)codepoint;	1,644✔
246	else
247	{
248	switch ( codepoint )	27✔
249	{
250	case 0x20AC:	1✔
251	return 128; // €	1✔
252	case 0x201A:	1✔
253	return 130; // ‚	1✔
254	case 0x0192:	1✔
255	return 131; // ƒ	1✔
256	case 0x201E:	1✔
257	return 132; // „	1✔
258	case 0x2026:	1✔
259	return 133; // …	1✔
260	case 0x2020:	1✔
261	return 134; // †	1✔
262	case 0x2021:	1✔
263	return 135; // ‡	1✔
264	case 0x02C6:	1✔
265	return 136; // ˆ	1✔
266	case 0x2030:	1✔
267	return 137; // ‰	1✔
268	case 0x0160:	1✔
269	return 138; // Š	1✔
270	case 0x2039:	1✔
271	return 139; // ‹	1✔
272	case 0x0152:	1✔
273	return 140; // Œ	1✔
274	case 0x017D:	1✔
275	return 142; // Ž	1✔
276	case 0x2018:	1✔
277	return 145; // ‘	1✔
278	case 0x2019:	1✔
279	return 146; // ’	1✔
280	case 0x201C:	1✔
281	return 147; // “	1✔
282	case 0x201D:	1✔
283	return 148; // ”	1✔
284	case 0x2022:	1✔
285	return 149; // •	1✔
286	case 0x2013:	1✔
287	return 150; // –	1✔
288	case 0x2014:	1✔
289	return 151; // —	1✔
290	case 0x02DC:	1✔
291	return 152; // ˜	1✔
292	case 0x2122:	1✔
293	return 153; // ™	1✔
294	case 0x0161:	1✔
295	return 154; // š	1✔
296	case 0x203A:	1✔
297	return 155; // ›	1✔
298	case 0x0153:	1✔
299	return 156; // œ	1✔
300	case 0x017E:	1✔
301	return 158; // ž	1✔
302	case 0x0178:	1✔
303	return 159; // Ÿ	1✔
NEW 304	default:	×
NEW 305	return '?';	×
306	}
307	}
308	}
309
310	uint32_t cp1252ToUnicode( uint8_t codepoint )	218✔
311	{
312	switch ( codepoint )	218✔
313	{
314	case 128:	1✔
315	return 0x20AC; // €	1✔
316	case 130:	1✔
317	return 0x201A; // ‚	1✔
318	case 131:	1✔
319	return 0x0192; // ƒ	1✔
320	case 132:	1✔
321	return 0x201E; // „	1✔
322	case 133:	1✔
323	return 0x2026; // …	1✔
324	case 134:	1✔
325	return 0x2020; // †	1✔
326	case 135:	1✔
327	return 0x2021; // ‡	1✔
328	case 136:	1✔
329	return 0x02C6; // ˆ	1✔
330	case 137:	1✔
331	return 0x2030; // ‰	1✔
332	case 138:	1✔
333	return 0x0160; // Š	1✔
334	case 139:	1✔
335	return 0x2039; // ‹	1✔
336	case 140:	1✔
337	return 0x0152; // Œ	1✔
338	case 142:	1✔
339	return 0x017D; // Ž	1✔
340	case 145:	1✔
341	return 0x2018; // ‘	1✔
342	case 146:	1✔
343	return 0x2019; // ’	1✔
344	case 147:	1✔
345	return 0x201C; // “	1✔
346	case 148:	1✔
347	return 0x201D; // ”	1✔
348	case 149:	1✔
349	return 0x2022; // •	1✔
350	case 150:	1✔
351	return 0x2013; // –	1✔
352	case 151:	1✔
353	return 0x2014; // —	1✔
354	case 152:	1✔
355	return 0x02DC; // ˜	1✔
356	case 153:	1✔
357	return 0x2122; // ™	1✔
358	case 154:	1✔
359	return 0x0161; // š	1✔
360	case 155:	1✔
361	return 0x203A; // ›	1✔
362	case 156:	1✔
363	return 0x0153; // œ	1✔
364	case 158:	1✔
365	return 0x017E; // ž	1✔
366	case 159:	1✔
367	return 0x0178; // Ÿ	1✔
368	default:	191✔
369	return codepoint;	191✔
370	}
371	}
372
373	std::string strUtf8ToCp1252( const std::string& utf8string )	169✔
374	{
375	auto itr = utf8string.begin();	169✔
376	auto end = utf8string.end();	169✔
377	std::string outstring;	169✔
378	while ( itr != end )	1,840✔
379	{
380	auto c = unicodeToCp1252( utf8::unchecked::next( itr ) );	1,671✔
381	outstring.push_back( c );	1,671✔
382	}
383	return outstring;	338✔
384	}	×
385
386	std::string strCp1252ToUtf8( const std::string& cp1252string )	1✔
387	{
388	auto itr = cp1252string.begin();	1✔
389	auto end = cp1252string.end();	1✔
390	std::string outstring;	1✔
391	auto inserter = std::back_inserter( outstring );	1✔
392	while ( itr != end )	219✔
393	{
394	utf8::unchecked::append( cp1252ToUnicode( *itr++ ), inserter );	218✔
395	}
396	return outstring;	2✔
397	}	×
398
399	bool caseInsensitiveEqual( const std::string& input, const std::string& test )	16,674✔
400	{
401	return boost::iequals( input, test );	16,674✔
402	}
403
404
405	} // namespace Clib
406	} // namespace Pol

polserver / polserver / 15883576848

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous