• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

tstack / lnav / 17589970077-2502

09 Sep 2025 05:00PM UTC coverage: 65.196% (-5.0%) from 70.225%
17589970077-2502

push

github

tstack
[format] add fields for source file/line

Knowing the source file/line context in a log
message can help find log messages when using
log2src.

56 of 70 new or added lines in 2 files covered. (80.0%)

13954 existing lines in 210 files now uncovered.

45516 of 69814 relevant lines covered (65.2%)

404154.37 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

85.96
/src/text_format.cc
1
/**
2
 * Copyright (c) 2017, Timothy Stack
3
 *
4
 * All rights reserved.
5
 *
6
 * Redistribution and use in source and binary forms, with or without
7
 * modification, are permitted provided that the following conditions are met:
8
 *
9
 * * Redistributions of source code must retain the above copyright notice, this
10
 * list of conditions and the following disclaimer.
11
 * * Redistributions in binary form must reproduce the above copyright notice,
12
 * this list of conditions and the following disclaimer in the documentation
13
 * and/or other materials provided with the distribution.
14
 * * Neither the name of Timothy Stack nor the names of its contributors
15
 * may be used to endorse or promote products derived from this software
16
 * without specific prior written permission.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ''AS IS'' AND ANY
19
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
20
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21
 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
22
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
23
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
24
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
25
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
27
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28
 *
29
 * @file text_format.cc
30
 */
31

32
#include <set>
33

34
#include "text_format.hh"
35

36
#include "base/from_trait.hh"
37
#include "base/is_utf8.hh"
38
#include "base/itertools.enumerate.hh"
39
#include "base/itertools.hh"
40
#include "base/lnav_log.hh"
41
#include "config.h"
42
#include "pcrepp/pcre2pp.hh"
43
#include "yajl/api/yajl_parse.h"
44

45
// MIME-type style names for the text formats.  from() in this file converts a
// name back into a text_format_t by casting the index of the matching entry,
// so the position of each string is significant and must not be reordered.
constexpr string_fragment TEXT_FORMAT_STRINGS[text_format_count] = {
    "application/octet-stream"_frag,
    "text/c"_frag,
    "text/java"_frag,
    "application/json"_frag,
    "text/log"_frag,
    "text/x-makefile"_frag,
    "text/man"_frag,
    "text/markdown"_frag,
    "text/python"_frag,
    "application/x-pcre"_frag,
    "text/rust"_frag,
    "application/sql"_frag,
    "text/xml"_frag,
    "application/yaml"_frag,
    "application/toml"_frag,
    "text/x-diff"_frag,
    "text/x-shellscript"_frag,
    "text/x-lnav-script"_frag,
    "text/x-rst"_frag,
    "text/ini"_frag,
    "text/plain"_frag,
};
68

69
text_format_t
70
detect_text_format(string_fragment sf,
1,168✔
71
                   std::optional<std::filesystem::path> path)
72
{
73
    static const std::set<std::filesystem::path> FILTER_EXTS = {
74
        ".bz2",
75
        ".gz",
76
        ".lzma",
77
        ".xz",
78
        ".zst",
79
    };
5,109✔
80
    static const auto C_EXTS = std::set<std::filesystem::path>{
81
        ".h",
82
        ".hh",
83
        ".hpp",
84
        ".c",
85
        ".cc",
86
        ".cpp",
87
        ".tpp",
88
    };
6,235✔
89
    static const auto PY_EXT = std::filesystem::path(".py");
1,168✔
90
    static const auto RS_EXT = std::filesystem::path(".rs");
1,168✔
91
    static const auto SQL_EXT = std::filesystem::path(".sql");
1,168✔
92
    static const auto JAVA_EXT = std::filesystem::path(".java");
1,168✔
93
    static const auto TOML_EXT = std::filesystem::path(".toml");
1,168✔
94
    static const auto XML_EXT = std::filesystem::path(".xml");
1,168✔
95
    static const auto YAML_EXT = std::filesystem::path(".yaml");
1,168✔
96
    static const auto YML_EXT = std::filesystem::path(".yml");
1,168✔
97
    static const auto MAKEFILE_STEM = std::filesystem::path("Makefile");
1,168✔
98
    static const auto MD_EXT = std::filesystem::path(".md");
1,168✔
99
    static const auto MARKDOWN_EXT = std::filesystem::path(".markdown");
1,168✔
100
    static const auto SH_EXT = std::filesystem::path(".sh");
1,168✔
101
    static const auto LNAV_EXT = std::filesystem::path(".lnav");
1,168✔
102
    static const auto RST_EXT = std::filesystem::path(".rst");
1,168✔
103
    static const auto INI_EXT = std::filesystem::path(".ini");
1,168✔
104

105
    static const auto DIFF_MATCHERS = lnav::pcre2pp::code::from_const(
106
        R"(^--- .*\n\+\+\+ .*\n)", PCRE2_MULTILINE);
1,168✔
107

108
    static const auto MAN_MATCHERS = lnav::pcre2pp::code::from_const(
109
        R"(^[A-Za-z][A-Za-z\-_\+0-9]+\(\d\)\s+)", PCRE2_MULTILINE);
1,168✔
110

111
    // XXX This is a pretty crude way of
112
    // detecting format...
113
    static const auto PYTHON_MATCHERS = lnav::pcre2pp::code::from_const(
114
        "(?:"
115
        "^\\s*def\\s+\\w+\\([^)]*\\):"
116
        "[^\\n]*$|"
117
        "^\\s*try:[^\\n]*$"
118
        ")",
119
        PCRE2_MULTILINE);
1,168✔
120

121
    static const auto RUST_MATCHERS = lnav::pcre2pp::code::from_const(
122
        R"(
123
(?:
124
^\s*use\s+[\w+:\{\}]+;$|
125
^\s*(?:pub enum|pub const|(?:pub )?fn)\s+\w+.*$|
126
^\s*impl\s+\w+.*$
127
)
128
)",
129
        PCRE2_MULTILINE);
1,168✔
130

131
    static const auto JAVA_MATCHERS = lnav::pcre2pp::code::from_const(
132
        "(?:"
133
        "^package\\s+|"
134
        "^import\\s+|"
135
        "^\\s*(?:public)?\\s*"
136
        "class\\s*(\\w+\\s+)*\\s*{"
137
        ")",
138
        PCRE2_MULTILINE);
1,168✔
139

140
    static const auto C_LIKE_MATCHERS = lnav::pcre2pp::code::from_const(
141
        "(?:"
142
        "^#\\s*include\\s+|"
143
        "^#\\s*define\\s+|"
144
        "^\\s*if\\s+\\([^)]+\\)[^\\n]"
145
        "*$|"
146
        "^\\s*(?:\\w+\\s+)*class "
147
        "\\w+ {"
148
        ")",
149
        PCRE2_MULTILINE);
1,168✔
150

151
    static const auto SQL_MATCHERS = lnav::pcre2pp::code::from_const(
152
        "(?:"
153
        "create\\s+table\\s+|"
154
        "select\\s+.+\\s+from\\s+|"
155
        "insert\\s+into\\s+.+\\s+"
156
        "values"
157
        ")",
158
        PCRE2_MULTILINE | PCRE2_CASELESS);
1,168✔
159

160
    static const auto XML_MATCHERS = lnav::pcre2pp::code::from_const(
161
        "(?:"
162
        R"(<\?xml(\s+\w+\s*=\s*"[^"]*")*\?>|)"
163
        R"(</?\w+(\s+\w+\s*=\s*"[^"]*")*\s*>)"
164
        ")",
165
        PCRE2_MULTILINE | PCRE2_CASELESS);
1,168✔
166

167
    static const auto SH_MATCHERS
168
        = lnav::pcre2pp::code::from_const("^#!.+sh\\b", PCRE2_MULTILINE);
1,168✔
169

170
    static const auto LNAV_MATCHERS = lnav::pcre2pp::code::from_const(
171
        "(?:"
172
        "^;SELECT\\s+|"
173
        "^:[a-z0-9\\-]+\\s+"
174
        ")",
175
        PCRE2_MULTILINE | PCRE2_CASELESS);
1,168✔
176

177
    static const auto INI_MATCHERS = lnav::pcre2pp::code::from_const(
178
        R"(
179
        \A
180
        (?:\s*[;#].*\n)*                             # Optional multi-line comment at top
181
        (?:\s*\[[^\]\r\n]+\]\s*\n                    # Section header
182
          (?:\s*[a-zA-Z0-9_.-]+\s*=\s*.*\n)+         # One or more key=value lines
183
        )
184
)",
185
        PCRE2_MULTILINE | PCRE2_CASELESS | PCRE2_EXTENDED);
1,168✔
186

187
    auto utf_res = is_utf8(sf);
1,168✔
188
    if (!utf_res.is_valid()) {
1,168✔
189
        return text_format_t::TF_UNKNOWN;
69✔
190
    }
191

192
    if (path) {
1,099✔
193
        while (FILTER_EXTS.count(path->extension()) > 0) {
1,103✔
194
            path = path->stem();
4✔
195
        }
196

197
        auto stem = path->stem();
1,099✔
198
        auto ext = path->extension();
1,099✔
199
        if (ext == MD_EXT || ext == MARKDOWN_EXT) {
1,099✔
200
            return text_format_t::TF_MARKDOWN;
23✔
201
        }
202

203
        if (C_EXTS.count(ext) > 0) {
1,076✔
204
            return text_format_t::TF_C_LIKE;
2✔
205
        }
206

207
        if (ext == PY_EXT) {
1,074✔
208
            return text_format_t::TF_PYTHON;
2✔
209
        }
210

211
        if (ext == RS_EXT) {
1,072✔
UNCOV
212
            return text_format_t::TF_RUST;
×
213
        }
214

215
        if (ext == SQL_EXT) {
1,072✔
UNCOV
216
            return text_format_t::TF_SQL;
×
217
        }
218

219
        if (ext == TOML_EXT) {
1,072✔
UNCOV
220
            return text_format_t::TF_TOML;
×
221
        }
222

223
        if (ext == JAVA_EXT) {
1,072✔
UNCOV
224
            return text_format_t::TF_JAVA;
×
225
        }
226

227
        if (ext == YAML_EXT || ext == YML_EXT) {
1,072✔
UNCOV
228
            return text_format_t::TF_YAML;
×
229
        }
230

231
        if (ext == XML_EXT) {
1,072✔
232
            return text_format_t::TF_XML;
8✔
233
        }
234

235
        if (stem == MAKEFILE_STEM) {
1,064✔
UNCOV
236
            return text_format_t::TF_MAKEFILE;
×
237
        }
238

239
        if (ext == SH_EXT) {
1,064✔
UNCOV
240
            return text_format_t::TF_SHELL_SCRIPT;
×
241
        }
242

243
        if (ext == LNAV_EXT) {
1,064✔
244
            return text_format_t::TF_LNAV_SCRIPT;
2✔
245
        }
246

247
        if (ext == RST_EXT) {
1,062✔
UNCOV
248
            return text_format_t::TF_RESTRUCTURED_TEXT;
×
249
        }
250

251
        if (ext == INI_EXT) {
1,062✔
UNCOV
252
            return text_format_t::TF_INI;
×
253
        }
254
    }
1,136✔
255

256
    {
257
        auto_mem<yajl_handle_t> jhandle(yajl_free);
1,062✔
258

259
        jhandle = yajl_alloc(nullptr, nullptr, nullptr);
1,062✔
260
        if (yajl_parse(jhandle, sf.udata(), sf.length()) == yajl_status_ok) {
1,062✔
261
            return text_format_t::TF_JSON;
79✔
262
        }
263
    }
1,062✔
264

265
    if (DIFF_MATCHERS.find_in(sf, PCRE2_NO_UTF_CHECK).ignore_error()) {
983✔
266
        return text_format_t::TF_DIFF;
1✔
267
    }
268

269
    if (SH_MATCHERS.find_in(sf, PCRE2_NO_UTF_CHECK).ignore_error()) {
982✔
270
        return text_format_t::TF_SHELL_SCRIPT;
1✔
271
    }
272

273
    if (MAN_MATCHERS.find_in(sf, PCRE2_NO_UTF_CHECK).ignore_error()) {
981✔
274
        return text_format_t::TF_MAN;
5✔
275
    }
276

277
    if (PYTHON_MATCHERS.find_in(sf, PCRE2_NO_UTF_CHECK).ignore_error()) {
976✔
UNCOV
278
        return text_format_t::TF_PYTHON;
×
279
    }
280

281
    if (RUST_MATCHERS.find_in(sf, PCRE2_NO_UTF_CHECK).ignore_error()) {
976✔
UNCOV
282
        return text_format_t::TF_RUST;
×
283
    }
284

285
    if (JAVA_MATCHERS.find_in(sf, PCRE2_NO_UTF_CHECK).ignore_error()) {
976✔
UNCOV
286
        return text_format_t::TF_JAVA;
×
287
    }
288

289
    if (C_LIKE_MATCHERS.find_in(sf, PCRE2_NO_UTF_CHECK).ignore_error()) {
976✔
UNCOV
290
        return text_format_t::TF_C_LIKE;
×
291
    }
292

293
    if (LNAV_MATCHERS.find_in(sf, PCRE2_NO_UTF_CHECK).ignore_error()) {
976✔
294
        return text_format_t::TF_LNAV_SCRIPT;
1✔
295
    }
296

297
    if (SQL_MATCHERS.find_in(sf, PCRE2_NO_UTF_CHECK).ignore_error()) {
975✔
298
        return text_format_t::TF_SQL;
6✔
299
    }
300

301
    if (XML_MATCHERS.find_in(sf, PCRE2_NO_UTF_CHECK).ignore_error()) {
969✔
302
        return text_format_t::TF_XML;
16✔
303
    }
304

305
    if (INI_MATCHERS.find_in(sf, PCRE2_NO_UTF_CHECK).ignore_error()) {
953✔
UNCOV
306
        return text_format_t::TF_INI;
×
307
    }
308

309
    return text_format_t::TF_UNKNOWN;
953✔
310
}
1,126✔
311

312
std::optional<text_format_meta_t>
313
extract_text_meta(string_fragment sf, text_format_t tf)
87✔
314
{
315
    static const auto MAN_NAME = lnav::pcre2pp::code::from_const(
316
        R"(^([A-Za-z][A-Za-z\-_\+0-9]+\(\d\))\s+)", PCRE2_MULTILINE);
87✔
317

318
    switch (tf) {
87✔
319
        case text_format_t::TF_MAN: {
3✔
320
            thread_local auto md = lnav::pcre2pp::match_data::unitialized();
3✔
321

322
            auto find_res
323
                = MAN_NAME.capture_from(sf).into(md).matches().ignore_error();
3✔
324

325
            if (find_res) {
3✔
326
                return text_format_meta_t{
6✔
327
                    md.to_string(),
328
                };
3✔
329
            }
UNCOV
330
            break;
×
331
        }
332
        default:
84✔
333
            break;
84✔
334
    }
335

336
    return std::nullopt;
84✔
337
}
338

339
template<>
340
Result<text_format_t, std::string>
341
from(const string_fragment sf)
729✔
342
{
343
    for (const auto& [index, format_sf] :
6,561✔
344
         lnav::itertools::enumerate(TEXT_FORMAT_STRINGS))
6,561✔
345
    {
346
        if (format_sf == sf) {
5,832✔
347
            return Ok(static_cast<text_format_t>(index));
1,458✔
348
        }
349
    }
UNCOV
350
    return Err(fmt::format(FMT_STRING("unrecognized text format: {}"), sf));
×
351
}
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc