• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

llnl / dftracer-utils / 28496595030

01 Jul 2026 05:50AM UTC coverage: 50.727% (-1.6%) from 52.278%
28496595030

Pull #83

github

web-flow
Merge 8f1ff4df5 into 2efed6649
Pull Request #83: refactor and improve code QoL

31872 of 80367 branches covered (39.66%)

Branch coverage included in aggregate %.

770 of 1591 new or added lines in 85 files covered. (48.4%)

5070 existing lines in 182 files now uncovered.

32742 of 47009 relevant lines covered (69.65%)

9887.52 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

24.28
/src/dftracer/utils/core/common/format_detector.cpp
1
#include <dftracer/utils/core/common/constants.h>
2
#include <dftracer/utils/core/common/format_detector.h>
3
#include <dftracer/utils/core/common/logging.h>
4
#include <zlib.h>
5

6
#include <algorithm>
7
#include <cstring>
8

9
namespace dftracer::utils {
10

11
// Determines the archive format of `file_path`.
//
// The file name is checked first: ".tar.gz" / ".tgz" map to TAR_GZ and
// ".gz" / ".gzip" map to GZIP. Only when no suffix matches is the file opened
// and its content inspected via detect_from_content(). Returns UNKNOWN when
// the file cannot be opened.
ArchiveFormat FormatDetector::detect(const std::string& file_path) {
    // True when file_path ends with the given suffix (case-sensitive).
    const auto ends_with = [&file_path](const char* suffix) {
        const std::size_t suffix_len = std::strlen(suffix);
        return file_path.size() >= suffix_len &&
               file_path.compare(file_path.size() - suffix_len, suffix_len,
                                 suffix) == 0;
    };

    // ".tar.gz" must be tested before ".gz", which it also ends with.
    if (ends_with(".tar.gz") || ends_with(".tgz")) {
        return ArchiveFormat::TAR_GZ;
    }
    if (ends_with(".gz") || ends_with(".gzip")) {
        return ArchiveFormat::GZIP;
    }

    // No recognizable extension: fall back to sniffing the file content.
    FILE* handle = std::fopen(file_path.c_str(), "rb");
    if (handle == nullptr) {
        DFTRACER_UTILS_LOG_ERROR("Failed to open file for format detection: %s",
                                 file_path.c_str());
        return ArchiveFormat::UNKNOWN;
    }

    const ArchiveFormat detected = detect_from_content(handle);
    std::fclose(handle);
    return detected;
}
2,167✔
37

38
// Classifies an already-open file by its content.
//
// Returns UNKNOWN when has_gzip_magic() rejects the file; otherwise TAR_GZ
// when has_tar_header_after_gzip() accepts it, and GZIP when it does not.
// Both helpers reposition the file before reading.
ArchiveFormat FormatDetector::detect_from_content(FILE* file) {
    if (has_gzip_magic(file)) {
        return has_tar_header_after_gzip(file) ? ArchiveFormat::TAR_GZ
                                               : ArchiveFormat::GZIP;
    }
    return ArchiveFormat::UNKNOWN;
}
65✔
49

50
bool FormatDetector::is_tar_gz(FILE* file) {
×
51
    return detect_from_content(file) == ArchiveFormat::TAR_GZ;
×
52
}
53

54
bool FormatDetector::is_gzip(FILE* file) {
×
55
    return detect_from_content(file) == ArchiveFormat::GZIP;
×
56
}
57

58
bool FormatDetector::has_gzip_magic(FILE* file) {
65✔
59
    if (fseeko(file, 0, SEEK_SET) != 0) {
65!
60
        return false;
×
61
    }
62

63
    unsigned char magic[2];
64
    if (fread(magic, 1, 2, file) != 2) {
65✔
65
        return false;
26✔
66
    }
67

68
    return magic[0] == constants::indexer::GZIP_MAGIC_BYTE_0 &&
39!
NEW
69
           magic[1] == constants::indexer::GZIP_MAGIC_BYTE_1;
×
70
}
65✔
71

72
bool FormatDetector::has_tar_header_after_gzip(FILE* file) {
×
73
    // Seek to the beginning of the file
74
    if (fseeko(file, 0, SEEK_SET) != 0) {
×
75
        return false;
×
76
    }
77

78
    // Initialize zlib for GZIP decompression
79
    z_stream stream;
80
    memset(&stream, 0, sizeof(stream));
×
81

NEW
82
    if (inflateInit2(&stream, constants::indexer::ZLIB_GZIP_WINDOW_BITS) !=
×
83
        Z_OK) {
UNCOV
84
        return false;
×
85
    }
86

87
    // Read input buffer
88
    const size_t buffer_size = 8192;
×
89
    unsigned char in_buffer[buffer_size];
90
    unsigned char out_buffer[buffer_size];
91

92
    bool found_tar_header = false;
×
93
    size_t total_out = 0;
×
94

95
    while (total_out < 512) {  // Need at least 512 bytes for TAR header
×
96
        // Read compressed data
97
        size_t bytes_read = fread(in_buffer, 1, buffer_size, file);
×
98
        if (bytes_read == 0) {
×
99
            if (ferror(file)) {
×
UNCOV
100
                DFTRACER_UTILS_LOG_DEBUG(
×
101
                    "%s", "Error reading file during TAR detection");
UNCOV
102
            }
×
103
            break;
×
104
        }
105

106
        stream.next_in = in_buffer;
×
107
        stream.avail_in = static_cast<uInt>(bytes_read);
×
108

109
        while (stream.avail_in > 0 && total_out < 512) {
×
110
            stream.next_out = out_buffer;
×
111
            stream.avail_out =
×
112
                static_cast<uInt>(std::min(buffer_size, 512 - total_out));
×
113

114
            int ret = inflate(&stream, Z_NO_FLUSH);
×
115
            if (ret != Z_OK && ret != Z_STREAM_END) {
×
116
                break;
×
117
            }
118

UNCOV
119
            size_t bytes_out =
×
120
                (std::min(buffer_size, 512 - total_out)) - stream.avail_out;
×
121

122
            // Check if we have enough bytes to validate TAR header
123
            if (total_out + bytes_out >= 512) {
×
124
                // We need to copy existing data if any and append new data
125
                unsigned char tar_header[512];
126
                memset(tar_header, 0, 512);
×
127

128
                if (total_out > 0) {
×
129
                    // This is more complex - we'd need to store previous output
130
                    // For simplicity, let's check if we got the header in one
131
                    // go
132
                    if (total_out == 0 && bytes_out >= 512) {
×
133
                        memcpy(tar_header, out_buffer, 512);
×
134
                        found_tar_header = is_valid_tar_header(tar_header);
×
UNCOV
135
                    }
×
136
                } else if (bytes_out >= 512) {
×
137
                    memcpy(tar_header, out_buffer, 512);
×
138
                    found_tar_header = is_valid_tar_header(tar_header);
×
UNCOV
139
                }
×
140
                break;
×
141
            }
142

143
            total_out += bytes_out;
×
144

145
            if (ret == Z_STREAM_END) {
×
146
                break;
×
147
            }
148
        }
149

150
        if (found_tar_header || total_out >= 512) {
×
UNCOV
151
            break;
×
152
        }
153
    }
154

155
    inflateEnd(&stream);
×
156
    return found_tar_header;
×
UNCOV
157
}
×
158

159
bool FormatDetector::is_valid_tar_header(const unsigned char* header) {
×
160
    // Check for POSIX TAR magic: "ustar\0"
161
    const char* ustar_magic = "ustar";
×
162
    if (memcmp(header + 257, ustar_magic, 5) == 0 && header[262] == 0) {
×
163
        // Validate checksum
164
        unsigned int stored_checksum = 0;
×
165

166
        // Read checksum field (bytes 148-155) as octal
167
        for (int i = 148; i < 156; i++) {
×
168
            char c = header[i];
×
169
            if (c >= '0' && c <= '7') {
×
170
                stored_checksum = stored_checksum * 8 + (c - '0');
×
171
            } else if (c == ' ' || c == '\0') {
×
UNCOV
172
                break;
×
173
            } else {
174
                return false;
×
175
            }
UNCOV
176
        }
×
177

178
        unsigned int calculated_checksum = calculate_tar_checksum(header);
×
179
        return stored_checksum == calculated_checksum;
×
180
    }
181

182
    // Check for old GNU tar format - look for reasonable filename
183
    // and verify that most of the header fields make sense
184
    bool has_filename = false;
×
185
    for (int i = 0; i < 100; i++) {
×
186
        if (header[i] == '\0') {
×
187
            has_filename = (i > 0);  // Non-empty filename
×
188
            break;
×
189
        }
190
        if (!isprint(header[i]) && header[i] != '/') {
×
191
            return false;
×
192
        }
UNCOV
193
    }
×
194

195
    if (!has_filename) {
×
196
        return false;
×
197
    }
198

199
    // Check file mode (should be reasonable octal value)
200
    bool mode_ok = true;
×
201
    for (int i = 100; i < 108; i++) {
×
202
        char c = header[i];
×
203
        if (c != ' ' && c != '\0' && (c < '0' || c > '7')) {
×
204
            mode_ok = false;
×
205
            break;
×
206
        }
UNCOV
207
    }
×
208

209
    return mode_ok;
×
UNCOV
210
}
×
211

212
// Computes the TAR header checksum: the sum of all 512 header bytes taken as
// unsigned values, with the 8-byte checksum field (bytes 148-155) counted as
// if it were filled with ASCII spaces.
//
// `header` must point to at least 512 readable bytes.
unsigned int FormatDetector::calculate_tar_checksum(
    const unsigned char* header) {
    unsigned int sum = 0;

    for (int offset = 0; offset < 512; ++offset) {
        const bool in_checksum_field = offset >= 148 && offset < 156;
        sum += in_checksum_field ? static_cast<unsigned int>(' ')
                                 : static_cast<unsigned int>(header[offset]);
    }

    return sum;
}
227
}  // namespace dftracer::utils
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc