• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

BlueBrain / MorphIO / 11254817533

09 Oct 2024 12:20PM UTC coverage: 77.697% (+0.08%) from 77.615%
11254817533

Pull #476

github

mgeplf
Merge remote-tracking branch 'origin/master' into efficient-swc-build
Pull Request #476: Efficient swc build

286 of 321 new or added lines in 10 files covered. (89.1%)

9 existing lines in 4 files now uncovered.

2125 of 2735 relevant lines covered (77.7%)

901.01 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

92.73
/src/readers/morphologySWC.cpp
1
/* Copyright (c) 2013-2023, EPFL/Blue Brain Project
2
 *
3
 * SPDX-License-Identifier: Apache-2.0
4
 */
5

6
#include "morphologySWC.h"
7

8
#include <cctype>         // isdigit
9
#include <cstdint>        // uint32_t
10
#include <memory>         // std::shared_ptr
11
#include <string>         // std::string
12
#include <unordered_map>  // std::unordered_map
13
#include <utility>
14
#include <vector>         // std::vector
15

16
#include <morphio/mut/morphology.h>
17
#include <morphio/mut/section.h>
18
#include <morphio/mut/soma.h>
19
#include <morphio/properties.h>
20
#include <morphio/warning_handling.h>
21

22
#include "../error_message_generation.h"
23
#include "../shared_utils.hpp"
24
#include "utils.h"
25

26
namespace morphio {
27
namespace details {
28

29
// It's not clear if -1 is the only way of identifying a root section.
30
const int SWC_UNDEFINED_PARENT = -1;
31
const unsigned int SWC_ROOT = 0xFFFFFFFD;
32

33
/* simple stream parser for SWC file format which is a line oriented format
34
 *
35
 * This parser advances across comments and blank lines, and allows the caller
36
 * to get integers and floats
37
 */
38
class SWCTokenizer
39
{
40
public:
41
  explicit SWCTokenizer(std::string contents, std::string path)
94✔
42
      : contents_(std::move(contents))
94✔
43
      , path_(std::move(path)) {
94✔
44
      // ensure null termination
45
      (void) contents_.c_str();
94✔
46
  }
94✔
47

48
  bool done() const noexcept {
15,584✔
49
      return pos_ >= contents_.size();
15,584✔
50
  }
51

52
  size_t lineNumber() const noexcept {
692✔
53
      return line_;
692✔
54
  }
55

56
  void skip_to(char value) {
1,508✔
57
      std::size_t pos = contents_.find_first_of(value, pos_);
1,508✔
58
      if (pos == std::string::npos) {
1,508✔
59
          pos_ = contents_.size();
2✔
60
      }
61
      pos_ = pos;
1,508✔
62
  }
1,508✔
63

64
  void advance_to_non_whitespace() {
6,544✔
65
      if (done()) {
6,544✔
66
          return;
64✔
67
      }
68
      std::size_t pos = contents_.find_first_not_of(" \t\r", pos_);
6,480✔
69
      if (pos == std::string::npos) {
6,480✔
70
          pos_ = contents_.size();
18✔
71
          return;
18✔
72
      }
73
      pos_ = pos;
6,462✔
74
  }
75

76
  void advance_to_number() {
4,810✔
77
      advance_to_non_whitespace();
4,810✔
78

79
      if (done()) {
4,810✔
NEW
80
          details::ErrorMessages err(path_);
×
NEW
81
          throw RawDataError(err.EARLY_END_OF_FILE(line_));
×
82
      }
83

84
      auto c = contents_.at(pos_);
4,810✔
85
      if (std::isdigit(c) != 0 || c == '-' || c == '+' || c == '.') {
4,810✔
86
          return;
4,806✔
87
      }
88

89
      details::ErrorMessages err(path_);
8✔
90
      throw RawDataError(err.ERROR_LINE_NON_PARSABLE(line_));
4✔
91
  }
92

93
  int64_t read_int() {
2,066✔
94
      advance_to_number();
2,066✔
95
      try {
96
          auto parsed = stn_.toInt(contents_, pos_);
2,062✔
97
          pos_ = std::get<1>(parsed);
2,062✔
98
          return std::get<0>(parsed);
2,062✔
NEW
99
      } catch(std::invalid_argument&e) {
×
NEW
100
          throw RawDataError(e.what());
×
101
      }
102
  }
103

104
  floatType read_float() {
2,744✔
105
      advance_to_number();
2,744✔
106
      auto parsed = stn_.toFloat(contents_, pos_);
2,744✔
107
      pos_ = std::get<1>(parsed);
2,744✔
108
      return std::get<0>(parsed);
2,744✔
109
  }
110

111
  void skip_blank_lines_and_comments() {
774✔
112
      advance_to_non_whitespace();
774✔
113

114
      while (!done() && (contents_.at(pos_) == '#' || contents_.at(pos_) == '\n')) {
1,734✔
115
          if (contents_.at(pos_) == '#') {
960✔
116
              skip_to('\n');
828✔
117
          }
118

119
          if (!done() && contents_.at(pos_) == '\n') {
960✔
120
              ++line_;
960✔
121
              ++pos_;
960✔
122
          }
123
          advance_to_non_whitespace();
960✔
124
      }
125
  }
774✔
126

127
  void finish_line() {
680✔
128
      skip_to('\n');
680✔
129
      if (!done() && contents_.at(pos_) == '\n') {
680✔
130
          ++line_;
678✔
131
          ++pos_;
678✔
132
      }
133
  }
680✔
134

135

136
private:
137
  size_t pos_ = 0;
138
  size_t line_ = 1;
139
  std::string contents_;
140
  StringToNumber stn_;
141
  std::string path_;
142
};
143

144
struct SWCSample {
145
    enum : unsigned int { UNKNOWN_ID = 0xFFFFFFFE };
146

UNCOV
147
    SWCSample() = default;  // XXX
×
148
    floatType diameter = -1.;
149
    Point point{};
150
    SectionType type = SECTION_UNDEFINED;
151
    unsigned int parentId = UNKNOWN_ID;
152
    unsigned int id = UNKNOWN_ID;
153
    unsigned int lineNumber = 0;
154
};
155

156
static std::vector<unsigned int> gatherLineNumbers(const std::vector<SWCSample>& samples) {
4✔
157
    std::vector<unsigned int> lineNumbers;
4✔
158
    std::transform(samples.begin(),
159
                   samples.cend(),
160
                   std::back_inserter(lineNumbers),
161
                   [](const SWCSample& sample) { return sample.lineNumber; });
12✔
162
    return lineNumbers;
4✔
163
}
164

165
static std::vector<SWCSample> readSamples(const std::string& contents, const std::string& path) {
94✔
166
    std::vector<SWCSample> samples;
94✔
167
    SWCSample sample;
94✔
168

169
    SWCTokenizer tokenizer{contents, path};
282✔
170

171
    tokenizer.skip_blank_lines_and_comments();
94✔
172
    while (!tokenizer.done()) {
774✔
173
        sample.lineNumber = static_cast<unsigned int>(tokenizer.lineNumber());
692✔
174

175
        int64_t id = tokenizer.read_int();
692✔
176
        if (id < 0) {
692✔
177
            details::ErrorMessages err(path);
8✔
178
            throw RawDataError(err.ERROR_NEGATIVE_ID(sample.lineNumber));
4✔
179
        }else if(id > std::numeric_limits<unsigned int>::max()){
688✔
NEW
180
            throw RawDataError("SWC does not support ids larger than" + std::to_string(std::numeric_limits<unsigned int>::max()));
×
181
        }
182

183
        sample.id = static_cast<unsigned int>(id);
688✔
184

185
        sample.type = static_cast<SectionType>(tokenizer.read_int());
688✔
186

187
        for (auto& point : sample.point) {
2,744✔
188
            point = tokenizer.read_float();
2,058✔
189
        }
190

191
        sample.diameter = 2 * tokenizer.read_float();
686✔
192

193
        int64_t parentId = tokenizer.read_int();
686✔
194
        if (parentId < -1) {
684✔
195
            details::ErrorMessages err(path);
8✔
196
            throw RawDataError(err.ERROR_NEGATIVE_ID(sample.lineNumber));
4✔
197
        }else if(parentId > std::numeric_limits<unsigned int>::max()){
680✔
NEW
198
            throw RawDataError("SWC does not support parent ids larger than" + std::to_string(std::numeric_limits<unsigned int>::max()));
×
199
        } else if (parentId == SWC_UNDEFINED_PARENT) {
680✔
200
            sample.parentId = SWC_ROOT;
86✔
201
        } else {
202
            sample.parentId = static_cast<unsigned int>(parentId);
594✔
203
        }
204

205
        samples.push_back(sample);
680✔
206

207
        // Normally one would just `skip_blank_lines_and_comments` directly,
208
        // but we support "extra columns" that are ignored; so since people can put anything
209
        // in these "extra columns", we have to ignore everything until the end of the line
210
        tokenizer.finish_line();
680✔
211

212
        tokenizer.skip_blank_lines_and_comments();
680✔
213
    }
214

215
    if (!tokenizer.done()) {
82✔
NEW
216
        details::ErrorMessages err(path);
×
NEW
217
        throw RawDataError(err.EARLY_END_OF_FILE(0));
×
218
    }
219

220
    return samples;
164✔
221
}
222

223
/**
224
  Parsing SWC according to this specification:
225
http://www.neuronland.org/NLMorphologyConverter/MorphologyFormats/SWC/Spec.html
226
 **/
227

228
enum class DeclaredID : unsigned int {};
229

230
class SWCBuilder
231
{
232
    using MorphioID = uint32_t;
233
    using PhysicalID = size_t;
234
    using Samples = std::vector<SWCSample>;
235

236
  public:
237
    SWCBuilder(std::string path, WarningHandler* warning_handler)
94✔
238
        : path_(std::move(path))
188✔
239
        , warning_handler_(warning_handler) {}
94✔
240

241
    Property::Properties buildProperties(const std::string& contents, unsigned int options) {
94✔
242
        const Samples samples = readSamples(contents, path_);
176✔
243
        buildSWC(samples);
82✔
244
        morph_.applyModifiers(options);
68✔
245
        return morph_.buildReadOnly();
136✔
246
    }
247

248
  private:
249
    void build_soma(const Samples& soma_samples) {
74✔
250
        auto& soma = morph_.soma();
74✔
251

252
        if (soma_samples.empty()) {
74✔
253
            soma->type() = SOMA_UNDEFINED;
4✔
254
            warning_handler_->emit(std::make_unique<NoSomaFound>(path_));
4✔
255
            return;
4✔
256
        } else if (soma_samples.size() == 1) {
70✔
257
            SWCSample sample = soma_samples[0];
48✔
258

259
            if (sample.parentId != SWC_ROOT && samples_.at(sample.parentId).type != SECTION_SOMA) {
48✔
260
                details::ErrorMessages err_(path_);
4✔
261
                throw SomaError(err_.ERROR_SOMA_WITH_NEURITE_PARENT(sample.lineNumber));
2✔
262
            }
263

264
            soma->type() = SOMA_SINGLE_POINT;
46✔
265
            soma->points() = {sample.point};
46✔
266
            soma->diameters() = {sample.diameter};
46✔
267
            return;
46✔
268
        } else if (soma_samples.size() == 3 &&
30✔
269
                   (soma_samples[0].id == 1 && soma_samples[0].parentId == SWC_ROOT &&
8✔
270
                    soma_samples[1].id == 2 && soma_samples[1].parentId == 1 &&
8✔
271
                    soma_samples[2].id == 3 && soma_samples[2].parentId == 1)) {
8✔
272
            const std::array<Point, 3> points = {
273
                soma_samples[0].point,
8✔
274
                soma_samples[1].point,
8✔
275
                soma_samples[2].point,
8✔
276
            };
8✔
277
            // All soma that bifurcate with the first parent having two children are considered
278
            // SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS
279
            const details::ThreePointSomaStatus status =
280
                details::checkNeuroMorphoSoma(points, soma_samples[0].diameter / 2);
8✔
281
            if (status != details::ThreePointSomaStatus::Conforms) {
8✔
NEW
282
                std::stringstream stream;
×
NEW
283
                stream << status;
×
NEW
284
                warning_handler_->emit(std::make_unique<SomaNonConform>(path_, stream.str()));
×
285
            }
286
            soma->type() = SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS;
8✔
287
            soma->points() = std::vector<Point>(points.begin(), points.end());
8✔
288
            soma->diameters() = {
8✔
289
                soma_samples[0].diameter,
8✔
290
                soma_samples[1].diameter,
8✔
291
                soma_samples[2].diameter,
8✔
292
            };
8✔
293
            return;
8✔
294
        }
295
        // might also have 3 points at this point, as well
296

297
        // a "normal" SWC soma
298
        soma->type() = SOMA_CYLINDERS;
14✔
299
        auto& points = soma->points();
14✔
300
        auto& diameters = soma->diameters();
14✔
301
        points.reserve(soma_samples.size());
14✔
302
        diameters.reserve(soma_samples.size());
14✔
303

304
        size_t parent_count = 0;
14✔
305
        for (const auto& s : soma_samples) {
56✔
306
            if (s.parentId == SWC_ROOT) {
44✔
307
                parent_count++;
16✔
308
            } else if (samples_.count(s.parentId) == 0) {
28✔
NEW
309
                details::ErrorMessages err_(path_);
×
310
                throw MissingParentError(
NEW
311
                    err_.ERROR_MISSING_PARENT(s.id, static_cast<int>(s.parentId), s.lineNumber));
×
312
            } else if (samples_.at(s.parentId).type != SECTION_SOMA) {
28✔
NEW
313
                details::ErrorMessages err_(path_);
×
NEW
314
                throw SomaError(err_.ERROR_SOMA_WITH_NEURITE_PARENT(s.lineNumber));
×
315
            }
316

317
            if (children_.count(s.id) > 0 && children_.at(s.id).size() > 1) {
44✔
318
                std::vector<SWCSample> soma_bifurcations;
20✔
319
                for (auto id : children_.at(s.id)) {
30✔
320
                    if (samples_[id].type == SECTION_SOMA && s.parentId != SWC_ROOT) {
20✔
321
                        soma_bifurcations.push_back(samples_[id]);
12✔
322
                    }
323
                }
324
                if (soma_bifurcations.size() > 1) {
10✔
325
                    details::ErrorMessages err_(path_);
4✔
326
                    throw SomaError(
327
                        err_.ERROR_SOMA_BIFURCATION(s.lineNumber,
4✔
328
                                                    gatherLineNumbers(soma_bifurcations)));
6✔
329
                }
330
            }
331
            points.push_back(s.point);
42✔
332
            diameters.push_back(s.diameter);
42✔
333
        }
334

335
        if (parent_count > 1) {
12✔
336
            details::ErrorMessages err_(path_);
4✔
337
            throw SomaError(err_.ERROR_MULTIPLE_SOMATA(gatherLineNumbers(soma_samples)));
2✔
338
        }
339
    }
340

341
    void buildSWC(const Samples& samples) {
82✔
342
        Samples soma_samples;
164✔
343
        Samples root_samples;
164✔
344

345
        for (const auto& sample: samples) {
756✔
346
            // { checks
347
            if (sample.diameter < morphio::epsilon) {
680✔
NEW
348
                details::ErrorMessages err_(path_);
×
NEW
349
                warning_handler_->emit(std::make_unique<ZeroDiameter>(path_, sample.lineNumber));
×
350
            }
351

352
            if (sample.parentId == sample.id) {
680✔
353
                details::ErrorMessages err_(path_);
4✔
354
                throw RawDataError(err_.ERROR_SELF_PARENT(sample.id));
2✔
355
            }
356

357
            if (sample.type >= SECTION_OUT_OF_RANGE_START || sample.type <= 0) {
678✔
358
                details::ErrorMessages err_(path_);
4✔
359
                throw RawDataError(
360
                    err_.ERROR_UNSUPPORTED_SECTION_TYPE(sample.lineNumber, sample.type));
2✔
361
            }
362
            if (sample.parentId == SWC_ROOT && sample.type != SECTION_SOMA) {
676✔
363
                warning_handler_->emit(
10✔
364
                    std::make_unique<DisconnectedNeurite>(path_, sample.lineNumber));
20✔
365
            }
366
            // } checks
367

368
            if (sample.type == SECTION_SOMA) {
676✔
369
                soma_samples.push_back(sample);
126✔
370
            }
371

372
            if (sample.parentId == SWC_ROOT || sample.type == SECTION_SOMA) {
676✔
373
                root_samples.push_back(sample);
136✔
374
            }
375

376
            if (!samples_.insert({sample.id, sample}).second) {
676✔
377
                const auto& original = samples_[sample.id];
2✔
378
                details::ErrorMessages err_(path_);
4✔
379
                throw RawDataError(err_.ERROR_REPEATED_ID(original.id, original.lineNumber, sample.id));
2✔
380
            }
381

382
            children_[sample.parentId].push_back(sample.id);
674✔
383
        }
384

385
        // can only check for missing parents once all samples are loaded
386
        // since it's possible there may be forward references
387
        for (const auto& sample: samples) {
742✔
388
            if(sample.parentId != SWC_ROOT && samples_.count(sample.parentId) == 0){
668✔
389
                details::ErrorMessages err_(path_);
4✔
390
                throw MissingParentError(err_.ERROR_MISSING_PARENT(
2✔
391
                    sample.id, static_cast<int>(sample.parentId), sample.lineNumber));
4✔
392
            }
393
        }
394

395
        build_soma(soma_samples);
74✔
396

397
        std::unordered_map<DeclaredID, std::shared_ptr<morphio::mut::Section>> declared_to_swc;
136✔
398
        declared_to_swc.reserve(samples.size());
68✔
399

400
        for (const SWCSample& root_sample : root_samples) {
180✔
401
            if (children_.count(root_sample.id) == 0) {
112✔
402
                continue;
28✔
403
            }
404

405
            // https://neuromorpho.org/SomaFormat.html
406
            // "The second and third soma points, as well as all starting points
407
            // (roots) of dendritic and axonal arbors have this first point as
408
            // the parent (parent ID 1)."
409
            if (morph_.soma()->type() == SOMA_NEUROMORPHO_THREE_POINT_CYLINDERS &&
84✔
410
                root_sample.type == SECTION_SOMA && root_sample.id != 1) {
84✔
NEW
411
                warning_handler_->emit(std::make_unique<WrongRootPoint>(
×
NEW
412
                    path_, std::vector<unsigned int>{root_sample.lineNumber}));
×
413
            }
414

415
            for (unsigned int child_id : children_.at(root_sample.id)) {
278✔
416
                if (samples_.at(child_id).type == SECTION_SOMA) {
198✔
417
                    continue;
42✔
418
                }
419
                if (root_sample.type == SECTION_SOMA) {
156✔
420
                    assembleSections(child_id,
304✔
421
                                     DeclaredID(root_sample.id),
152✔
422
                                     declared_to_swc,
423
                                     morph_.soma()->points()[0],
152✔
424
                                     morph_.soma()->diameters()[0],
152✔
425
                                     true);
426
                } else {
427
                    // this is neurite as the start
428
                    assembleSections(root_sample.id,
4✔
429
                                     DeclaredID(SWC_ROOT),
430
                                     declared_to_swc,
431
                                     root_sample.point,
4✔
432
                                     root_sample.diameter,
4✔
433
                                     true);
434
                    break;
4✔
435
                }
436
            }
437
        }
438
    }
68✔
439

440
    void assembleSections(
310✔
441
        unsigned int id,
442
        DeclaredID parent_id,
443
        std::unordered_map<DeclaredID, std::shared_ptr<morphio::mut::Section>>& declared_to_swc,
444
        const Point& start_point,
445
        floatType start_diameter,
446
        bool is_root)
447
    {
448
        Property::PointLevel properties;
620✔
449
        auto& points = properties._points;
310✔
450
        auto& diameters = properties._diameters;
310✔
451

452
        auto appendSection = [&](DeclaredID section_id_, DeclaredID parent_id_, SectionType starting_section_type) {
310✔
453
            std::shared_ptr<morphio::mut::Section> new_section;
310✔
454
            if (is_root) {
310✔
455
                new_section = morph_.appendRootSection(properties, starting_section_type);
156✔
456
            } else {
457
                new_section = declared_to_swc.at(parent_id_)
154✔
458
                                  ->appendSection(properties, starting_section_type);
154✔
459
            }
460
            declared_to_swc[section_id_] = new_section;
310✔
461
        };
310✔
462

463
        auto get_child_count = [&](unsigned int child_id) {
540✔
464
            return children_.count(child_id) == 0 ? 0 : children_.at(child_id).size();
540✔
465
        };
310✔
466

467
        const SWCSample* sample = &samples_.at(id);
310✔
468

469
        // create duplicate point if needed
470
        if (!is_root && sample->point != start_point /*|| sample->diameter != start_diameter */) {
310✔
471
        //if (!(is_root && sample->type == SECTION_SOMA) && sample->point != start_point /*|| sample->diameter != start_diameter */) {
472
            points.push_back(start_point);
154✔
473
            diameters.push_back(start_diameter);
154✔
474
        }
475

476
        // try and combine as many single samples into a single section as possible
477
        size_t children_count = get_child_count(id);
310✔
478
        while (children_count == 1) {
540✔
479
            sample = &samples_.at(id);
232✔
480
            if(sample->type != samples_.at(children_.at(id)[0]).type){
232✔
481
                break;
2✔
482
            }
483
            points.push_back(sample->point);
230✔
484
            diameters.push_back(sample->diameter);
230✔
485
            id = children_.at(id)[0];
230✔
486
            children_count = get_child_count(id);
230✔
487
        }
488

489
        sample = &samples_.at(id);
310✔
490
        points.push_back(sample->point);
310✔
491
        diameters.push_back(sample->diameter);
310✔
492
        appendSection(DeclaredID(id), parent_id, sample->type);
310✔
493

494
        if (children_count == 0) {
310✔
495
            // section was already appended above, nothing to do
496
        } else if (children_count == 1) {
78✔
497
            // section_type changed
498
            size_t offset = properties._points.size() - 1;
2✔
499
            const Point& new_start_point = properties._points[offset];
2✔
500
            floatType new_start_diameter = properties._diameters[offset];
2✔
501
            assembleSections(children_.at(id)[0], DeclaredID(id), declared_to_swc, new_start_point, new_start_diameter, false);
2✔
502
        } else {
503
            size_t offset = properties._points.size() - 1;
76✔
504
            const Point& new_start_point = properties._points[offset];
76✔
505
            floatType new_start_diameter = properties._diameters[offset];
76✔
506
            for (unsigned int child_id : children_.at(id)) {
228✔
507
                assembleSections(child_id,
152✔
508
                                 DeclaredID(id),
509
                                 declared_to_swc,
510
                                 new_start_point,
511
                                 new_start_diameter,
512
                                 false);
513
            }
514
        }
515
    }
310✔
516

517
    std::unordered_map<unsigned int, std::vector<unsigned int>> children_;
518
    std::unordered_map<unsigned int, SWCSample> samples_;
519
    mut::Morphology morph_;
520
    std::string path_;
521
    WarningHandler* warning_handler_;
522
};
523

524

525
}  // namespace details
526

527
namespace readers {
528
namespace swc {
529
Property::Properties load(const std::string& path,
94✔
530
                          const std::string& contents,
531
                          unsigned int options,
532
                          std::shared_ptr<WarningHandler>& warning_handler) {
533
    auto properties =
534
        details::SWCBuilder(path, warning_handler.get()).buildProperties(contents, options);
214✔
535

536
    properties._cellLevel._cellFamily = NEURON;
68✔
537
    properties._cellLevel._version = {"swc", 1, 0};
68✔
538
    return properties;
68✔
539
}
540

541
}  // namespace swc
542
}  // namespace readers
543
}  // namespace morphio
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc