• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

griffithlab / regtools / 3689436247

pending completion
3689436247

push

github

GitHub
Merge pull request #169 from griffithlab/doc_update

2407 of 2774 relevant lines covered (86.77%)

3662.66 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

70.05
/src/junctions/junctions_extractor.cc
1
/*  junctions_extractor.h -- Declarations for `junctions extract` command
2

3
    Copyright (c) 2015, The Griffith Lab
4

5
    Author: Avinash Ramu <aramu@genome.wustl.edu>
6

7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13

14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16

17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24

25
#include <getopt.h>
26
#include <iostream>
27
#include <iomanip>
28
#include <sstream>
29
#include <stdexcept>
30
#include "common.h"
31
#include "junctions_extractor.h"
32
#include "htslib/sam.h"
33
#include "htslib/hts.h"
34
#include "htslib/faidx.h"
35
#include "htslib/kstring.h"
36
#include <unordered_map>
37
#include <unordered_set>
38

39
using namespace std;
40

41
//Parse the options passed to this tool
42
int JunctionsExtractor::parse_options(int argc, char *argv[]) {
13✔
43
    optind = 1; //Reset before parsing again.
13✔
44
    int c;
45
    stringstream help_ss;
13✔
46
    while((c = getopt(argc, argv, "ha:m:M:o:r:t:s:b:")) != -1) {
36✔
47
        switch(c) {
25✔
48
            case 'h':
1✔
49
                usage(help_ss);
1✔
50
                throw common::cmdline_help_exception(help_ss.str());
1✔
51
            case 'a':
2✔
52
                min_anchor_length_ = atoi(optarg);
2✔
53
                break;
2✔
54
            case 'm':
1✔
55
                min_intron_length_ = atoi(optarg);
1✔
56
                break;
1✔
57
            case 'M':
1✔
58
                max_intron_length_ = atoi(optarg);
1✔
59
                break;
1✔
60
            case 'o':
9✔
61
                output_file_ = string(optarg);
9✔
62
                break;
9✔
63
            case 'r':
1✔
64
                region_ = string(optarg);
1✔
65
                break;
1✔
66
            case 't':
×
67
                strand_tag_ = string(optarg);
×
68
                break;
×
69
            case 's':
9✔
70
                if (string(optarg).compare("XS") == 0){
9✔
71
                    strandness_ = 0;
7✔
72
                } else if (string(optarg).compare("RF") == 0) {
2✔
73
                    strandness_ = 1;
2✔
74
                } else if (string(optarg).compare("FR") == 0) {
×
75
                    strandness_ = 2;
×
76
                } else if (string(optarg).compare("intron-motif") == 0) {
×
77
                    strandness_ = 3;
×
78
                } else {
79
                    throw runtime_error("Unrecognized strandness argument!\n\n");
×
80
                }
81
                break;
9✔
82
            case 'b':
×
83
                output_barcodes_file_ = string(optarg);
×
84
                break;
×
85
            case '?':
1✔
86
            default:
87
                usage();
1✔
88
                throw runtime_error("Error parsing inputs!(1)\n\n");
1✔
89
        }
90
    }
91
    if(argc - optind >= 1) {
11✔
92
        bam_ = string(argv[optind++]);
8✔
93
    }
94
    if(argc - optind >= 1) {
11✔
95
        ref_ = string(argv[optind++]);
×
96
    }
97
    if(optind < argc || bam_ == "NA") {
11✔
98
        usage();
3✔
99
        throw runtime_error("Error parsing inputs!(2)\n\n");
3✔
100
    }
101
    if(strandness_ == -1){
8✔
102
        usage();
×
103
        throw runtime_error("Please supply strandness mode with '-s' option!\n\n");
×
104
    }
105
    if(strandness_ == 3){
8✔
106
        if (ref_ == "NA"){
×
107
            usage();
×
108
            throw runtime_error("Strandness mode 'intron-motif' requires a fasta file!\n\n");
×
109
        }
110
    }
111

112
    cerr << "Minimum junction anchor length: " << min_anchor_length_ << endl;
8✔
113
    cerr << "Minimum intron length: " << min_intron_length_ << endl;
8✔
114
    cerr << "Maximum intron length: " << max_intron_length_ << endl;
8✔
115
    cerr << "Alignment: " << bam_ << endl;
8✔
116
    cerr << "Output file: " << output_file_ << endl;
8✔
117
    if (output_barcodes_file_ != "NA"){
8✔
118
        cerr << "Barcode file: " << output_barcodes_file_ << endl;
×
119
    }
120
    cerr << endl;
8✔
121
    return 0;
8✔
122
}
13✔
123

124
//Usage statement for this tool
125
int JunctionsExtractor::usage(ostream& out) {
5✔
126
    out << "Usage:" 
127
        << "\t\t" << "regtools junctions extract [options] indexed_alignments.bam" << endl;
5✔
128
    out << "Options:" << endl;
5✔
129
    out << "\t\t" << "-a INT\tMinimum anchor length. Junctions which satisfy a minimum \n"
130
        << "\t\t\t " << "anchor length on both sides are reported. [8]" << endl;
5✔
131
    out << "\t\t" << "-m INT\tMinimum intron length. [70]" << endl;
5✔
132
    out << "\t\t" << "-M INT\tMaximum intron length. [500000]" << endl;
5✔
133
    out << "\t\t" << "-o FILE\tThe file to write output to. [STDOUT]" << endl;
5✔
134
    out << "\t\t" << "-r STR\tThe region to identify junctions \n"
135
        << "\t\t\t " << "in \"chr:start-end\" format. Entire BAM by default." << endl;
5✔
136
    out << "\t\t" << "-s INT\tStrandness mode \n"
137
        << "\t\t\t " << "XS, use XS tags provided by aligner; RF, first-strand; FR, second-strand. REQUIRED" << endl;
5✔
138
    out << "\t\t" << "-t STR\tTag used in bam to label strand. [XS]" << endl;
5✔
139
        
140
    out << endl;
5✔
141
    return 0;
5✔
142
}
143

144
//Get the BAM filename
145
string JunctionsExtractor::get_bam() {
1✔
146
    return bam_;
1✔
147
}
148

149
//Name the junction based on the number of junctions
150
// in the map.
151
string JunctionsExtractor::get_new_junction_name() {
203✔
152
    int index = junctions_.size() + 1;
203✔
153
    stringstream name_ss;
203✔
154
    name_ss << "JUNC" << setfill('0') << setw(8) << index;
203✔
155
    return name_ss.str();
406✔
156
}
203✔
157

158
//Do some basic qc on the junction
159
bool JunctionsExtractor::junction_qc(Junction &j1) {
77,421✔
160
    if(j1.end - j1.start < min_intron_length_ ||
77,421✔
161
       j1.end - j1.start > max_intron_length_) {
63,237✔
162
        return false;
15,084✔
163
    }
164
    if(j1.start - j1.thick_start >= min_anchor_length_)
62,337✔
165
        j1.has_left_min_anchor = true;
48,793✔
166
    if(j1.thick_end - j1.end >= min_anchor_length_)
62,337✔
167
        j1.has_right_min_anchor = true;
52,342✔
168
    return true;
62,337✔
169
}
170

171
//Add a junction to the junctions map
172
//The read_count field is the number of reads supporting the junction.
173
int JunctionsExtractor::add_junction(Junction j1) {
77,421✔
174
    //Check junction_qc
175
    if(!junction_qc(j1)) {
77,421✔
176
        return 0;
15,084✔
177
    }
178

179
    //Construct key chr:start-end:strand
180
    stringstream s1;
62,337✔
181
    string start, end;
62,337✔
182
    s1 << j1.start; start = s1.str();
62,337✔
183
    s1 << j1.end; end = s1.str();
62,337✔
184
    //since ?,+,- sort differently on different systems
185
    string strand_proxy;
62,337✔
186
    if(j1.strand == "+") {
62,337✔
187
        strand_proxy = "0";
45,743✔
188
    } else if(j1.strand == "-") {
16,594✔
189
        strand_proxy = "1";
13,932✔
190
    } else {
191
        strand_proxy = "2";
2,662✔
192
    }
193
    string key = j1.chrom + string(":") + start + "-" + end + ":" + strand_proxy;
124,674✔
194

195
    //Check if new junction
196
    if(!junctions_.count(key)) {
62,337✔
197
        j1.name = get_new_junction_name();
201✔
198
        j1.read_count = 1;
201✔
199
        j1.score = common::num_to_str(j1.read_count);
201✔
200
    } else { //existing junction
201
        Junction j0 = junctions_[key];
62,136✔
202
        
203
        if (output_barcodes_file_ != "NA"){
62,136✔
204
            unordered_map<string, int>::const_iterator it = j0.barcodes.find(j1.barcodes.begin()->first);
×
205
            
206
            if (it != j0.barcodes.end()) {// barcode exists already
×
207
                j1.barcodes = j0.barcodes;
×
208
                j1.barcodes[it->first]++;
×
209
            } else {
210
                // this block is where the slowness happens - not sure if it's the instantiation or the insertion
211
                //  well, tried to get around instantion by just inserting into j0 but that made it like another 2x slower so I don't think it's that
212
                pair<string, int> tmp_barcode = *j1.barcodes.begin();
×
213
                j1.barcodes = j0.barcodes;
×
214
                j1.barcodes.insert(tmp_barcode);
×
215
            }
×
216
        }
217
        //increment read count
218
        j1.read_count = j0.read_count + 1;
62,136✔
219
        j1.score = common::num_to_str(j1.read_count);
62,136✔
220
        //Keep the same name
221
        j1.name = j0.name;
62,136✔
222
        //Check if thick starts are any better
223
        if(j0.thick_start < j1.thick_start)
62,136✔
224
            j1.thick_start = j0.thick_start;
57,023✔
225
        if(j0.thick_end > j1.thick_end)
62,136✔
226
            j1.thick_end = j0.thick_end;
12,558✔
227
        //preserve min anchor information
228
        j1.has_left_min_anchor = j1.has_left_min_anchor || j0.has_left_min_anchor;
62,136✔
229
        j1.has_right_min_anchor = j1.has_right_min_anchor || j0.has_right_min_anchor;
62,136✔
230
    }
62,136✔
231
    //Add junction and check anchor while printing.
232
    junctions_[key] = j1;
62,337✔
233
    return 0;
62,337✔
234
}
62,337✔
235

236
//Print all the junctions - this function needs work
237
vector<Junction> JunctionsExtractor::get_all_junctions() {
142✔
238
    //Sort junctions by position
239
    if(!junctions_sorted_) {
142✔
240
        create_junctions_vector();
142✔
241
        sort_junctions(junctions_vector_);
142✔
242
        junctions_sorted_ = true;
142✔
243
    }
244
    return junctions_vector_;
142✔
245
}
246

247
//Print all the junctions - this function needs work
248
void JunctionsExtractor::print_all_junctions(ostream& out) {
7✔
249
    ofstream fout;
7✔
250
    ofstream fout_barcodes;
7✔
251
    if(output_file_ != string("NA")) {
7✔
252
        fout.open(output_file_.c_str());
6✔
253
    }
254
    if(output_barcodes_file_!= string("NA")) {
7✔
255
        fout_barcodes.open(output_barcodes_file_.c_str());
×
256
    }
257
    //Sort junctions by position
258
    if(!junctions_sorted_) {
7✔
259
        create_junctions_vector();
7✔
260
        sort_junctions(junctions_vector_);
7✔
261
        junctions_sorted_ = true;
7✔
262
    }
263
    for(vector<Junction> :: iterator it = junctions_vector_.begin();
7✔
264
        it != junctions_vector_.end(); it++) {
173✔
265
        Junction j1 = *it;
166✔
266
        if(j1.has_left_min_anchor && j1.has_right_min_anchor) {
166✔
267
            if(fout.is_open())
146✔
268
                j1.print(fout);
143✔
269
            else
270
                j1.print(out);
3✔
271
            if(fout_barcodes.is_open())
146✔
272
                j1.print_barcodes(fout_barcodes);
×
273
        }
274
    }
166✔
275
    if(fout.is_open())
7✔
276
        fout.close();
6✔
277
    if(fout_barcodes.is_open())
7✔
278
        fout_barcodes.close();
×
279
}
7✔
280

281
//Get the strand from the XS aux tag
282
void JunctionsExtractor::set_junction_strand_XS(bam1_t *aln, Junction& j1) {
47,235✔
283
    uint8_t *p = bam_aux_get(aln, strand_tag_.c_str());
47,235✔
284
    if(p != NULL) {
47,235✔
285
        char strand = bam_aux2A(p);
47,235✔
286
        strand ? j1.strand = string(1, strand) : j1.strand = string(1, '?');
47,235✔
287
        //cerr <<"XS strand is " << strand << endl;
288
    } else {
289
        //cerr <<"XS strand is NULL" << endl;
290
        j1.strand = string(1, '?');
×
291
        return;
×
292
    }
293
}
294

295
//Get the strand from the bitwise flag
296
void JunctionsExtractor::set_junction_strand_flag(bam1_t *aln, Junction& j1) {
30,180✔
297
    uint32_t flag = (aln->core).flag;
30,180✔
298
    int reversed = (flag >> 4) % 2;
30,180✔
299
    int mate_reversed = (flag >> 5) % 2;
30,180✔
300
    int first_in_pair = (flag >> 6) % 2;
30,180✔
301
    int second_in_pair = (flag >> 7) % 2;
30,180✔
302
    // strandness_ is 1 for RF, and 2 for FR
303
    int bool_strandness = strandness_ - 1;
30,180✔
304
    int first_strand = !bool_strandness ^ first_in_pair ^ reversed;
30,180✔
305
    int second_strand = !bool_strandness ^ second_in_pair ^ mate_reversed;
30,180✔
306
    char strand;
307
    if (first_strand){
30,180✔
308
        strand = '+';
15,022✔
309
    } else {
310
        strand = '-';
15,158✔
311
    }
312
    //cerr << "flag is " << flag << endl;
313
    // if strand inferences from first and second in pair don't agree, we've got a problem
314
    if (first_strand == second_strand){
30,180✔
315
        j1.strand = string(1, strand);
27,518✔
316
    } else {
317
        j1.strand = string(1, '?');
2,662✔
318
    }
319
    //cerr <<"flag strand is " << j1.strand << endl;
320
    return;
30,180✔
321
}
322

323
//Get strand based on splice-site/intron motif
324
void JunctionsExtractor::set_junction_strand_intron_motif(string intron_motif, Junction& j1) {
×
325
    unordered_set<string> plus_motifs;
×
326
    plus_motifs.insert("GT-AG");
×
327
    plus_motifs.insert("GC-AG");
×
328
    plus_motifs.insert("AT-AC");
×
329
    unordered_set<string> minus_motifs;
×
330
    minus_motifs.insert("CT-AC");
×
331
    minus_motifs.insert("CT-GC");
×
332
    minus_motifs.insert("GT-AT");
×
333

334
    if (plus_motifs.find(intron_motif) != plus_motifs.end()){
×
335
        j1.strand = string(1, '+');
×
336
    } else if (minus_motifs.find(intron_motif) != minus_motifs.end()){
×
337
        j1.strand = string(1, '-');
×
338
    } else {
339
        j1.strand = string(1, '?');
×
340
    }
341
}
×
342

343
//Get the strand
344
void JunctionsExtractor::set_junction_strand(bam1_t *aln, Junction& j1, string intron_motif) {
77,415✔
345
    // if fasta was supplied
346
    if (ref_ != "NA"){
77,415✔
347
        set_junction_strand_intron_motif(intron_motif, j1);
×
348
    }
349
    // if you supplied extra strand information, try to override ?
350
    if (ref_ == "NA" || j1.strand.compare("?") == 0){
77,415✔
351
        if (strandness_ == 0){
77,415✔
352
            set_junction_strand_XS(aln, j1);
47,235✔
353
        } else {
354
            set_junction_strand_flag(aln, j1);
30,180✔
355
        }
356
    }
357
    return;
77,415✔
358
}
359

360
//Get the the barcode
361
void JunctionsExtractor::set_junction_barcode(bam1_t *aln, Junction& j1) {
×
362
    uint8_t *p = bam_aux_get(aln, barcode_tag_.c_str());
×
363
    if(p != NULL) {
×
364
        char *barcode_ptr = bam_aux2Z(p);
×
365
        string barcode (barcode_ptr);
×
366
        // cerr << barcode << " DEBUGGING" << endl;
367
        j1.barcodes.insert(pair<string, int>(barcode,1));
×
368
    } else {
×
369
        j1.barcodes.insert(pair<string, int>(string(1, '?'),1));
×
370
        cerr << "WARNING: No CB tag found for alignment (id = " << to_string(aln->id) << ")" << endl;
×
371
        return;
×
372
    }
373
}
374

375
//Parse junctions from the read and store in junction map
376
int JunctionsExtractor::parse_alignment_into_junctions(bam_hdr_t *header, bam1_t *aln) {
161,902✔
377
    int n_cigar = aln->core.n_cigar;
161,902✔
378
    if (n_cigar <= 1) // max one cigar operation exists(likely all matches)
161,902✔
379
        return 0;
89,380✔
380

381
    int chr_id = aln->core.tid;
72,522✔
382
    int read_pos = aln->core.pos;
72,522✔
383
    string chr(header->target_name[chr_id]);
72,522✔
384
    uint32_t *cigar = bam_get_cigar(aln);
72,522✔
385

386
    Junction j1;
72,522✔
387
    j1.chrom = chr;
72,522✔
388
    j1.start = read_pos; //maintain start pos of junction
72,522✔
389
    j1.thick_start = read_pos;
72,522✔
390
    string intron_motif;
72,522✔
391
    
392
    if (output_barcodes_file_ != "NA"){
72,522✔
393
        set_junction_barcode(aln, j1);
×
394
    }
395
    bool started_junction = false;
72,522✔
396
    for (int i = 0; i < n_cigar; ++i) {
302,092✔
397
        char op =
229,570✔
398
               bam_cigar_opchr(cigar[i]);
229,570✔
399
        int len =
229,570✔
400
               bam_cigar_oplen(cigar[i]);
229,570✔
401
        switch(op) {
229,570✔
402
            case 'N':
77,415✔
403
                if(!started_junction) {
77,415✔
404
                    j1.end = j1.start + len;
71,770✔
405
                    j1.thick_end = j1.end;
71,770✔
406
                    //Start the first one and remains started
407
                    started_junction = true;
71,770✔
408
                } else {
409
                    //Add the previous junction
410
                    try {
411
                        if (ref_ != "NA"){
5,645✔
412
                            intron_motif = get_splice_site(j1);
×
413
                        }
414
                        set_junction_strand(aln, j1, intron_motif);
5,645✔
415
                        add_junction(j1);
5,645✔
416
                    } catch (const std::logic_error& e) {
×
417
                        cout << e.what() << '\n';
×
418
                    }
×
419
                    j1.thick_start = j1.end;
5,645✔
420
                    j1.start = j1.thick_end;
5,645✔
421
                    j1.end = j1.start + len;
5,645✔
422
                    j1.thick_end = j1.end;
5,645✔
423
                    //For clarity - the next junction is now open
424
                    started_junction = true;
5,645✔
425
                    // YYF reset intron_motif? I guess theoretically it doesn't matter since N is the only case where it will/must be changed
426
                }
427
                break;
77,415✔
428
            case '=':
151,046✔
429
            case 'M':
430
                if(!started_junction)
151,046✔
431
                    j1.start += len;
73,631✔
432
                else
433
                    j1.thick_end += len;
77,415✔
434
                break;
151,046✔
435
            //No mismatches allowed in anchor
436
            case 'D':
577✔
437
            case 'X':
438
                if(!started_junction) {
577✔
439
                    j1.start += len;
443✔
440
                    j1.thick_start = j1.start;
443✔
441
                } else {
442
                    try {
443
                        if (ref_ != "NA"){
134✔
444
                            intron_motif = get_splice_site(j1);
×
445
                        }
446
                        set_junction_strand(aln, j1, intron_motif);
134✔
447
                        add_junction(j1);
134✔
448
                    } catch (const std::logic_error& e) {
×
449
                        cout << e.what() << '\n';
×
450
                    }
×
451
                    //Don't include these in the next anchor
452
                    j1.start = j1.thick_end + len;
134✔
453
                    j1.thick_start = j1.start;
134✔
454
                }
455
                started_junction = false;
577✔
456
                break;
577✔
457
            case 'I':
532✔
458
            case 'S':
459
                if(!started_junction)
532✔
460
                    j1.thick_start = j1.start;
491✔
461
                else {
462
                    try {
463
                        if (ref_ != "NA"){
41✔
464
                            intron_motif = get_splice_site(j1);
×
465
                        }
466
                        set_junction_strand(aln, j1, intron_motif);
41✔
467
                        add_junction(j1);
41✔
468
                    } catch (const std::logic_error& e) {
×
469
                        cout << e.what() << '\n';
×
470
                    }
×
471
                    //Don't include these in the next anchor
472
                    j1.start = j1.thick_end;
41✔
473
                    j1.thick_start = j1.start;
41✔
474
                }
475
                started_junction = false;
532✔
476
                break;
532✔
477
            case 'H':
×
478
                break;
×
479
            default:
×
480
                cerr << "Unknown cigar " << op;
×
481
                break;
×
482
        }
483
    }
484
    if(started_junction) {
72,522✔
485
        try {
486
            if (ref_ != "NA"){
71,595✔
487
                intron_motif = get_splice_site(j1);
×
488
            }
489
            set_junction_strand(aln, j1, intron_motif);
71,595✔
490
            add_junction(j1);
71,595✔
491
        } catch (const std::logic_error& e) {
×
492
            cout << e.what() << '\n';
×
493
        }
×
494
    }
495
    return 0;
72,522✔
496
}
72,522✔
497

498
//The workhorse - identifies junctions from BAM
499
int JunctionsExtractor::identify_junctions_from_BAM() {
149✔
500
    if(!bam_.empty()) {
149✔
501
        //open BAM for reading
502
        samFile *in = sam_open(bam_.c_str(), "r");
149✔
503
        if(in == NULL) {
149✔
504
            throw runtime_error("Unable to open BAM/SAM file.\n\n");
1✔
505
        }
506
        //Load the index
507
        hts_idx_t *idx = sam_index_load(in, bam_.c_str());
148✔
508
        if(idx == NULL) {
148✔
509
            throw runtime_error("Unable to open BAM/SAM index."
×
510
                                " Make sure alignments are indexed\n\n");
×
511
        }
512
        //Get the header
513
        bam_hdr_t *header = sam_hdr_read(in);
148✔
514
        //Initialize iterator
515
        hts_itr_t *iter = NULL;
148✔
516
        //Move the iterator to the region we are interested in
517
        iter  = sam_itr_querys(idx, header, region_.c_str());
148✔
518
        if(header == NULL || iter == NULL) {
148✔
519
            sam_close(in);
×
520
            throw runtime_error("Unable to iterate to region within BAM.\n\n");
×
521
        }
522
        //Initiate the alignment record
523
        bam1_t *aln = bam_init1();
148✔
524
        while(sam_itr_next(in, iter, aln) >= 0) {
162,050✔
525
            parse_alignment_into_junctions(header, aln);
161,902✔
526
        }
527
        hts_itr_destroy(iter);
148✔
528
        hts_idx_destroy(idx);
148✔
529
        bam_destroy1(aln);
148✔
530
        bam_hdr_destroy(header);
148✔
531
        sam_close(in);
148✔
532
    }
533
    return 0;
148✔
534
}
535

536
//Create the junctions vector from the map
537
void JunctionsExtractor::create_junctions_vector() {
149✔
538
    for(map<string, Junction> :: iterator it = junctions_.begin();
149✔
539
        it != junctions_.end(); it++) {
349✔
540
        Junction j1 = it->second;
200✔
541
        junctions_vector_.push_back(j1);
200✔
542
    }
200✔
543
}
149✔
544

545
// I'm stealing these from JunctionsAnnotator - so there's some code redundancy - YYF
546
//Get the reference sequence at a particular coordinate
547
string JunctionsExtractor::get_reference_sequence(string position) {
×
548
    int len;
549
    faidx_t *fai = fai_load(ref_.c_str());
×
550
    char *s = fai_fetch(fai, position.c_str(), &len);
×
551
    if(s == NULL)
×
552
        throw runtime_error("Unable to extract FASTA sequence "
×
553
                             "for position " + position + "\n\n");
×
554
    std::string seq(s);
×
555
    free(s);
×
556
    fai_destroy(fai);
×
557
    return seq;
×
558
}
×
559

560
//Get the splice_site bases
561
// note this was basically just taken from junctions annotator but now line is a Junction not an AnnotatedJunction
562
//  so the end is off by 1 and I'm returning a string since i will just be checking it and then tossing/not saving as a member
563
string JunctionsExtractor::get_splice_site(Junction & line) {
×
564
    string position1 = line.chrom + ":" +
×
565
                      common::num_to_str(line.start + 1) + "-" + common::num_to_str(line.start + 1 + 1);
×
566
    string position2 = line.chrom + ":" +
×
567
                      common::num_to_str(line.end + 1 - 2 ) + "-" + common::num_to_str(line.end + 1 - 1);
×
568
    string seq1, seq2, splice_site;
×
569
    try {
570
        seq1 = get_reference_sequence(position1);
×
571
        seq2 = get_reference_sequence(position2);
×
572
    } catch (const runtime_error& e) {
×
573
        throw e;
×
574
    }
×
575
    if(line.strand == "-") {
×
576
        seq1 = common::rev_comp(seq1);
×
577
        seq2 = common::rev_comp(seq2);
×
578
        splice_site = seq2 + "-" + seq1;
×
579
    } else {
580
        splice_site = seq1 + "-" + seq2;
×
581
    }
582
    return splice_site;
×
583
}
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2026 Coveralls, Inc