• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jlab / fold-grammars / 13565741064

27 Feb 2025 11:52AM UTC coverage: 36.296% (+36.3%) from 0.0%
13565741064

Pull #90

github

web-flow
Merge c9a78c7c6 into 0ad54ba86
Pull Request #90: Rnahybrid add targetbreaking

334 of 367 new or added lines in 8 files covered. (91.01%)

1454 of 4006 relevant lines covered (36.3%)

0.36 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

30.3
/Misc/Applications/RNAhybrid/RNAhybrid.py
1
from math import log, exp
1✔
2
import os
1✔
3
import click
1✔
4
import click_option_group
1✔
5
from hashlib import md5
1✔
6
from multiprocessing import Pool, cpu_count
1✔
7

8
from parse_gapc import *
1✔
9
from execute import *
1✔
10
from tempfile import gettempdir
1✔
11
from input import read_CT_file, disentangle_knots, get_minimal_valid_substructure, nested_pairs_to_dotBracket, get_left_right_positions
1✔
12
from output import *
1✔
13
import pickle
1✔
14

15
PROGNAME = 'RNAhybrid'
1✔
16
# values taken from RNAhybrid-2.1.2/src/globals.h
17
DISTRIBUTION = {
1✔
18
    '3utr_fly': {
19
        'xi_slope': -0.03144,
20
        'xi_intercept': 0.7201,
21
        'theta_slope': -0.003429,
22
        'theta_intercept': 0.01634},
23
    '3utr_human':{
24
        'xi_slope': -0.01237,
25
        'xi_intercept': 1.901,
26
        'theta_slope': -0.001678,
27
        'theta_intercept': 0.1349},
28
    '3utr_worm':{
29
        'xi_slope': -0.01074,
30
        'xi_intercept': 1.769,
31
        'theta_slope': -0.001154,
32
        'theta_intercept': 0.1419}}
33

34
def wrap_process(all_data):
1✔
35
    return process_onetarget_onemirna(*all_data)
×
36

37
def process_eval(sequence, dotBracket, verbose, cache, settings):
1✔
38
    # settings for eval binary call
NEW
39
    eval_settings = settings.copy()
×
NEW
40
    eval_settings.update({'allow_lonelypairs': True})
×
41

NEW
42
    cmd_eval = compose_call('eval', 'microstate', sequence, None, inp_structure=dotBracket, **eval_settings)
×
NEW
43
    raw_eval = cache_execute(cmd_eval, cache, '.rnaeval', verbose)
×
NEW
44
    res_eval = Product(TypeMFE()).parse_lines(raw_eval)
×
45

NEW
46
    return res_eval[0]['mfe']
×
47

48
def process_onetarget_onemirna(entry_target, pos_target, entry_mirna, pos_mirna, mdes, distribution, pretrained_set, verbose, cache, settings):
1✔
49
    settings['xi'] = 0
×
50
    settings['theta'] = 0
×
51

NEW
52
    if not distribution and pretrained_set:
×
53
        # estimate evd parameters from maximal duplex energy
NEW
54
        settings['xi'] = DISTRIBUTION[pretrained_set]['xi_slope'] * mdes[entry_mirna[0]] + DISTRIBUTION[pretrained_set]['xi_intercept']
×
NEW
55
        settings['theta'] = DISTRIBUTION[pretrained_set]['theta_slope'] * mdes[entry_mirna[0]] + DISTRIBUTION[pretrained_set]['theta_intercept']
×
56

57
    cmd_hybrid = compose_call('khorshid', 'rnahybrid', entry_target[1], entry_mirna[1], **settings)
×
NEW
58
    raw_hybrid = cache_execute(cmd_hybrid, cache, '.rnahybrid', verbose)
×
59

60
    #res_stacklen = Product(Product(TypeInt('stacklen'), TypeMFE()), TypeHybrid()).parse_lines(raw_hybrid)
61
    res_stacklen = Product(Product(TypeKhorshid(), TypeMFE()), TypeHybrid()).parse_lines(raw_hybrid)
×
62
    # add original positions for each answer of
63
    #   a) target sequence position in input file
64
    #   b) mirna sequence position in input file
65
    #   c) position in gapc raw result
NEW
66
    target_structure = None
×
NEW
67
    if (len(entry_target) >= 3) and (entry_target[2] is not None):
×
68
        # If user provides ONE given secondary structure for target sequence,
69
        # we get this set of pairs as the third component of the entry_target tuple.
NEW
70
        target_structure = entry_target[2]
×
71

72
    for pos_answer, answer in enumerate(res_stacklen):
×
73
        answer['pos_target'] = pos_target
×
74
        answer['pos_mirna'] = pos_mirna
×
75
        answer['pos_answer'] = pos_answer
×
76

77
        answer['target_name'] = entry_target[0]
×
78
        answer['target_length'] = len(entry_target[1])
×
79
        answer['mirna_name'] = entry_mirna[0]
×
80
        answer['mirna_length'] = len(entry_mirna[1])
×
81

82
        # calculate p-value for answer
83
        normalized_energy = (answer['mfe'] / 100) / log(answer['target_length'] * answer['mirna_length'])
×
84
        answer['p-value'] = 1 - exp(-1 * exp(-1 * ((-1 * normalized_energy - settings['xi']) / settings['theta'])))
×
85

86
        # calculate the free energy that is lost if existing base-pairs in the target structure have to be broken to enable new pairs with miRNA
NEW
87
        if target_structure is not None:
×
88
            # get list of target bases involved in novel miRNA binding
NEW
89
            novel_target_pairing_partners = [int(x) for x in answer['listOfTargetPartners'].split(',') if x != ""]
×
90
            # obtain original pairing partners in target structure
NEW
91
            original_target_pairs = {o: c for o, c in {x: target_structure.get(x, -1) for x in novel_target_pairing_partners}.items() if c != -1}
×
NEW
92
            if len(original_target_pairs) > 0:
×
93
                # extend set of target pairs to valid sub-structure
NEW
94
                sub_structure = get_minimal_valid_substructure(target_structure, original_target_pairs)
×
95
                # sort involved base-pairs for extended sub-structure to enable easy access to extended left and right limits
NEW
96
                (left, right) = get_left_right_positions(sub_structure)
×
NEW
97
                sub_sequence = entry_target[1][left-1:right]
×
98
                # free energy of affected target sub-structure
NEW
99
                answer['original_target_substructure_energy'] = process_eval(sub_sequence, nested_pairs_to_dotBracket(sub_structure), verbose, cache, settings)
×
100
                # free energy of affected target sub-structure, when existing base-pairs are broken to enable miRNA binding
NEW
101
                answer['broken_target_substructure_energy'] = process_eval(sub_sequence, nested_pairs_to_dotBracket(sub_structure, novel_target_pairing_partners), verbose, cache, settings)
×
102

103
    return res_stacklen
×
104

105

106
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
1✔
107
@click_option_group.optgroup.group(
1✔
108
    'Target Input', cls=click_option_group.RequiredMutuallyExclusiveOptionGroup)
109
@click_option_group.optgroup.option(
1✔
110
    '-t', '--target', type=str, help='An plain (i.e. no header/name) RNA sequence to be searched for miRNA targets.')
111
@click_option_group.optgroup.option(
1✔
112
    '-tf', '--target_file', type=click.Path(exists=True, dir_okay=False), help='Read one or multiple target RNA sequences from a FASTA file.')
113
@click_option_group.optgroup.option(
1✔
114
    '-tct', '--target_CT_file', type=click.Path(exists=True, dir_okay=False), help='Read a target sequence/structure pair from a CT file and relate hybrid energies to energy necessary to break given target structure.')
115
@click_option_group.optgroup.group(
1✔
116
    'miRNA Input', cls=click_option_group.RequiredMutuallyExclusiveOptionGroup)
117
@click_option_group.optgroup.option(
1✔
118
    '-q', '--mirna', type=str, help='A plain (i.e. no header/name) microRNA sequence that shall bind to a target sequence.')
119
@click_option_group.optgroup.option(
1✔
120
    '-qf', '--mirna_file', type=click.Path(exists=True, dir_okay=False), help='Read one or multiple miRNA sequences from a FASTA file.')
121
@click_option_group.optgroup.group(
1✔
122
    'p-value calculation', cls=click_option_group.RequiredMutuallyExclusiveOptionGroup)
123
@click_option_group.optgroup.option(
1✔
124
    '-s', '--pretrained_set', type=click.Choice(['3utr_fly','3utr_worm','3utr_human']), help='A pre-trained set of organism specific values.')
125
@click_option_group.optgroup.option(
1✔
126
    '-d', '--distribution', nargs=2, type=float, help='<xi> and <theta> are the position and shape parameters, respectively, of the extreme value distribution assumed for p-value calculation. For example enter "0.91 0.25" without the quotes.')
127
@click.option(
1✔
128
    '--binPath', type=str, default="", help='%s expects that according Bellman\'s GAP compiled binaries are located in the same directory as the Python wrapper is. Should you moved them into another directory, you must set --binPath to this new location!' % PROGNAME)
129
@click.option(
1✔
130
    '--filter-minmax-seedlength', type=int, nargs=2, default=(0,9999), help='Minimal and maximal number of base-pairs in the seed helix.')
131
@click.option(
1✔
132
    '--filter-minmax-mirnabulgelength', type=int, nargs=2, default=(0,9999), help='Minimal and maximal number of bulged bases in microRNA directly after seed helix.')
133
@click.option(
1✔
134
    '--filter-max-energy', type=int, nargs=1, default=0, help='Maximal free energy (not that stable energies are negative)')
135
@click.option(
1✔
136
    '--filter-max-pvalue', type=click.FloatRange(0, 1), default=1, help='Maximal p-value (the smaller the less likely the result is due to rare chance)')
137
@click.option(
1✔
138
    '--verbose/--no-verbose', type=bool, default=False, help="Print verbose messages.")
139
@click.option(
1✔
140
    '--cache/--no-cache', type=bool, default=False, help="Cache the last execution and reuse if input does not change. Will store files to '%s'" % gettempdir())
141
@click.option(
1✔
142
    '--stream-output/--no-stream-output', type=bool, default=False, help="Internally, computation is done per target per miRNA. By default, results are reported once ALL computations are done. With this setting you can make %s print results as soon as they are available - with the downside that ordering might be a bit more unintuitive." % (PROGNAME))
143
@click.option(
1✔
144
    "--num-cpus", type=click.IntRange(1, cpu_count()), default=1, help="Number of CPU-cores to use. Default is 1, i.e. single-threaded. Note that --stream-output cannot be used as concurrent sub-tasks would overwrite their results.")
145
@click.option(
1✔
146
    '--sam', type=click.File('w'), required=False, help="Provide a filename if you want to make %s report results as a *.sam file, such that you can view hit positions in a genome browser like IGV." % PROGNAME)
147
def RNAhybrid(target, target_file, target_ct_file, mirna, mirna_file, pretrained_set, distribution, binpath, filter_minmax_seedlength, filter_minmax_mirnabulgelength, filter_max_energy, filter_max_pvalue, verbose, cache, stream_output, num_cpus, sam):
1✔
148
    settings = dict()
×
149

150
    settings['binpath'] = binpath
×
151
    settings['program_name'] = PROGNAME
×
152

153
    if filter_minmax_seedlength[0] > filter_minmax_seedlength[1]:
×
154
        raise ValueError("minimum for seed length must be smaller than maximum!")
×
155
    if filter_minmax_mirnabulgelength[0] > filter_minmax_mirnabulgelength[1]:
×
156
        raise ValueError("minimum for bulge must be smaller than maximum!")
×
157

158
    entries_mirnas = []
×
159
    if mirna_file:
×
160
        entries_mirnas = list(read_fasta(mirna_file))
×
161
    else:
162
        entries_mirnas = [('from commandline', mirna)]
×
163

164
    entries_targets = []
×
165
    if target_file:
×
166
        entries_targets = read_fasta(target_file)
×
NEW
167
    elif target_ct_file:
×
NEW
168
        entries_targets = map(lambda x: (x[0], x[1], disentangle_knots(x[2])['nested']),
×
169
                              read_CT_file(target_ct_file))
170
    else:
171
        entries_targets = [('from commandline', target)]
×
172

173
    # compute maximum duplex energies once per miRNA
174
    mdes = dict()
×
NEW
175
    if not distribution and pretrained_set:
×
176
        for entry_mirna in entries_mirnas:
×
177
            cmd_mde = compose_call('mde', '', entry_mirna[1], complement(entry_mirna[1]), **settings)
×
NEW
178
            raw_mde = cache_execute(cmd_mde, cache, '.mde', verbose)
×
NEW
179
            res_mde = Product(TypeFloat('mde')).parse_lines(raw_mde)[0]
×
180
            mdes[entry_mirna[0]] = res_mde['mde'] / 100
×
181

182
    result_nr = 1
×
183
    answers = []
×
184
    tasks = []
×
185
    targets = []
×
186
    total_mirnas = 0
×
187
    for pos_target, entry_target in enumerate(entries_targets):
×
188
        targets.append([entry_target[0], len(entry_target[1])])
×
189
        for pos_mirna, entry_mirna in enumerate(entries_mirnas):
×
190
            if pos_target == 0:
×
191
                total_mirnas += 1
×
NEW
192
            args = (entry_target, pos_target, entry_mirna, pos_mirna, mdes, distribution, pretrained_set, verbose, cache, settings)
×
193
            if num_cpus == 1:
×
194
                res_stacklen = process_onetarget_onemirna(*args)
×
195
                if stream_output:
×
196
                    answers = res_stacklen
×
197
                    answers = filter_answers(answers, filter_minmax_seedlength, filter_minmax_mirnabulgelength, filter_max_energy, filter_max_pvalue)
×
198
                    result_nr += print_answers(answers, result_nr)
×
199
                else:
200
                    answers.extend(res_stacklen)
×
201
            else:
202
                tasks.append(args)
×
203

204
    if tasks != []:
×
205
        pool = Pool(num_cpus)
×
206
        for res in zip(pool.map(wrap_process, tasks)):
×
207
            answers.extend(res[0])
×
208

209
    if (not stream_output) or (tasks != []):
×
210
        answers = filter_answers(answers, filter_minmax_seedlength, filter_minmax_mirnabulgelength, filter_max_energy, filter_max_pvalue)
×
211
        result_nr += print_answers(answers, result_nr, out=sys.stdout)
×
212

213
    print_summary(answers, len(targets), total_mirnas)
×
214
    if sam:
×
215
        print_sam(answers, targets, sam)
×
216

217
if __name__ == '__main__':
1✔
218
    RNAhybrid()
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc