• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

jlab / fold-grammars / 13575009990

27 Feb 2025 08:11PM UTC coverage: 36.409% (+36.4%) from 0.0%
13575009990

Pull #90

github

web-flow
Merge 2912168c2 into 0ad54ba86
Pull Request #90: Rnahybrid add targetbreaking

350 of 410 new or added lines in 9 files covered. (85.37%)

1472 of 4043 relevant lines covered (36.41%)

0.36 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

30.13
/Misc/Applications/RNAhybrid/RNAhybrid.py
1
from math import log, exp
1✔
2
import os
1✔
3
import click
1✔
4
import click_option_group
1✔
5
from hashlib import md5
1✔
6
from multiprocessing import Pool, cpu_count
1✔
7
from tempfile import mkstemp
1✔
8
from io import StringIO
1✔
9
from pathlib import Path
1✔
10

11
from parse_gapc import *
1✔
12
from execute import *
1✔
13
from tempfile import gettempdir
1✔
14
from input import read_fasta, read_CT_file, disentangle_knots, get_minimal_valid_substructure, nested_pairs_to_dotBracket, get_left_right_positions
1✔
15
from output import *
1✔
16
import pickle
1✔
17
import datetime
1✔
18
import getpass
1✔
19
import socket
1✔
20

21
PROGNAME = 'RNAhybrid'
1✔
22
# values taken from RNAhybrid-2.1.2/src/globals.h
23
DISTRIBUTION = {
1✔
24
    '3utr_fly': {
25
        'xi_slope': -0.03144,
26
        'xi_intercept': 0.7201,
27
        'theta_slope': -0.003429,
28
        'theta_intercept': 0.01634},
29
    '3utr_human':{
30
        'xi_slope': -0.01237,
31
        'xi_intercept': 1.901,
32
        'theta_slope': -0.001678,
33
        'theta_intercept': 0.1349},
34
    '3utr_worm':{
35
        'xi_slope': -0.01074,
36
        'xi_intercept': 1.769,
37
        'theta_slope': -0.001154,
38
        'theta_intercept': 0.1419}}
39

40
def wrap_process(all_data):
1✔
41
    return process_onetarget_onemirna(*all_data)
×
42

43
def process_eval(sequence, dotBracket, verbose, cache, settings):
1✔
44
    # settings for eval binary call
NEW
45
    eval_settings = settings.copy()
×
NEW
46
    eval_settings.update({'allow_lonelypairs': True})
×
47

NEW
48
    cmd_eval = compose_call('eval', 'microstate', sequence, None, inp_structure=dotBracket, **eval_settings)
×
NEW
49
    raw_eval = cache_execute(cmd_eval, cache, '.rnaeval', verbose)
×
NEW
50
    res_eval = Product(TypeMFE()).parse_lines(raw_eval)
×
51

NEW
52
    return res_eval[0]['mfe']
×
53

54
def process_onetarget_onemirna(entry_target, pos_target, entry_mirna, pos_mirna, mdes, distribution, pretrained_set, verbose, cache, settings):
1✔
55
    settings['xi'] = 0
×
56
    settings['theta'] = 0
×
57

NEW
58
    if not distribution and pretrained_set:
×
59
        # estimate evd parameters from maximal duplex energy
NEW
60
        settings['xi'] = DISTRIBUTION[pretrained_set]['xi_slope'] * mdes[entry_mirna[0]] + DISTRIBUTION[pretrained_set]['xi_intercept']
×
NEW
61
        settings['theta'] = DISTRIBUTION[pretrained_set]['theta_slope'] * mdes[entry_mirna[0]] + DISTRIBUTION[pretrained_set]['theta_intercept']
×
62

63
    cmd_hybrid = compose_call('khorshid', 'rnahybrid', entry_target[1], entry_mirna[1], **settings)
×
NEW
64
    raw_hybrid = cache_execute(cmd_hybrid, cache, '.rnahybrid', verbose)
×
65

66
    #res_stacklen = Product(Product(TypeInt('stacklen'), TypeMFE()), TypeHybrid()).parse_lines(raw_hybrid)
67
    res_stacklen = Product(Product(TypeKhorshid(), TypeMFE()), TypeHybrid()).parse_lines(raw_hybrid)
×
68
    # add original positions for each answer of
69
    #   a) target sequence position in input file
70
    #   b) mirna sequence position in input file
71
    #   c) position in gapc raw result
NEW
72
    target_structure = None
×
NEW
73
    if (len(entry_target) >= 3) and (entry_target[2] is not None):
×
74
        # If user provides ONE given secondary structure for target sequence,
75
        # we get this set of pairs as the third component of the entry_target tuple.
NEW
76
        target_structure = entry_target[2]
×
77

78
    for pos_answer, answer in enumerate(res_stacklen):
×
79
        answer['pos_target'] = pos_target
×
80
        answer['pos_mirna'] = pos_mirna
×
81
        answer['pos_answer'] = pos_answer
×
82

83
        answer['target_name'] = entry_target[0]
×
84
        answer['target_length'] = len(entry_target[1])
×
85
        answer['mirna_name'] = entry_mirna[0]
×
86
        answer['mirna_length'] = len(entry_mirna[1])
×
87

88
        # calculate p-value for answer
89
        normalized_energy = (answer['mfe'] / 100) / log(answer['target_length'] * answer['mirna_length'])
×
90
        answer['p-value'] = 1 - exp(-1 * exp(-1 * ((-1 * normalized_energy - settings['xi']) / settings['theta'])))
×
91

92
        # calculate the free energy that is lost if existing base-pairs in the target structure have to be broken to enable new pairs with miRNA
NEW
93
        if target_structure is not None:
×
94
            # get list of target bases involved in novel miRNA binding
NEW
95
            novel_target_pairing_partners = [int(x) for x in answer['listOfTargetPartners'].split(',') if x != ""]
×
96
            # obtain original pairing partners in target structure
NEW
97
            original_target_pairs = {o: c for o, c in {x: target_structure.get(x, -1) for x in novel_target_pairing_partners}.items() if c != -1}
×
NEW
98
            if len(original_target_pairs) > 0:
×
99
                # extend set of target pairs to valid sub-structure
NEW
100
                sub_structure = get_minimal_valid_substructure(target_structure, original_target_pairs)
×
101
                # sort involved base-pairs for extended sub-structure to enable easy access to extended left and right limits
NEW
102
                (left, right) = get_left_right_positions(sub_structure)
×
NEW
103
                sub_sequence = entry_target[1][left-1:right]
×
104
                # free energy of affected target sub-structure
NEW
105
                answer['original_target_substructure_energy'] = process_eval(sub_sequence, nested_pairs_to_dotBracket(sub_structure), verbose, cache, settings)
×
106
                # free energy of affected target sub-structure, when existing base-pairs are broken to enable miRNA binding
NEW
107
                answer['broken_target_substructure_energy'] = process_eval(sub_sequence, nested_pairs_to_dotBracket(sub_structure, novel_target_pairing_partners), verbose, cache, settings)
×
108

109
    return res_stacklen
×
110

111

112
@click.command(context_settings=dict(help_option_names=['-h', '--help']))
1✔
113
@click_option_group.optgroup.group(
1✔
114
    'Target Input', cls=click_option_group.RequiredMutuallyExclusiveOptionGroup)
115
@click_option_group.optgroup.option(
1✔
116
    '-t', '--target', type=str, help='An plain (i.e. no header/name) RNA sequence to be searched for miRNA targets.')
117
@click_option_group.optgroup.option(
1✔
118
    '-tf', '--target_file', type=click.Path(exists=True, dir_okay=False), help='Read one or multiple target RNA sequences from a FASTA file.')
119
@click_option_group.optgroup.option(
1✔
120
    '-tct', '--target_CT_file', type=click.Path(exists=True, dir_okay=False), help='Read a target sequence/structure pair from a CT file and relate hybrid energies to energy necessary to break given target structure.')
121
@click_option_group.optgroup.group(
1✔
122
    'miRNA Input', cls=click_option_group.RequiredMutuallyExclusiveOptionGroup)
123
@click_option_group.optgroup.option(
1✔
124
    '-q', '--mirna', type=str, help='A plain (i.e. no header/name) microRNA sequence that shall bind to a target sequence.')
125
@click_option_group.optgroup.option(
1✔
126
    '-qf', '--mirna_file', type=click.Path(exists=True, dir_okay=False), help='Read one or multiple miRNA sequences from a FASTA file.')
127
@click_option_group.optgroup.group(
1✔
128
    'p-value calculation', cls=click_option_group.RequiredMutuallyExclusiveOptionGroup)
129
@click_option_group.optgroup.option(
1✔
130
    '-s', '--pretrained_set', type=click.Choice(['3utr_fly','3utr_worm','3utr_human']), help='A pre-trained set of organism specific values.')
131
@click_option_group.optgroup.option(
1✔
132
    '-d', '--distribution', nargs=2, type=float, help='<xi> and <theta> are the position and shape parameters, respectively, of the extreme value distribution assumed for p-value calculation. For example enter "0.91 0.25" without the quotes.')
133
@click.option(
1✔
134
    '--binPath', type=str, default="", help='%s expects that according Bellman\'s GAP compiled binaries are located in the same directory as the Python wrapper is. Should you moved them into another directory, you must set --binPath to this new location!' % PROGNAME)
135
@click.option(
1✔
136
    '--filter-minmax-seedlength', type=int, nargs=2, default=(0,9999), help='Minimal and maximal number of base-pairs in the seed helix.')
137
@click.option(
1✔
138
    '--filter-minmax-mirnabulgelength', type=int, nargs=2, default=(0,9999), help='Minimal and maximal number of bulged bases in microRNA directly after seed helix.')
139
@click.option(
1✔
140
    '--filter-max-energy', type=int, nargs=1, default=0, help='Maximal free energy (not that stable energies are negative)')
141
@click.option(
1✔
142
    '--filter-max-pvalue', type=click.FloatRange(0, 1), default=1, help='Maximal p-value (the smaller the less likely the result is due to rare chance)')
143
@click.option(
1✔
144
    '--verbose/--no-verbose', type=bool, default=False, help="Print verbose messages.")
145
@click.option(
1✔
146
    '--cache/--no-cache', type=bool, default=False, help="Cache the last execution and reuse if input does not change. Will store files to '%s'" % gettempdir())
147
@click.option(
1✔
148
    '--stream-output/--no-stream-output', type=bool, default=False, help="Internally, computation is done per target per miRNA. By default, results are reported once ALL computations are done. With this setting you can make %s print results as soon as they are available - with the downside that ordering might be a bit more unintuitive." % (PROGNAME))
149
@click.option(
1✔
150
    "--num-cpus", type=click.IntRange(1, cpu_count()), default=1, help="Number of CPU-cores to use. Default is 1, i.e. single-threaded. Note that --stream-output cannot be used as concurrent sub-tasks would overwrite their results.")
151
@click.option(
1✔
152
    '--sam', type=click.File('w'), required=False, help="Provide a filename if you want to make %s report results as a *.sam file, such that you can view hit positions in a genome browser like IGV." % PROGNAME)
153
@click.pass_context
1✔
154
def RNAhybrid(ctx, target, target_file, target_ct_file, mirna, mirna_file, pretrained_set, distribution, binpath, filter_minmax_seedlength, filter_minmax_mirnabulgelength, filter_max_energy, filter_max_pvalue, verbose, cache, stream_output, num_cpus, sam):
1✔
NEW
155
    click.echo("## RNAhybrid was called with following parameters:")
×
NEW
156
    click.echo("## started execution on %s by %s on %s" % (datetime.datetime.now(), getpass.getuser(), socket.gethostname()))
×
NEW
157
    for param, value in ctx.params.items():
×
NEW
158
        click.echo(f"## - {param}: {value}")
×
159

160
    settings = dict()
×
161

162
    settings['binpath'] = binpath
×
163
    settings['program_name'] = PROGNAME
×
NEW
164
    if verbose:
×
NEW
165
        verbose = sys.stderr
×
166
    else:
NEW
167
        verbose = None
×
168

169
    if filter_minmax_seedlength[0] > filter_minmax_seedlength[1]:
×
170
        raise ValueError("minimum for seed length must be smaller than maximum!")
×
171
    if filter_minmax_mirnabulgelength[0] > filter_minmax_mirnabulgelength[1]:
×
172
        raise ValueError("minimum for bulge must be smaller than maximum!")
×
173

174
    entries_mirnas = []
×
175
    if mirna_file:
×
176
        entries_mirnas = list(read_fasta(mirna_file))
×
177
    else:
178
        entries_mirnas = [('from commandline', mirna)]
×
179

180
    entries_targets = []
×
181
    if target_file:
×
182
        entries_targets = read_fasta(target_file)
×
NEW
183
    elif target_ct_file:
×
NEW
184
        entries_targets = map(lambda x: (x[0], x[1], disentangle_knots(x[2])['nested']),
×
185
                              read_CT_file(target_ct_file))
186
    else:
187
        entries_targets = [('from commandline', target)]
×
188

189
    # compute maximum duplex energies once per miRNA
190
    mdes = dict()
×
NEW
191
    if not distribution and pretrained_set:
×
192
        for entry_mirna in entries_mirnas:
×
193
            cmd_mde = compose_call('mde', '', entry_mirna[1], complement(entry_mirna[1]), **settings)
×
NEW
194
            raw_mde = cache_execute(cmd_mde, cache, '.mde', verbose)
×
NEW
195
            res_mde = Product(TypeFloat('mde')).parse_lines(raw_mde)[0]
×
196
            mdes[entry_mirna[0]] = res_mde['mde'] / 100
×
197

198
    result_nr = 1
×
199
    answers = []
×
200
    tasks = []
×
201
    targets = []
×
202
    total_mirnas = 0
×
NEW
203
    fps_verbose = []
×
204
    for pos_target, entry_target in enumerate(entries_targets):
×
205
        targets.append([entry_target[0], len(entry_target[1])])
×
206
        for pos_mirna, entry_mirna in enumerate(entries_mirnas):
×
207
            if pos_target == 0:
×
208
                total_mirnas += 1
×
NEW
209
            args = (entry_target, pos_target, entry_mirna, pos_mirna, mdes, distribution, pretrained_set, verbose, cache, settings)
×
210
            if num_cpus == 1:
×
211
                res_stacklen = process_onetarget_onemirna(*args)
×
212
                if stream_output:
×
213
                    answers = res_stacklen
×
214
                    answers = filter_answers(answers, filter_minmax_seedlength, filter_minmax_mirnabulgelength, filter_max_energy, filter_max_pvalue)
×
215
                    result_nr += print_answers(answers, result_nr)
×
216
                else:
217
                    answers.extend(res_stacklen)
×
218
            else:
219
                # replace the verbose value to enable parallel processing and writing to multiple independent files
NEW
220
                FH, fp_verbose = mkstemp(suffix='.%s.verbose' % os.getpid())
×
NEW
221
                tmp_args = list(args)
×
NEW
222
                tmp_args[7] = Path(fp_verbose)
×
NEW
223
                fps_verbose.append(fp_verbose)
×
NEW
224
                tasks.append(tuple(tmp_args))
×
225

226
    if tasks != []:
×
227
        pool = Pool(num_cpus)
×
228
        for res in zip(pool.map(wrap_process, tasks)):
×
229
            answers.extend(res[0])
×
230

231
    if (not stream_output) or (tasks != []):
×
232
        answers = filter_answers(answers, filter_minmax_seedlength, filter_minmax_mirnabulgelength, filter_max_energy, filter_max_pvalue)
×
233
        result_nr += print_answers(answers, result_nr, out=sys.stdout)
×
234

NEW
235
    for fp_verbose in fps_verbose:
×
NEW
236
        with open(fp_verbose, 'r') as f:
×
NEW
237
            print(''.join(f.readlines()), file=verbose)
×
NEW
238
        os.remove(fp_verbose)
×
239

240
    print_summary(answers, len(targets), total_mirnas)
×
241
    if sam:
×
242
        print_sam(answers, targets, sam)
×
243

NEW
244
    click.echo("## finished execution on %s" % (datetime.datetime.now()))
×
245

246

247
if __name__ == '__main__':
1✔
248
    RNAhybrid()
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc