• Home
  • Features
  • Pricing
  • Docs
  • Announcements
  • Sign In

Clinical-Genomics / demultiplexing / 4627485829

pending completion
4627485829

push

github-actions

karlnyr
remove conda stuff, expand aliases

501 of 941 relevant lines covered (53.24%)

0.53 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0
/demux/cli/samplesheet.py
1
""" CLI points for samplesheet action """
2
import copy
×
3
import click
×
4
import logging
×
5
import sys
×
6

7
from cglims.api import ClinicalLims
×
8
from demux.exc import NoValidReagentKitFound
×
9
from demux.constants.constants import COMMA
×
10

11
from ..utils import (
×
12
    Create2500Samplesheet,
13
    CreateNovaseqSamplesheet,
14
    HiSeq2500Samplesheet,
15
    HiSeqXSamplesheet,
16
    MiseqSamplesheet,
17
    NIPTSamplesheet,
18
    Samplesheet,
19
)
20

21
# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)
×
22

23
# Reusable positional "samplesheet" argument, applied as a decorator on the commands below.
ARGUMENT_SAMPLE_SHEET = click.argument("samplesheet", type=str)
×
24

25

26
@click.group()
def sheet() -> None:
    """Samplesheet commands"""
    # Pure command group: the sub-commands are registered via @sheet.command().
×
30

31

32
@sheet.command()
@ARGUMENT_SAMPLE_SHEET
@click.option(
    "-a",
    "--application",
    type=click.Choice(["wgs", "wes", "nipt", "miseq"]),
    help="sequencing type",
)
def validate(samplesheet: str, application: str) -> None:
    """validate a samplesheet"""
    # Application name -> samplesheet class whose validate() is run.
    sheet_class_by_application = {
        "nipt": NIPTSamplesheet,
        "wes": HiSeq2500Samplesheet,
        "miseq": MiseqSamplesheet,
        "wgs": HiSeqXSamplesheet,
    }

    sheet_class = sheet_class_by_application.get(application)
    if sheet_class is None:
        # No application given: nothing is validated.
        return

    sheet_class(samplesheet).validate()
×
50

51

52
@sheet.command()
@ARGUMENT_SAMPLE_SHEET
def massage(samplesheet: str) -> None:
    """create a NIPT ready SampleSheet"""
    # Build the NIPT samplesheet object first, then print its massaged form.
    nipt_sheet = NIPTSamplesheet(samplesheet)
    click.echo(nipt_sheet.massage())
×
57

58

59
@sheet.command()
@click.argument("sample", type=str)
@ARGUMENT_SAMPLE_SHEET
def sample_in_pooled_lane(sample: str, samplesheet: str) -> None:
    """Check if a sample is in a pooled lane"""
    parsed_sheet = Samplesheet(samplesheet)

    # Abort when the requested sample is not listed in the sheet at all.
    sample_is_listed = any([sample == listed for listed in parsed_sheet.samples()])
    if not sample_is_listed:
        LOG.error(f"{sample} not in {samplesheet}")
        raise click.Abort()

    # Print the answer as a lowercase boolean word.
    click.echo("true" if parsed_sheet.sample_in_pooled_lane(sample) else "false")
×
73

74

75
@sheet.command()
@ARGUMENT_SAMPLE_SHEET
@click.option(
    "-a", "--application", type=click.Choice(["miseq", "nipt"]), help="sequencing type"
)
@click.option("-f", "--flowcell", help="for miseq, please provide a flowcell id")
def demux(samplesheet: str, application: str, flowcell: str) -> None:
    """Convert a NIPT or MiSeq samplesheet to a demux'able samplesheet"""
    # The description lives in the function docstring so that click shows it
    # in --help; string literals inside the branches were dead statements.
    if application == "nipt":
        click.echo(NIPTSamplesheet(samplesheet).to_demux())
    elif application == "miseq":
        # The flowcell id is handed to the MiSeq samplesheet together with the sheet.
        click.echo(MiseqSamplesheet(samplesheet, flowcell).to_demux())
    else:
        # --application is optional for click, so None ends up here.
        LOG.error("No application provided!")
        sys.exit(1)
×
91

92

93
@sheet.command()
@click.argument("flowcell")
@click.option(
    "-a",
    "--application",
    type=click.Choice(["wgs", "wes", "nova", "iseq"]),
    help="application type",
)
@click.option(
    "-d", "--delimiter", default=COMMA, show_default=True, help="column delimiter"
)
@click.option(
    "-i",
    "--dualindex",
    is_flag=True,
    default=False,
    help="X: force dual index, not used \
              for NovaSeq!",
)
@click.option(
    "-l",
    "--index-length",
    default=None,
    help="2500 and NovaSeq: only return this index length",
)
@click.option(
    "-L", "--longest", is_flag=True, help="2500 and NovaSeq: only return longest index"
)
@click.option(
    "-p",
    "--pad",
    is_flag=True,
    default=False,
    help="add 2 bases to indices with length 8",
)
@click.option(
    "-S",
    "--shortest",
    is_flag=True,
    help="2500 and NovaSeq: only return shortest index",
)
@click.pass_context
def fetch(
    context,
    application: str,
    delimiter: str,
    dualindex: bool,
    flowcell: str,
    index_length: str,
    longest: bool,
    pad: bool,
    shortest: bool,
) -> None:
    """
    Fetch a samplesheet from LIMS.
    If a flowcell has dual indices of length 10+10 bp (dual 10) and/or 8+8 bp (dual 8), use
    the option -p, or --pad to add two bases to length 8 indices (AT for index1, AC for index2).
    This will ensure that all indices in the sample sheet are of the same length, namely 10.
    """
    # Overview of the flow (kept out of the docstring, which click prints as help):
    #   nova -> built by CreateNovaseqSamplesheet from the context configuration
    #   wes  -> built by Create2500Samplesheet from the raw LIMS rows
    #   wgs  -> assembled below, row by row, from the raw LIMS rows
    # Any other application (including "iseq" and a missing -a) is rejected up
    # front; it used to crash with an UnboundLocalError on `header` further down.

    def reverse_complement(dna: str) -> str:
        """Return the reverse complement of an upper-case A/C/G/T sequence"""
        complement = {"A": "T", "C": "G", "G": "C", "T": "A"}
        return "".join(complement[base] for base in reversed(dna))

    def get_project(project: str) -> str:
        """Only keeps the first part of the project name"""
        return project.split(" ")[0]

    def index_size(row: dict) -> int:
        """Length of a row's index with the "-" separators removed"""
        return len(row["index"].replace("-", ""))

    if application not in ("nova", "wes", "wgs"):
        LOG.error(
            f"Cannot fetch a samplesheet for application {application!r}, "
            "use one of: nova, wes, wgs"
        )
        raise click.Abort()

    if application == "nova":
        lims_config = context.obj["lims"]
        dummy_indexes = context.obj["dummy_indexes"]
        runs_dir = context.obj["runs_dir"]["novaseq"]

        try:
            demux_samplesheet = CreateNovaseqSamplesheet(
                dummy_indexes,
                flowcell,
                lims_config,
                pad,
                runs_dir,
            ).construct_samplesheet()
        except NoValidReagentKitFound as error:
            LOG.error(error.message)
            raise click.Abort()

        # add [section] header
        click.echo("[Data]")
        click.echo(demux_samplesheet)
        return

    lims_api = ClinicalLims(**context.obj["lims"])
    raw_samplesheet = list(lims_api.samplesheet(flowcell))

    if not raw_samplesheet:
        sys.stderr.write(f"Samplesheet for {flowcell} not found in LIMS! ")
        context.abort()

    # --longest / --shortest override --index-length; when both flags are
    # given, --shortest is evaluated last and therefore wins.
    # NOTE(review): index_length is a str when it comes from the command line
    # but an int when derived here - confirm Create2500Samplesheet accepts both.
    if longest:
        index_length = index_size(max(raw_samplesheet, key=index_size))

    if shortest:
        index_length = index_size(min(raw_samplesheet, key=index_size))

    # ... fix some 2500 specifics
    if application == "wes":
        demux_samplesheet = Create2500Samplesheet(
            flowcell, index_length, raw_samplesheet
        ).construct_samplesheet()

        # add [section] header
        click.echo("[Data]")
        click.echo(demux_samplesheet)
        return

    # ... fix some X specifics (only "wgs" can reach this point)
    lims_keys = [
        "fcid",
        "lane",
        "sample_id",
        "sample_ref",
        "index",
        "sample_name",
        "control",
        "recipe",
        "operator",
        "project",
    ]
    if dualindex:
        # Dual index sheets carry an extra index2 column right after index;
        # it starts out empty and is filled in below for two-part indexes.
        lims_keys.insert(lims_keys.index("index") + 1, "index2")
        for line in raw_samplesheet:
            line["index2"] = ""

    header = [Samplesheet.header_map[head] for head in lims_keys]

    # first do some 10X magic, if any: a four-part index becomes four rows,
    # one per part, each with the part appended to the sample id
    new_samplesheet = []
    for line in raw_samplesheet:
        tenx_indexes = line["index"].split("-")
        if len(tenx_indexes) == 4:
            for tenx_index in tenx_indexes:
                tenx_line = copy.deepcopy(line)
                tenx_line["sample_id"] = "{}_{}".format(
                    line["sample_id"], tenx_index
                )
                tenx_line["index"] = tenx_index
                new_samplesheet.append(tenx_line)
        else:
            new_samplesheet.append(line)
    raw_samplesheet = new_samplesheet

    # do some single/dual index stuff
    for line in raw_samplesheet:
        if not dualindex:
            # Single index: keep only the first part of the index.
            index = line["index"].split("-")[0]
            line["index"] = index
            line["sample_id"] = "{}_{}".format(line["sample_id"], index)
        else:
            ori_index = line["index"]
            indexes = ori_index.split("-")
            if len(indexes) == 2:
                (index1, index2) = indexes
                line["index"] = index1
                line["index2"] = reverse_complement(index2)
                line["sample_id"] = "{}_{}".format(line["sample_id"], ori_index)

    # add [section] header
    click.echo("[Data]")

    click.echo(delimiter.join(header))
    final_samplesheet = []
    for line in raw_samplesheet:
        # fix the project content; the sample name column gets the project too
        project = get_project(line["project"])
        line["project"] = project
        line["sample_name"] = project
        final_samplesheet.append(
            delimiter.join([str(line[lims_key]) for lims_key in lims_keys])
        )

    # No trailing newline after the last row.
    print("\n".join(final_samplesheet), end="")
×
297

298

299
@sheet.command()
@ARGUMENT_SAMPLE_SHEET
def convert(samplesheet: str):
    """CLI command to convert an old HiSeq2500 sample sheet for use on Hasta"""
    # Build the HiSeq2500 samplesheet object first, then print its converted form.
    old_sheet = HiSeq2500Samplesheet(samplesheet)
    click.echo(old_sheet.convert())
×
STATUS · Troubleshooting · Open an Issue · Sales · Support · CAREERS · ENTERPRISE · START FREE · SCHEDULE DEMO
ANNOUNCEMENTS · TWITTER · TOS & SLA · Supported CI Services · What's a CI service? · Automated Testing

© 2025 Coveralls, Inc