4627485829

Build Type

push

github-actions

Committed by karlnyr

Commit Message

remove conda stuff, expand aliases

Run Details

501 of 941 relevant lines covered (53.24%)

0.53 hits per line

Source File
Press 'n' to go to next uncovered line, 'b' for previous

0.0

/demux/cli/samplesheet.py

""" CLI points for samplesheet action """
import copy
import click
import logging
import sys

from cglims.api import ClinicalLims
from demux.exc import NoValidReagentKitFound
from demux.constants.constants import COMMA

from ..utils import (
    Create2500Samplesheet,
    CreateNovaseqSamplesheet,
    HiSeq2500Samplesheet,
    HiSeqXSamplesheet,
    MiseqSamplesheet,
    NIPTSamplesheet,
    Samplesheet,
)

# Module-level logger, named after this module.
LOG = logging.getLogger(__name__)

# Shared positional "samplesheet" argument (a plain string); applied as a
# decorator to the commands in this module that operate on a samplesheet.
ARGUMENT_SAMPLE_SHEET = click.argument("samplesheet", type=str)


@click.group()
def sheet() -> None:
    """Samplesheet commands"""
    # Intentionally empty: this group only serves as the parent that the
    # commands in this module register themselves on via @sheet.command().


@sheet.command()
@ARGUMENT_SAMPLE_SHEET
@click.option(
    "-a",
    "--application",
    type=click.Choice(["wgs", "wes", "nipt", "miseq"]),
    help="sequencing type",
)
def validate(samplesheet: str, application: str) -> None:
    """validate a samplesheet"""
    # Each application is validated by its own samplesheet class.
    sheet_class_by_application = {
        "nipt": NIPTSamplesheet,
        "wes": HiSeq2500Samplesheet,
        "miseq": MiseqSamplesheet,
        "wgs": HiSeqXSamplesheet,
    }
    sheet_class = sheet_class_by_application.get(application)

    # When -a/--application is omitted nothing matches and the command
    # finishes without validating anything.
    if sheet_class is None:
        return

    sheet_class(samplesheet).validate()


@sheet.command()
@ARGUMENT_SAMPLE_SHEET
def massage(samplesheet: str) -> None:
    """create a NIPT ready SampleSheet"""
    # Load the `samplesheet` argument as a NIPT sheet and print whatever its
    # massage() step returns.
    nipt_sheet = NIPTSamplesheet(samplesheet)
    click.echo(nipt_sheet.massage())


@sheet.command()
@click.argument("sample", type=str)
@ARGUMENT_SAMPLE_SHEET
def sample_in_pooled_lane(sample: str, samplesheet: str) -> None:
    """Check if a sample is in a pooled lane"""
    parsed_sheet = Samplesheet(samplesheet)

    # Refuse to answer for a sample the sheet does not list at all.
    sample_is_listed = any(
        sample == listed_sample for listed_sample in parsed_sheet.samples()
    )
    if not sample_is_listed:
        LOG.error(f"{sample} not in {samplesheet}")
        raise click.Abort()

    # Print a lowercase boolean word on stdout.
    click.echo("true" if parsed_sheet.sample_in_pooled_lane(sample) else "false")


@sheet.command()
@ARGUMENT_SAMPLE_SHEET
@click.option(
    "-a", "--application", type=click.Choice(["miseq", "nipt"]), help="sequencing type"
)
@click.option("-f", "--flowcell", help="for miseq, please provide a flowcell id")
def demux(samplesheet: str, application: str, flowcell: str) -> None:
    """convert a NIPT or MiSeq samplesheet to a demux'able samplesheet"""
    # The docstring has to be the first statement of the function: click uses
    # it as the command's help text, and string literals placed inside the
    # branches below are never picked up as documentation.
    if application == "nipt":
        click.echo(NIPTSamplesheet(samplesheet).to_demux())
    elif application == "miseq":
        # `flowcell` comes from -f/--flowcell and is None when not supplied.
        click.echo(MiseqSamplesheet(samplesheet, flowcell).to_demux())
    else:
        # click.Choice already rejects unknown values, so this branch is only
        # reached when -a/--application was left out.
        LOG.error("No application provided!")
        sys.exit(1)


@sheet.command()
@click.argument("flowcell")
@click.option(
    "-a",
    "--application",
    type=click.Choice(["wgs", "wes", "nova", "iseq"]),
    help="application type",
)
@click.option(
    "-d", "--delimiter", default=COMMA, show_default=True, help="column delimiter"
)
@click.option(
    "-i",
    "--dualindex",
    is_flag=True,
    default=False,
    help="X: force dual index, not used for NovaSeq!",
)
@click.option(
    "-l",
    "--index-length",
    default=None,
    help="2500 and NovaSeq: only return this index length",
)
@click.option(
    "-L", "--longest", is_flag=True, help="2500 and NovaSeq: only return longest index"
)
@click.option(
    "-p",
    "--pad",
    is_flag=True,
    default=False,
    help="add 2 bases to indices with length 8",
)
@click.option(
    "-S",
    "--shortest",
    is_flag=True,
    help="2500 and NovaSeq: only return shortest index",
)
@click.pass_context
def fetch(
    context,
    application: str,
    delimiter: str,
    dualindex: bool,
    flowcell: str,
    index_length: str,
    longest: bool,
    pad: bool,
    shortest: bool,
) -> None:
    """
    Fetch a samplesheet from LIMS.
    If a flowcell has dual indices of length 10+10 bp (dual 10) and/or 8+8 bp (dual 8), use
    the option -p, or --pad to add two bases to length 8 indices (AT for index1, AC for index2).
    This will ensure that all indices in the sample sheet are of the same length, namely 10.
    """
    # Flow of this command:
    #   nova -> delegate to CreateNovaseqSamplesheet and print its result
    #   wes  -> read rows from LIMS, delegate to Create2500Samplesheet
    #   wgs  -> read rows from LIMS, expand/split indexes here, print the rows
    # Any other value aborts with an error (see the check below).

    def reverse_complement(dna: str) -> str:
        """Return the reverse complement of an upper-case A/C/G/T sequence"""
        complement = {"A": "T", "C": "G", "G": "C", "T": "A"}
        return "".join(complement[base] for base in reversed(dna))

    def get_project(project: str) -> str:
        """Only keeps the first part of the project name"""
        return project.split(" ")[0]

    def index_size(row) -> int:
        """Length of a row's index with the '-' separators removed"""
        return len(row["index"].replace("-", ""))

    # "iseq" is an accepted choice and the option may be omitted altogether,
    # but only the three applications below are implemented. Reject everything
    # else here; otherwise execution would reach the final printing step with
    # neither the header nor the column keys defined and die with a NameError.
    if application not in ("nova", "wes", "wgs"):
        LOG.error(f"Cannot fetch a samplesheet for application {application!r}")
        raise click.Abort()

    if application == "nova":
        lims_config = context.obj["lims"]
        dummy_indexes = context.obj["dummy_indexes"]
        runs_dir = context.obj["runs_dir"]["novaseq"]

        # Only the construction is guarded; printing cannot raise this error.
        try:
            demux_samplesheet = CreateNovaseqSamplesheet(
                dummy_indexes,
                flowcell,
                lims_config,
                pad,
                runs_dir,
            ).construct_samplesheet()
        except NoValidReagentKitFound as error:
            LOG.error(error.message)
            raise click.Abort() from error

        # add [section] header
        click.echo("[Data]")
        click.echo(demux_samplesheet)
        return

    lims_api = ClinicalLims(**context.obj["lims"])
    raw_samplesheet = list(lims_api.samplesheet(flowcell))

    if not raw_samplesheet:
        sys.stderr.write(f"Samplesheet for {flowcell} not found in LIMS! ")
        context.abort()

    # -L / -S override any explicit --index-length; when both flags are given
    # the shortest length wins because it is applied last.
    if longest:
        index_length = max(index_size(row) for row in raw_samplesheet)

    if shortest:
        index_length = min(index_size(row) for row in raw_samplesheet)

    # ... fix some 2500 specifics
    if application == "wes":
        demux_samplesheet = Create2500Samplesheet(
            flowcell, index_length, raw_samplesheet
        ).construct_samplesheet()

        # add [section] header
        click.echo("[Data]")
        click.echo(demux_samplesheet)
        return

    # ... fix some X specifics (application == "wgs" from here on)
    lims_keys = [
        "fcid",
        "lane",
        "sample_id",
        "sample_ref",
        "index",
        "sample_name",
        "control",
        "recipe",
        "operator",
        "project",
    ]
    if dualindex:
        # The second index column sits directly after the first one. It starts
        # out empty on every row and is filled in below for rows that carry
        # exactly two indexes.
        lims_keys.insert(lims_keys.index("index") + 1, "index2")
        for line in raw_samplesheet:
            line["index2"] = ""

    header = [Samplesheet.header_map[lims_key] for lims_key in lims_keys]

    # first do some 10X magic, if any: a row with four dash-separated indexes
    # becomes four rows, one per index, each with the index appended to the
    # sample id
    expanded_samplesheet = []
    for line in raw_samplesheet:
        tenx_indexes = line["index"].split("-")
        if len(tenx_indexes) == 4:
            for tenx_index in tenx_indexes:
                tenx_line = copy.deepcopy(line)
                tenx_line["sample_id"] = f"{line['sample_id']}_{tenx_index}"
                tenx_line["index"] = tenx_index
                expanded_samplesheet.append(tenx_line)
        else:
            expanded_samplesheet.append(line)
    raw_samplesheet = expanded_samplesheet

    # do some single/dual index stuff
    for line in raw_samplesheet:
        original_index = line["index"]
        indexes = original_index.split("-")
        if not dualindex:
            # single index: keep only the first index and tag the sample id
            first_index = indexes[0]
            line["index"] = first_index
            line["sample_id"] = f"{line['sample_id']}_{first_index}"
        elif len(indexes) == 2:
            # dual index: split the pair, reverse complement the second index
            # and tag the sample id with the original, unsplit index
            index1, index2 = indexes
            line["index"] = index1
            line["index2"] = reverse_complement(index2)
            line["sample_id"] = f"{line['sample_id']}_{original_index}"

    # add [section] header
    click.echo("[Data]")
    click.echo(delimiter.join(header))

    final_samplesheet = []
    for line in raw_samplesheet:
        # fix the project content: the shortened project name is written to
        # both the project and the sample_name column
        project = get_project(line["project"])
        line["project"] = project
        line["sample_name"] = project
        final_samplesheet.append(
            delimiter.join(str(line[lims_key]) for lims_key in lims_keys)
        )

    # no trailing newline after the last row
    print("\n".join(final_samplesheet), end="")


@sheet.command()
@ARGUMENT_SAMPLE_SHEET
def convert(samplesheet: str):
    """CLI command to convert an old HiSeq2500 sample sheet for use on Hasta"""
    # Load the `samplesheet` argument as a HiSeq2500 sheet and print the
    # result of its convert() step.
    hiseq_sheet = HiSeq2500Samplesheet(samplesheet)
    click.echo(hiseq_sheet.convert())

1	""" CLI points for samplesheet action """
2	import copy	×
3	import click	×
4	import logging	×
5	import sys	×
6
7	from cglims.api import ClinicalLims	×
8	from demux.exc import NoValidReagentKitFound	×
9	from demux.constants.constants import COMMA	×
10
11	from ..utils import (	×
12	Create2500Samplesheet,
13	CreateNovaseqSamplesheet,
14	HiSeq2500Samplesheet,
15	HiSeqXSamplesheet,
16	MiseqSamplesheet,
17	NIPTSamplesheet,
18	Samplesheet,
19	)
20
21	LOG = logging.getLogger(__name__)	×
22
23	ARGUMENT_SAMPLE_SHEET = click.argument("samplesheet", type=str)	×
24
25
26	@click.group()	×
27	def sheet() -> None:	×
28	"""Samplesheet commands"""
29	pass	×
30
31
32	@sheet.command()	×
33	@ARGUMENT_SAMPLE_SHEET	×
34	@click.option(	×
35	"-a",
36	"--application",
37	type=click.Choice(["wgs", "wes", "nipt", "miseq"]),
38	help="sequencing type",
39	)
40	def validate(samplesheet: str, application: str) -> None:	×
41	"""validate a samplesheet"""
42	if application == "nipt":	×
43	NIPTSamplesheet(samplesheet).validate()	×
44	elif application == "wes":	×
45	HiSeq2500Samplesheet(samplesheet).validate()	×
46	elif application == "miseq":	×
47	MiseqSamplesheet(samplesheet).validate()	×
48	elif application == "wgs":	×
49	HiSeqXSamplesheet(samplesheet).validate()	×
50
51
52	@sheet.command()	×
53	@ARGUMENT_SAMPLE_SHEET	×
54	def massage(samplesheet: str) -> None:	×
55	"""create a NIPT ready SampleSheet"""
56	click.echo(NIPTSamplesheet(samplesheet).massage())	×
57
58
59	@sheet.command()	×
60	@click.argument("sample", type=str)	×
61	@ARGUMENT_SAMPLE_SHEET	×
62	def sample_in_pooled_lane(sample: str, samplesheet: str) -> None:	×
63	"""Check if a sample is in a pooled lane"""
64	sample_sheet = Samplesheet(samplesheet)	×
65
66	if not any([sample == seq_sample for seq_sample in sample_sheet.samples()]):	×
67	LOG.error(f"{sample} not in {samplesheet}")	×
68	raise click.Abort()	×
69	if sample_sheet.sample_in_pooled_lane(sample):	×
70	click.echo("true")	×
71	else:
72	click.echo("false")	×
73
74
75	@sheet.command()	×
76	@ARGUMENT_SAMPLE_SHEET	×
77	@click.option(	×
78	"-a", "--application", type=click.Choice(["miseq", "nipt"]), help="sequencing type"
79	)
80	@click.option("-f", "--flowcell", help="for miseq, please provide a flowcell id")	×
81	def demux(samplesheet: str, application: str, flowcell: str):	×
82	if application == "nipt":	×
83	"""convert NIPT samplesheet to demux'able samplesheet"""
84	click.echo(NIPTSamplesheet(samplesheet).to_demux())	×
85	elif application == "miseq":	×
86	"""convert MiSeq samplesheet to demux'able samplesheet"""
87	click.echo(MiseqSamplesheet(samplesheet, flowcell).to_demux())	×
88	else:
89	LOG.error("No application provided!")	×
90	sys.exit(1)	×
91
92
93	@sheet.command()	×
94	@click.argument("flowcell")	×
95	@click.option(	×
96	"-a",
97	"--application",
98	type=click.Choice(["wgs", "wes", "nova", "iseq"]),
99	help="application type",
100	)
101	@click.option(	×
102	"-d", "--delimiter", default=COMMA, show_default=True, help="column delimiter"
103	)
104	@click.option(	×
105	"-i",
106	"--dualindex",
107	is_flag=True,
108	default=False,
109	help="X: force dual index, not used \
110	for NovaSeq!",
111	)
112	@click.option(	×
113	"-l",
114	"--index-length",
115	default=None,
116	help="2500 and NovaSeq: only return this index length",
117	)
118	@click.option(	×
119	"-L", "--longest", is_flag=True, help="2500 and NovaSeq: only return longest index"
120	)
121	@click.option(	×
122	"-p",
123	"--pad",
124	is_flag=True,
125	default=False,
126	help="add 2 bases to indices with length 8",
127	)
128	@click.option(	×
129	"-S",
130	"--shortest",
131	is_flag=True,
132	help="2500 and NovaSeq: only return shortest index",
133	)
134	@click.pass_context	×
135	def fetch(	×
136	context,
137	application: str,
138	delimiter: str,
139	dualindex: bool,
140	flowcell: str,
141	index_length: str,
142	longest: bool,
143	pad: bool,
144	shortest: bool,
145	) -> None:
146	"""
147	Fetch a samplesheet from LIMS.
148	If a flowcell has dual indices of length 10+10 bp (dual 10) and/or 8+8 bp (dual 8), use
149	the option -p, or --pad to add two bases to length 8 indices (AT for index1, AC for index2).
150	This will ensure that all indices in the sample sheet are of the same length, namely 10.
151	"""
152
153	def reverse_complement(dna: str) -> str:	×
154	complement = {"A": "T", "C": "G", "G": "C", "T": "A"}	×
155	return "".join([complement[base] for base in dna[::-1]])	×
156
157	def get_project(project: str) -> str:	×
158	"""Only keeps the first part of the project name"""
159	return project.split(" ")[0]	×
160
161	if application == "nova":	×
162	lims_config = context.obj["lims"]	×
163	dummy_indexes = context.obj["dummy_indexes"]	×
164	runs_dir = context.obj["runs_dir"]["novaseq"]	×
165
166	try:	×
167	demux_samplesheet = CreateNovaseqSamplesheet(	×
168	dummy_indexes,
169	flowcell,
170	lims_config,
171	pad,
172	runs_dir,
173	).construct_samplesheet()
174
175	# add [section] header
176	click.echo("[Data]")	×
177	click.echo(demux_samplesheet)	×
178	return	×
179	except NoValidReagentKitFound as error:	×
180	LOG.error(error.message)	×
181	raise click.Abort()	×
182
183	lims_api = ClinicalLims(**context.obj["lims"])	×
184	raw_samplesheet = list(lims_api.samplesheet(flowcell))	×
185
186	if len(raw_samplesheet) == 0:	×
187	sys.stderr.write(f"Samplesheet for {flowcell} not found in LIMS! ")	×
188	context.abort()	×
189
190	if longest:	×
191	longest_row = max(	×
192	raw_samplesheet, key=lambda x: len(x["index"].replace("-", ""))
193	)
194	index_length = len(longest_row["index"].replace("-", ""))	×
195
196	if shortest:	×
197	shortest_row = min(	×
198	raw_samplesheet, key=lambda x: len(x["index"].replace("-", ""))
199	)
200	index_length = len(shortest_row["index"].replace("-", ""))	×
201
202	# ... fix some 2500 specifics
203	if application == "wes":	×
204	demux_samplesheet = Create2500Samplesheet(	×
205	flowcell, index_length, raw_samplesheet
206	).construct_samplesheet()
207
208	# add [section] header
209	click.echo("[Data]")	×
210	click.echo(demux_samplesheet)	×
211	return	×
212
213	# ... fix some X specifics
214	if application == "wgs":	×
215	if dualindex:	×
216	lims_keys = [	×
217	"fcid",
218	"lane",
219	"sample_id",
220	"sample_ref",
221	"index",
222	"index2",
223	"sample_name",
224	"control",
225	"recipe",
226	"operator",
227	"project",
228	]
229	for line in raw_samplesheet:	×
230	line["index2"] = ""	×
231	else:
232	lims_keys = [	×
233	"fcid",
234	"lane",
235	"sample_id",
236	"sample_ref",
237	"index",
238	"sample_name",
239	"control",
240	"recipe",
241	"operator",
242	"project",
243	]
244
245	header = [Samplesheet.header_map[head] for head in lims_keys]	×
246
247	# first do some 10X magic, if any
248	new_samplesheet = []	×
249	for i, line in enumerate(raw_samplesheet):	×
250	index = line["index"]	×
251	if len(index.split("-")) == 4:	×
252	for tenx_index in index.split("-"):	×
253	tenx_line = copy.deepcopy(line)	×
254	tenx_line["sample_id"] = "{}_{}".format(	×
255	line["sample_id"], tenx_index
256	)
257	tenx_line["index"] = tenx_index	×
258	new_samplesheet.append(tenx_line)	×
259	else:
260	new_samplesheet.append(line)	×
261	raw_samplesheet = new_samplesheet	×
262
263	# do some single/dual index stuff
264	for i, line in enumerate(raw_samplesheet):	×
265	if not dualindex:	×
266	index = line["index"].split("-")[0]	×
267	raw_samplesheet[i]["index"] = index	×
268	raw_samplesheet[i]["sample_id"] = "{}_{}".format(	×
269	line["sample_id"], index
270	)
271	else:
272	ori_index = line["index"]	×
273	indexes = ori_index.split("-")	×
274	if len(indexes) == 2:	×
275	(index1, index2) = indexes	×
276	raw_samplesheet[i]["index"] = index1	×
277	raw_samplesheet[i]["index2"] = reverse_complement(index2)	×
278	raw_samplesheet[i]["sample_id"] = "{}_{}".format(	×
279	line["sample_id"], ori_index
280	)
281
282	# add [section] header
283	click.echo("[Data]")	×
284
285	click.echo(delimiter.join(header))	×
286	final_samplesheet = []	×
287	for line in raw_samplesheet:	×
288	# fix the project content
289	project = get_project(line["project"])	×
290	line["project"] = project	×
291	line["sample_name"] = project	×
292	final_samplesheet.append(	×
293	delimiter.join([str(line[lims_key]) for lims_key in lims_keys])
294	)
295
296	print("\n".join(final_samplesheet), end="")	×
297
298
299	@sheet.command()	×
300	@ARGUMENT_SAMPLE_SHEET	×
301	def convert(samplesheet: str):	×
302	"""CLI command to convert an old HiSeq2500 sample sheet for use on Hasta"""
303	click.echo(HiSeq2500Samplesheet(samplesheet).convert())	×

Clinical-Genomics / demultiplexing / 4627485829

Source File Press 'n' to go to next uncovered line, 'b' for previous

Source File
Press 'n' to go to next uncovered line, 'b' for previous